From 0f8f40486d1b3215c845325744bd545149223805 Mon Sep 17 00:00:00 2001 From: Merry Date: Sun, 8 Jul 2018 20:54:47 +0100 Subject: [PATCH 01/23] ChocolArm64: More accurate implementation of Frecpe & Frecps (#228) * ChocolArm64: More accurate implementation of Frecpe * ChocolArm64: Handle infinities and zeros in Frecps --- .../Instruction/AInstEmitSimdArithmetic.cs | 100 ++------------- .../Instruction/AInstEmitSimdHelper.cs | 20 +++ ChocolArm64/Instruction/ASoftFloat.cs | 120 ++++++++++++++++++ Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs | 39 +++--- 4 files changed, 170 insertions(+), 109 deletions(-) diff --git a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs index b96b71be4..39331f965 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs @@ -641,106 +641,34 @@ namespace ChocolArm64.Instruction public static void Frecpe_S(AILEmitterCtx Context) { - EmitFrecpe(Context, 0, Scalar: true); + EmitScalarUnaryOpF(Context, () => + { + EmitUnarySoftFloatCall(Context, nameof(ASoftFloat.RecipEstimate)); + }); } public static void Frecpe_V(AILEmitterCtx Context) { - AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; - - int SizeF = Op.Size & 1; - - int Bytes = Context.CurrOp.GetBitsCount() >> 3; - - for (int Index = 0; Index < Bytes >> SizeF + 2; Index++) + EmitVectorUnaryOpF(Context, () => { - EmitFrecpe(Context, Index, Scalar: false); - } - - if (Op.RegisterSize == ARegisterSize.SIMD64) - { - EmitVectorZeroUpper(Context, Op.Rd); - } - } - - private static void EmitFrecpe(AILEmitterCtx Context, int Index, bool Scalar) - { - AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; - - int SizeF = Op.Size & 1; - - if (SizeF == 0) - { - Context.EmitLdc_R4(1); - } - else /* if (SizeF == 1) */ - { - Context.EmitLdc_R8(1); - } - - EmitVectorExtractF(Context, Op.Rn, Index, SizeF); - - Context.Emit(OpCodes.Div); - - if (Scalar) - { - EmitVectorZeroAll(Context, Op.Rd); - } - - EmitVectorInsertF(Context, Op.Rd, Index, SizeF); + EmitUnarySoftFloatCall(Context, nameof(ASoftFloat.RecipEstimate)); + }); } public static void Frecps_S(AILEmitterCtx Context) { - EmitFrecps(Context, 0, Scalar: true); + EmitScalarBinaryOpF(Context, () => + { + EmitBinarySoftFloatCall(Context, nameof(ASoftFloat.RecipStep)); + }); } public static void Frecps_V(AILEmitterCtx Context) { - AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; - - int SizeF = Op.Size & 1; - - int Bytes = Context.CurrOp.GetBitsCount() >> 3; - - for (int Index = 0; Index < Bytes >> SizeF + 2; Index++) + EmitVectorBinaryOpF(Context, () => { - EmitFrecps(Context, Index, Scalar: false); - } - - if (Op.RegisterSize == ARegisterSize.SIMD64) - { - EmitVectorZeroUpper(Context, Op.Rd); - } - } - - private static void EmitFrecps(AILEmitterCtx Context, int Index, bool Scalar) - { - AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - - int SizeF = Op.Size & 1; - - if (SizeF == 0) - { - Context.EmitLdc_R4(2); - } - else /* if (SizeF == 1) */ - { - Context.EmitLdc_R8(2); - } - - EmitVectorExtractF(Context, Op.Rn, Index, SizeF); - EmitVectorExtractF(Context, Op.Rm, Index, SizeF); - - Context.Emit(OpCodes.Mul); - Context.Emit(OpCodes.Sub); - - if (Scalar) - { - EmitVectorZeroAll(Context, Op.Rd); - } - - EmitVectorInsertF(Context, Op.Rd, Index, SizeF); + EmitBinarySoftFloatCall(Context, nameof(ASoftFloat.RecipStep)); + }); } public static void Frinta_S(AILEmitterCtx Context) diff --git a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs index 0f6ea42ce..d895ec9c7 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs @@ -253,6 +253,26 @@ namespace ChocolArm64.Instruction Context.EmitCall(MthdInfo); } + public static void EmitBinarySoftFloatCall(AILEmitterCtx Context, string Name) + { + IAOpCodeSimd Op = (IAOpCodeSimd)Context.CurrOp; + + int SizeF = Op.Size & 1; + + MethodInfo MthdInfo; + + if (SizeF == 0) + { + MthdInfo = typeof(ASoftFloat).GetMethod(Name, new Type[] { typeof(float), typeof(float) }); + } + else /* if (SizeF == 1) */ + { + MthdInfo = typeof(ASoftFloat).GetMethod(Name, new Type[] { typeof(double), typeof(double) }); + } + + Context.EmitCall(MthdInfo); + } + public static void EmitScalarBinaryOpByElemF(AILEmitterCtx Context, Action Emit) { AOpCodeSimdRegElemF Op = (AOpCodeSimdRegElemF)Context.CurrOp; diff --git a/ChocolArm64/Instruction/ASoftFloat.cs b/ChocolArm64/Instruction/ASoftFloat.cs index 1bd716658..e63c82bee 100644 --- a/ChocolArm64/Instruction/ASoftFloat.cs +++ b/ChocolArm64/Instruction/ASoftFloat.cs @@ -7,8 +7,10 @@ namespace ChocolArm64.Instruction static ASoftFloat() { InvSqrtEstimateTable = BuildInvSqrtEstimateTable(); + RecipEstimateTable = BuildRecipEstimateTable(); } + private static readonly byte[] RecipEstimateTable; private static readonly byte[] InvSqrtEstimateTable; private static byte[] BuildInvSqrtEstimateTable() @@ -38,6 +40,22 @@ namespace ChocolArm64.Instruction return Table; } + private static byte[] BuildRecipEstimateTable() + { + byte[] Table = new byte[256]; + for (ulong index = 0; index < 256; index++) + { + ulong a = index | 0x100; + + a = (a << 1) + 1; + ulong b = 0x80000 / a; + b = (b + 1) >> 1; + + Table[index] = (byte)(b & 0xFF); + } + return Table; + } + public static float InvSqrtEstimate(float x) { return (float)InvSqrtEstimate((double)x); @@ -105,5 +123,107 @@ namespace ChocolArm64.Instruction ulong result = x_sign | (result_exp << 52) | fraction; return BitConverter.Int64BitsToDouble((long)result); } + + public static float RecipEstimate(float x) + { + return (float)RecipEstimate((double)x); + } + + public static double RecipEstimate(double x) + { + ulong x_bits = (ulong)BitConverter.DoubleToInt64Bits(x); + ulong x_sign = x_bits & 0x8000000000000000; + ulong x_exp = (x_bits >> 52) & 0x7FF; + ulong scaled = x_bits & ((1ul << 52) - 1); + + if (x_exp >= 2045) + { + if (x_exp == 0x7ff && scaled != 0) + { + // NaN + return BitConverter.Int64BitsToDouble((long)(x_bits | 0x0008000000000000)); + } + + // Infinity, or Out of range -> Zero + return BitConverter.Int64BitsToDouble((long)x_sign); + } + + if (x_exp == 0) + { + if (scaled == 0) + { + // Zero -> Infinity + return BitConverter.Int64BitsToDouble((long)(x_sign | 0x7ff0000000000000)); + } + + // Denormal + if ((scaled & (1ul << 51)) == 0) + { + x_exp = ~0ul; + scaled <<= 2; + } + else + { + scaled <<= 1; + } + } + + scaled >>= 44; + scaled &= 0xFF; + + ulong result_exp = (2045 - x_exp) & 0x7FF; + ulong estimate = (ulong)RecipEstimateTable[scaled]; + ulong fraction = estimate << 44; + + if (result_exp == 0) + { + fraction >>= 1; + fraction |= 1ul << 51; + } + else if (result_exp == 0x7FF) + { + result_exp = 0; + fraction >>= 2; + fraction |= 1ul << 50; + } + + ulong result = x_sign | (result_exp << 52) | fraction; + return BitConverter.Int64BitsToDouble((long)result); + } + + public static float RecipStep(float op1, float op2) + { + return (float)RecipStep((double)op1, (double)op2); + } + + public static double RecipStep(double op1, double op2) + { + op1 = -op1; + + ulong op1_bits = (ulong)BitConverter.DoubleToInt64Bits(op1); + ulong op2_bits = (ulong)BitConverter.DoubleToInt64Bits(op2); + + ulong op1_sign = op1_bits & 0x8000000000000000; + ulong op2_sign = op2_bits & 0x8000000000000000; + ulong op1_other = op1_bits & 0x7FFFFFFFFFFFFFFF; + ulong op2_other = op2_bits & 0x7FFFFFFFFFFFFFFF; + + bool inf1 = op1_other == 0x7ff0000000000000; + bool inf2 = op2_other == 0x7ff0000000000000; + bool zero1 = op1_other == 0; + bool zero2 = op2_other == 0; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + return 2.0; + } + else if (inf1 || inf2) + { + // Infinity + return BitConverter.Int64BitsToDouble((long)(0x7ff0000000000000 | (op1_sign ^ op2_sign))); + } + + return 2.0 + op1 * op2; + } } } \ No newline at end of file diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs b/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs index 98be2fc5b..2a0f5ed91 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs @@ -163,26 +163,18 @@ namespace Ryujinx.Tests.Cpu Assert.That(Sse41.Extract(ThreadState.V6, (byte)0), Is.EqualTo(A * B)); } - [Test, Description("FRECPE D0, D1")] - public void Frecpe_S([Random(100)] double A) + [TestCase(0x00000000u, 0x7F800000u)] + [TestCase(0x80000000u, 0xFF800000u)] + [TestCase(0x00FFF000u, 0x7E000000u)] + [TestCase(0x41200000u, 0x3DCC8000u)] + [TestCase(0xC1200000u, 0xBDCC8000u)] + [TestCase(0x001FFFFFu, 0x7F800000u)] + [TestCase(0x007FF000u, 0x7E800000u)] + public void Frecpe_S(uint A, uint Result) { - AThreadState ThreadState = SingleOpcode(0x5EE1D820, V1: MakeVectorE0(A)); - - Assert.That(VectorExtractDouble(ThreadState.V0, 0), Is.EqualTo(1 / A)); - } - - [Test, Description("FRECPE V2.4S, V0.4S")] - public void Frecpe_V([Random(100)] float A) - { - AThreadState ThreadState = SingleOpcode(0x4EA1D802, V0: Sse.SetAllVector128(A)); - - Assert.Multiple(() => - { - Assert.That(Sse41.Extract(ThreadState.V2, (byte)0), Is.EqualTo(1 / A)); - Assert.That(Sse41.Extract(ThreadState.V2, (byte)1), Is.EqualTo(1 / A)); - Assert.That(Sse41.Extract(ThreadState.V2, (byte)2), Is.EqualTo(1 / A)); - Assert.That(Sse41.Extract(ThreadState.V2, (byte)3), Is.EqualTo(1 / A)); - }); + Vector128 V1 = MakeVectorE0(A); + AThreadState ThreadState = SingleOpcode(0x5EA1D820, V1: V1); + Assert.AreEqual(Result, GetVectorE0(ThreadState.V0)); } [Test, Description("FRECPS D0, D1, D2")] @@ -202,12 +194,13 @@ namespace Ryujinx.Tests.Cpu V2: Sse.SetAllVector128(A), V0: Sse.SetAllVector128(B)); + float Result = (float)(2 - ((double)A * (double)B)); Assert.Multiple(() => { - Assert.That(Sse41.Extract(ThreadState.V4, (byte)0), Is.EqualTo(2 - (A * B))); - Assert.That(Sse41.Extract(ThreadState.V4, (byte)1), Is.EqualTo(2 - (A * B))); - Assert.That(Sse41.Extract(ThreadState.V4, (byte)2), Is.EqualTo(2 - (A * B))); - Assert.That(Sse41.Extract(ThreadState.V4, (byte)3), Is.EqualTo(2 - (A * B))); + Assert.That(Sse41.Extract(ThreadState.V4, (byte)0), Is.EqualTo(Result)); + Assert.That(Sse41.Extract(ThreadState.V4, (byte)1), Is.EqualTo(Result)); + Assert.That(Sse41.Extract(ThreadState.V4, (byte)2), Is.EqualTo(Result)); + Assert.That(Sse41.Extract(ThreadState.V4, (byte)3), Is.EqualTo(Result)); }); } From 095db47e132a475e25d128e691ebdae101611cc9 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Sun, 8 Jul 2018 16:55:15 -0300 Subject: [PATCH 02/23] Query multiple pages at once with GetWriteWatch (#222) * Query multiple pages at once with GetWriteWatch * Allow multiple buffer types to share the same page, aways use the physical address as cache key * Remove a variable that is no longer needed --- ChocolArm64/Memory/AMemory.cs | 58 ++++++----- ChocolArm64/Memory/AMemoryWin32.cs | 29 +++++- Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs | 32 +++--- Ryujinx.HLE/Gpu/Memory/NvGpuBufferType.cs | 3 +- Ryujinx.HLE/Gpu/Memory/NvGpuVmm.cs | 6 +- Ryujinx.HLE/Gpu/Memory/NvGpuVmmCache.cs | 120 ++++++++++++---------- 6 files changed, 145 insertions(+), 103 deletions(-) diff --git a/ChocolArm64/Memory/AMemory.cs b/ChocolArm64/Memory/AMemory.cs index c02bf172f..da5cf0074 100644 --- a/ChocolArm64/Memory/AMemory.cs +++ b/ChocolArm64/Memory/AMemory.cs @@ -33,19 +33,25 @@ namespace ChocolArm64.Memory private byte* RamPtr; + private int HostPageSize; + public AMemory() { Manager = new AMemoryMgr(); Monitors = new Dictionary(); + IntPtr Size = (IntPtr)AMemoryMgr.RamSize + AMemoryMgr.PageSize; + if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) { - Ram = AMemoryWin32.Allocate((IntPtr)AMemoryMgr.RamSize + AMemoryMgr.PageSize); + Ram = AMemoryWin32.Allocate(Size); + + HostPageSize = AMemoryWin32.GetPageSize(Ram, Size); } else { - Ram = Marshal.AllocHGlobal((IntPtr)AMemoryMgr.RamSize + AMemoryMgr.PageSize); + Ram = Marshal.AllocHGlobal(Size); } RamPtr = (byte*)Ram; @@ -149,49 +155,53 @@ namespace ChocolArm64.Memory } } - public long GetHostPageSize() + public int GetHostPageSize() { - if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) - { - return AMemoryMgr.PageSize; - } - - IntPtr MemAddress = new IntPtr(RamPtr); - IntPtr MemSize = new IntPtr(AMemoryMgr.RamSize); - - long PageSize = AMemoryWin32.IsRegionModified(MemAddress, MemSize, Reset: false); - - if (PageSize < 1) - { - throw new InvalidOperationException(); - } - - return PageSize; + return HostPageSize; } - public bool IsRegionModified(long Position, long Size) + public bool[] IsRegionModified(long Position, long Size) { if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) { - return true; + return null; } long EndPos = Position + Size; if ((ulong)EndPos < (ulong)Position) { - return false; + return null; } if ((ulong)EndPos > AMemoryMgr.RamSize) { - return false; + return null; } IntPtr MemAddress = new IntPtr(RamPtr + Position); IntPtr MemSize = new IntPtr(Size); - return AMemoryWin32.IsRegionModified(MemAddress, MemSize, Reset: true) != 0; + int HostPageMask = HostPageSize - 1; + + Position &= ~HostPageMask; + + Size = EndPos - Position; + + IntPtr[] Addresses = new IntPtr[(Size + HostPageMask) / HostPageSize]; + + AMemoryWin32.IsRegionModified(MemAddress, MemSize, Addresses, out int Count); + + bool[] Modified = new bool[Addresses.Length]; + + for (int Index = 0; Index < Count; Index++) + { + long VA = Addresses[Index].ToInt64() - Ram.ToInt64(); + + Modified[(VA - Position) / HostPageSize] = true; + } + + return Modified; } public sbyte ReadSByte(long Position) diff --git a/ChocolArm64/Memory/AMemoryWin32.cs b/ChocolArm64/Memory/AMemoryWin32.cs index d097dc871..387ca32c2 100644 --- a/ChocolArm64/Memory/AMemoryWin32.cs +++ b/ChocolArm64/Memory/AMemoryWin32.cs @@ -49,7 +49,7 @@ namespace ChocolArm64.Memory VirtualFree(Address, IntPtr.Zero, MEM_RELEASE); } - public unsafe static long IsRegionModified(IntPtr Address, IntPtr Size, bool Reset) + public unsafe static int GetPageSize(IntPtr Address, IntPtr Size) { IntPtr[] Addresses = new IntPtr[1]; @@ -57,17 +57,36 @@ namespace ChocolArm64.Memory long Granularity; - int Flags = Reset ? WRITE_WATCH_FLAG_RESET : 0; - GetWriteWatch( - Flags, + 0, Address, Size, Addresses, &Count, &Granularity); - return Count != 0 ? Granularity : 0; + return (int)Granularity; + } + + public unsafe static void IsRegionModified( + IntPtr Address, + IntPtr Size, + IntPtr[] Addresses, + out int AddrCount) + { + long Count = Addresses.Length; + + long Granularity; + + GetWriteWatch( + WRITE_WATCH_FLAG_RESET, + Address, + Size, + Addresses, + &Count, + &Granularity); + + AddrCount = (int)Count; } } } \ No newline at end of file diff --git a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs index 10c99494b..b9f9cc497 100644 --- a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs +++ b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs @@ -464,19 +464,17 @@ namespace Ryujinx.HLE.Gpu.Engines GalTextureSampler Sampler = TextureFactory.MakeSampler(Gpu, Vmm, TscPosition); - long TextureAddress = Vmm.ReadInt64(TicPosition + 4) & 0xffffffffffff; + long Key = Vmm.ReadInt64(TicPosition + 4) & 0xffffffffffff; - long Key = TextureAddress; + Key = Vmm.GetPhysicalAddress(Key); - TextureAddress = Vmm.GetPhysicalAddress(TextureAddress); - - if (IsFrameBufferPosition(TextureAddress)) + if (IsFrameBufferPosition(Key)) { //This texture is a frame buffer texture, //we shouldn't read anything from memory and bind //the frame buffer texture instead, since we're not //really writing anything to memory. - Gpu.Renderer.FrameBuffer.BindTexture(TextureAddress, TexIndex); + Gpu.Renderer.FrameBuffer.BindTexture(Key, TexIndex); } else { @@ -544,6 +542,8 @@ namespace Ryujinx.HLE.Gpu.Engines { long IndexPosition = MakeInt64From2xInt32(NvGpuEngine3dReg.IndexArrayAddress); + long IboKey = Vmm.GetPhysicalAddress(IndexPosition); + int IndexEntryFmt = ReadRegister(NvGpuEngine3dReg.IndexArrayFormat); int IndexFirst = ReadRegister(NvGpuEngine3dReg.IndexBatchFirst); int IndexCount = ReadRegister(NvGpuEngine3dReg.IndexBatchCount); @@ -561,16 +561,16 @@ namespace Ryujinx.HLE.Gpu.Engines { int IbSize = IndexCount * IndexEntrySize; - bool IboCached = Gpu.Renderer.Rasterizer.IsIboCached(IndexPosition, (uint)IbSize); + bool IboCached = Gpu.Renderer.Rasterizer.IsIboCached(IboKey, (uint)IbSize); - if (!IboCached || Vmm.IsRegionModified(IndexPosition, (uint)IbSize, NvGpuBufferType.Index)) + if (!IboCached || Vmm.IsRegionModified(IboKey, (uint)IbSize, NvGpuBufferType.Index)) { byte[] Data = Vmm.ReadBytes(IndexPosition, (uint)IbSize); - Gpu.Renderer.Rasterizer.CreateIbo(IndexPosition, Data); + Gpu.Renderer.Rasterizer.CreateIbo(IboKey, Data); } - Gpu.Renderer.Rasterizer.SetIndexArray(IndexPosition, IbSize, IndexFormat); + Gpu.Renderer.Rasterizer.SetIndexArray(IboKey, IbSize, IndexFormat); } List[] Attribs = new List[32]; @@ -619,20 +619,22 @@ namespace Ryujinx.HLE.Gpu.Engines continue; } + long VboKey = Vmm.GetPhysicalAddress(VertexPosition); + int Stride = Control & 0xfff; long VbSize = (VertexEndPos - VertexPosition) + 1; - bool VboCached = Gpu.Renderer.Rasterizer.IsVboCached(VertexPosition, VbSize); + bool VboCached = Gpu.Renderer.Rasterizer.IsVboCached(VboKey, VbSize); - if (!VboCached || Vmm.IsRegionModified(VertexPosition, VbSize, NvGpuBufferType.Vertex)) + if (!VboCached || Vmm.IsRegionModified(VboKey, VbSize, NvGpuBufferType.Vertex)) { byte[] Data = Vmm.ReadBytes(VertexPosition, VbSize); - Gpu.Renderer.Rasterizer.CreateVbo(VertexPosition, Data); + Gpu.Renderer.Rasterizer.CreateVbo(VboKey, Data); } - Gpu.Renderer.Rasterizer.SetVertexArray(Index, Stride, VertexPosition, Attribs[Index].ToArray()); + Gpu.Renderer.Rasterizer.SetVertexArray(Index, Stride, VboKey, Attribs[Index].ToArray()); } GalPrimitiveType PrimType = (GalPrimitiveType)(PrimCtrl & 0xffff); @@ -641,7 +643,7 @@ namespace Ryujinx.HLE.Gpu.Engines { int VertexBase = ReadRegister(NvGpuEngine3dReg.VertexArrayElemBase); - Gpu.Renderer.Rasterizer.DrawElements(IndexPosition, IndexFirst, VertexBase, PrimType); + Gpu.Renderer.Rasterizer.DrawElements(IboKey, IndexFirst, VertexBase, PrimType); } else { diff --git a/Ryujinx.HLE/Gpu/Memory/NvGpuBufferType.cs b/Ryujinx.HLE/Gpu/Memory/NvGpuBufferType.cs index 7474aa33f..469cd6cd0 100644 --- a/Ryujinx.HLE/Gpu/Memory/NvGpuBufferType.cs +++ b/Ryujinx.HLE/Gpu/Memory/NvGpuBufferType.cs @@ -4,6 +4,7 @@ namespace Ryujinx.HLE.Gpu.Memory { Index, Vertex, - Texture + Texture, + Count } } \ No newline at end of file diff --git a/Ryujinx.HLE/Gpu/Memory/NvGpuVmm.cs b/Ryujinx.HLE/Gpu/Memory/NvGpuVmm.cs index 36f6406a1..0c81dd150 100644 --- a/Ryujinx.HLE/Gpu/Memory/NvGpuVmm.cs +++ b/Ryujinx.HLE/Gpu/Memory/NvGpuVmm.cs @@ -274,11 +274,9 @@ namespace Ryujinx.HLE.Gpu.Memory PageTable[L0][L1] = TgtAddr; } - public bool IsRegionModified(long Position, long Size, NvGpuBufferType BufferType) + public bool IsRegionModified(long PA, long Size, NvGpuBufferType BufferType) { - long PA = GetPhysicalAddress(Position); - - return Cache.IsRegionModified(Memory, BufferType, Position, PA, Size); + return Cache.IsRegionModified(Memory, BufferType, PA, Size); } public byte ReadByte(long Position) diff --git a/Ryujinx.HLE/Gpu/Memory/NvGpuVmmCache.cs b/Ryujinx.HLE/Gpu/Memory/NvGpuVmmCache.cs index c7108f00c..ac9bd850e 100644 --- a/Ryujinx.HLE/Gpu/Memory/NvGpuVmmCache.cs +++ b/Ryujinx.HLE/Gpu/Memory/NvGpuVmmCache.cs @@ -11,43 +11,53 @@ namespace Ryujinx.HLE.Gpu.Memory private class CachedPage { - private List<(long Start, long End)> Regions; + private struct Range + { + public long Start; + public long End; + + public Range(long Start, long End) + { + this.Start = Start; + this.End = End; + } + } + + private List[] Regions; public LinkedListNode Node { get; set; } - public int Count => Regions.Count; - public int Timestamp { get; private set; } - public long PABase { get; private set; } - - public NvGpuBufferType BufferType { get; private set; } - - public CachedPage(long PABase, NvGpuBufferType BufferType) + public CachedPage() { - this.PABase = PABase; - this.BufferType = BufferType; + Regions = new List[(int)NvGpuBufferType.Count]; - Regions = new List<(long, long)>(); + for (int Index = 0; Index < Regions.Length; Index++) + { + Regions[Index] = new List(); + } } - public bool AddRange(long Start, long End) + public bool AddRange(long Start, long End, NvGpuBufferType BufferType) { - for (int Index = 0; Index < Regions.Count; Index++) - { - (long RgStart, long RgEnd) = Regions[Index]; + List BtRegions = Regions[(int)BufferType]; - if (Start >= RgStart && End <= RgEnd) + for (int Index = 0; Index < BtRegions.Count; Index++) + { + Range Rg = BtRegions[Index]; + + if (Start >= Rg.Start && End <= Rg.End) { return false; } - if (Start <= RgEnd && RgStart <= End) + if (Start <= Rg.End && Rg.Start <= End) { - long MinStart = Math.Min(RgStart, Start); - long MaxEnd = Math.Max(RgEnd, End); + long MinStart = Math.Min(Rg.Start, Start); + long MaxEnd = Math.Max(Rg.End, End); - Regions[Index] = (MinStart, MaxEnd); + BtRegions[Index] = new Range(MinStart, MaxEnd); Timestamp = Environment.TickCount; @@ -55,12 +65,24 @@ namespace Ryujinx.HLE.Gpu.Memory } } - Regions.Add((Start, End)); + BtRegions.Add(new Range(Start, End)); Timestamp = Environment.TickCount; return true; } + + public int GetTotalCount() + { + int Count = 0; + + for (int Index = 0; Index < Regions.Length; Index++) + { + Count += Regions[Index].Count; + } + + return Count; + } } private Dictionary Cache; @@ -76,71 +98,61 @@ namespace Ryujinx.HLE.Gpu.Memory SortedCache = new LinkedList(); } - public bool IsRegionModified( - AMemory Memory, - NvGpuBufferType BufferType, - long VA, - long PA, - long Size) + public bool IsRegionModified(AMemory Memory, NvGpuBufferType BufferType, long PA, long Size) { + bool[] Modified = Memory.IsRegionModified(PA, Size); + + if (Modified == null) + { + return true; + } + ClearCachedPagesIfNeeded(); long PageSize = Memory.GetHostPageSize(); long Mask = PageSize - 1; - long VAEnd = VA + Size; long PAEnd = PA + Size; bool RegMod = false; - while (VA < VAEnd) - { - long Key = VA & ~Mask; - long PABase = PA & ~Mask; + int Index = 0; + + while (PA < PAEnd) + { + long Key = PA & ~Mask; - long VAPgEnd = Math.Min((VA + PageSize) & ~Mask, VAEnd); long PAPgEnd = Math.Min((PA + PageSize) & ~Mask, PAEnd); bool IsCached = Cache.TryGetValue(Key, out CachedPage Cp); - bool PgReset = false; - - if (!IsCached) + if (IsCached) { - Cp = new CachedPage(PABase, BufferType); + CpCount -= Cp.GetTotalCount(); - Cache.Add(Key, Cp); + SortedCache.Remove(Cp.Node); } else { - CpCount -= Cp.Count; + Cp = new CachedPage(); - SortedCache.Remove(Cp.Node); - - if (Cp.PABase != PABase || - Cp.BufferType != BufferType) - { - PgReset = true; - } + Cache.Add(Key, Cp); } - PgReset |= Memory.IsRegionModified(PA, PAPgEnd - PA) && IsCached; - - if (PgReset) + if (Modified[Index++] && IsCached) { - Cp = new CachedPage(PABase, BufferType); + Cp = new CachedPage(); Cache[Key] = Cp; } Cp.Node = SortedCache.AddLast(Key); - RegMod |= Cp.AddRange(VA, VAPgEnd); + RegMod |= Cp.AddRange(PA, PAPgEnd, BufferType); - CpCount += Cp.Count; + CpCount += Cp.GetTotalCount(); - VA = VAPgEnd; PA = PAPgEnd; } @@ -169,7 +181,7 @@ namespace Ryujinx.HLE.Gpu.Memory Cache.Remove(Key); - CpCount -= Cp.Count; + CpCount -= Cp.GetTotalCount(); TimeDelta = RingDelta(Cp.Timestamp, Timestamp); } From 0a36bfbf921038e8eb7d4294ec8543903c933d90 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Mon, 9 Jul 2018 22:48:28 -0300 Subject: [PATCH 03/23] Fix ZIP/UZP/TRN instructions when Rd == Rn || Rd == Rm (#239) --- ChocolArm64/Instruction/AInstEmitSimdMove.cs | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/ChocolArm64/Instruction/AInstEmitSimdMove.cs b/ChocolArm64/Instruction/AInstEmitSimdMove.cs index 95fe59499..d67946a97 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdMove.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdMove.cs @@ -339,9 +339,12 @@ namespace ChocolArm64.Instruction EmitVectorExtractZx(Context, (Index & 1) == 0 ? Op.Rn : Op.Rm, Elem, Op.Size); - EmitVectorInsert(Context, Op.Rd, Index, Op.Size); + EmitVectorInsertTmp(Context, Index, Op.Size); } + Context.EmitLdvectmp(); + Context.EmitStvec(Op.Rd); + if (Op.RegisterSize == ARegisterSize.SIMD64) { EmitVectorZeroUpper(Context, Op.Rd); @@ -363,9 +366,12 @@ namespace ChocolArm64.Instruction EmitVectorExtractZx(Context, Index < Half ? Op.Rn : Op.Rm, Elem, Op.Size); - EmitVectorInsert(Context, Op.Rd, Index, Op.Size); + EmitVectorInsertTmp(Context, Index, Op.Size); } + Context.EmitLdvectmp(); + Context.EmitStvec(Op.Rd); + if (Op.RegisterSize == ARegisterSize.SIMD64) { EmitVectorZeroUpper(Context, Op.Rd); @@ -387,9 +393,12 @@ namespace ChocolArm64.Instruction EmitVectorExtractZx(Context, (Index & 1) == 0 ? Op.Rn : Op.Rm, Elem, Op.Size); - EmitVectorInsert(Context, Op.Rd, Index, Op.Size); + EmitVectorInsertTmp(Context, Index, Op.Size); } + Context.EmitLdvectmp(); + Context.EmitStvec(Op.Rd); + if (Op.RegisterSize == ARegisterSize.SIMD64) { EmitVectorZeroUpper(Context, Op.Rd); From 791fe70810f0f0f417c74aaff5446551bed78fee Mon Sep 17 00:00:00 2001 From: gdkchan Date: Mon, 9 Jul 2018 22:49:07 -0300 Subject: [PATCH 04/23] Allow sample rate of 0 on OpenAudioOut, fix 5.1 sound output (#240) --- Ryujinx.Audio/OpenAL/OpenALAudioOut.cs | 27 ++++++++++++------- .../OsHle/Services/Aud/IAudioOutManager.cs | 15 ++++++++--- 2 files changed, 29 insertions(+), 13 deletions(-) diff --git a/Ryujinx.Audio/OpenAL/OpenALAudioOut.cs b/Ryujinx.Audio/OpenAL/OpenALAudioOut.cs index f574b46f3..2860dc2e2 100644 --- a/Ryujinx.Audio/OpenAL/OpenALAudioOut.cs +++ b/Ryujinx.Audio/OpenAL/OpenALAudioOut.cs @@ -20,7 +20,7 @@ namespace Ryujinx.Audio.OpenAL public int SourceId { get; private set; } public int SampleRate { get; private set; } - + public ALFormat Format { get; private set; } private ReleaseCallback Callback; @@ -153,7 +153,7 @@ namespace Ryujinx.Audio.OpenAL ShouldCallReleaseCallback = true; } } - + private void SyncQueuedTags() { AL.GetSource(SourceId, ALGetSourcei.BuffersQueued, out int QueuedCount); @@ -249,11 +249,6 @@ namespace Ryujinx.Audio.OpenAL private ALFormat GetALFormat(int Channels, AudioFormat Format) { - if (Channels < 1 || Channels > 2) - { - throw new ArgumentOutOfRangeException(nameof(Channels)); - } - if (Channels == 1) { switch (Format) @@ -262,7 +257,7 @@ namespace Ryujinx.Audio.OpenAL case AudioFormat.PcmInt16: return ALFormat.Mono16; } } - else /* if (Channels == 2) */ + else if (Channels == 2) { switch (Format) { @@ -270,6 +265,18 @@ namespace Ryujinx.Audio.OpenAL case AudioFormat.PcmInt16: return ALFormat.Stereo16; } } + else if (Channels == 6) + { + switch (Format) + { + case AudioFormat.PcmInt8: return ALFormat.Multi51Chn8Ext; + case AudioFormat.PcmInt16: return ALFormat.Multi51Chn16Ext; + } + } + else + { + throw new ArgumentOutOfRangeException(nameof(Channels)); + } throw new ArgumentException(nameof(Format)); } @@ -288,7 +295,7 @@ namespace Ryujinx.Audio.OpenAL { return Td.ContainsBuffer(Tag); } - + return false; } @@ -298,7 +305,7 @@ namespace Ryujinx.Audio.OpenAL { return Td.GetReleasedBuffers(MaxCount); } - + return null; } diff --git a/Ryujinx.HLE/OsHle/Services/Aud/IAudioOutManager.cs b/Ryujinx.HLE/OsHle/Services/Aud/IAudioOutManager.cs index 54ffa6d90..8c78d1d49 100644 --- a/Ryujinx.HLE/OsHle/Services/Aud/IAudioOutManager.cs +++ b/Ryujinx.HLE/OsHle/Services/Aud/IAudioOutManager.cs @@ -14,6 +14,10 @@ namespace Ryujinx.HLE.OsHle.Services.Aud { private const string DefaultAudioOutput = "DeviceOut"; + private const int DefaultSampleRate = 48000; + + private const int DefaultChannelsCount = 2; + private Dictionary m_Commands; public override IReadOnlyDictionary Commands => m_Commands; @@ -122,7 +126,12 @@ namespace Ryujinx.HLE.OsHle.Services.Aud int SampleRate = Context.RequestData.ReadInt32(); int Channels = Context.RequestData.ReadInt32(); - if (SampleRate != 48000) + if (SampleRate == 0) + { + SampleRate = DefaultSampleRate; + } + + if (SampleRate != DefaultSampleRate) { Context.Ns.Log.PrintWarning(LogClass.Audio, "Invalid sample rate!"); @@ -133,7 +142,7 @@ namespace Ryujinx.HLE.OsHle.Services.Aud if (Channels == 0) { - Channels = 2; + Channels = DefaultChannelsCount; } KEvent ReleaseEvent = new KEvent(); @@ -145,7 +154,7 @@ namespace Ryujinx.HLE.OsHle.Services.Aud IAalOutput AudioOut = Context.Ns.AudioOut; - int Track = AudioOut.OpenTrack(SampleRate, 2, Callback, out AudioFormat Format); + int Track = AudioOut.OpenTrack(SampleRate, Channels, Callback, out AudioFormat Format); MakeObject(Context, new IAudioOut(AudioOut, ReleaseEvent, Track)); From 1968386808bb48f823b1877b0e5ff8c6e2f8bd49 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Mon, 9 Jul 2018 23:01:59 -0300 Subject: [PATCH 05/23] Add locking methods to the ogl resource cache (#238) * Add locking methods to the ogl resource cache * Remove some unused arguments * Add the ZF32 texture format --- Ryujinx.Graphics/Gal/GalTextureFormat.cs | 1 + Ryujinx.Graphics/Gal/IGalRasterizer.cs | 7 ++- Ryujinx.Graphics/Gal/IGalTexture.cs | 3 ++ .../Gal/OpenGL/OGLCachedResource.cs | 43 ++++++++++++++++++- .../Gal/OpenGL/OGLEnumConverter.cs | 19 ++++---- Ryujinx.Graphics/Gal/OpenGL/OGLRasterizer.cs | 16 ++++++- Ryujinx.Graphics/Gal/OpenGL/OGLTexture.cs | 10 +++++ Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs | 20 ++++++++- Ryujinx.HLE/Gpu/Texture/TextureHelper.cs | 28 ++++++++---- Ryujinx.HLE/Gpu/Texture/TextureReader.cs | 1 + 10 files changed, 122 insertions(+), 26 deletions(-) diff --git a/Ryujinx.Graphics/Gal/GalTextureFormat.cs b/Ryujinx.Graphics/Gal/GalTextureFormat.cs index 7d19dc26d..231d33ec0 100644 --- a/Ryujinx.Graphics/Gal/GalTextureFormat.cs +++ b/Ryujinx.Graphics/Gal/GalTextureFormat.cs @@ -17,6 +17,7 @@ namespace Ryujinx.Graphics.Gal BC3 = 0x26, BC4 = 0x27, BC5 = 0x28, + ZF32 = 0x2f, Astc2D4x4 = 0x40, Astc2D5x5 = 0x41, Astc2D6x6 = 0x42, diff --git a/Ryujinx.Graphics/Gal/IGalRasterizer.cs b/Ryujinx.Graphics/Gal/IGalRasterizer.cs index 2598efb61..0c5d37e40 100644 --- a/Ryujinx.Graphics/Gal/IGalRasterizer.cs +++ b/Ryujinx.Graphics/Gal/IGalRasterizer.cs @@ -2,6 +2,9 @@ namespace Ryujinx.Graphics.Gal { public interface IGalRasterizer { + void LockCaches(); + void UnlockCaches(); + void ClearBuffers(GalClearBufferFlags Flags); bool IsVboCached(long Key, long DataSize); @@ -46,9 +49,9 @@ namespace Ryujinx.Graphics.Gal void CreateIbo(long Key, byte[] Buffer); - void SetVertexArray(int VbIndex, int Stride, long VboKey, GalVertexAttrib[] Attribs); + void SetVertexArray(int Stride, long VboKey, GalVertexAttrib[] Attribs); - void SetIndexArray(long Key, int Size, GalIndexFormat Format); + void SetIndexArray(int Size, GalIndexFormat Format); void DrawArrays(int First, int PrimCount, GalPrimitiveType PrimType); diff --git a/Ryujinx.Graphics/Gal/IGalTexture.cs b/Ryujinx.Graphics/Gal/IGalTexture.cs index 6379e73af..2ab411990 100644 --- a/Ryujinx.Graphics/Gal/IGalTexture.cs +++ b/Ryujinx.Graphics/Gal/IGalTexture.cs @@ -2,6 +2,9 @@ namespace Ryujinx.Graphics.Gal { public interface IGalTexture { + void LockCache(); + void UnlockCache(); + void Create(long Key, byte[] Data, GalTexture Texture); bool TryGetCachedTexture(long Key, long DataSize, out GalTexture Texture); diff --git a/Ryujinx.Graphics/Gal/OpenGL/OGLCachedResource.cs b/Ryujinx.Graphics/Gal/OpenGL/OGLCachedResource.cs index 06d76b8bd..01ebf9820 100644 --- a/Ryujinx.Graphics/Gal/OpenGL/OGLCachedResource.cs +++ b/Ryujinx.Graphics/Gal/OpenGL/OGLCachedResource.cs @@ -36,6 +36,10 @@ namespace Ryujinx.Graphics.Gal.OpenGL private DeleteValue DeleteValueCallback; + private Queue DeletePending; + + private bool Locked; + public OGLCachedResource(DeleteValue DeleteValueCallback) { if (DeleteValueCallback == null) @@ -48,11 +52,33 @@ namespace Ryujinx.Graphics.Gal.OpenGL Cache = new Dictionary(); SortedCache = new LinkedList(); + + DeletePending = new Queue(); + } + + public void Lock() + { + Locked = true; + } + + public void Unlock() + { + Locked = false; + + while (DeletePending.TryDequeue(out T Value)) + { + DeleteValueCallback(Value); + } + + ClearCacheIfNeeded(); } public void AddOrUpdate(long Key, T Value, long Size) { - ClearCacheIfNeeded(); + if (!Locked) + { + ClearCacheIfNeeded(); + } LinkedListNode Node = SortedCache.AddLast(Key); @@ -60,7 +86,14 @@ namespace Ryujinx.Graphics.Gal.OpenGL if (Cache.TryGetValue(Key, out CacheBucket Bucket)) { - DeleteValueCallback(Bucket.Value); + if (Locked) + { + DeletePending.Enqueue(Bucket.Value); + } + else + { + DeleteValueCallback(Bucket.Value); + } SortedCache.Remove(Bucket.Node); @@ -78,6 +111,12 @@ namespace Ryujinx.Graphics.Gal.OpenGL { Value = Bucket.Value; + SortedCache.Remove(Bucket.Node); + + LinkedListNode Node = SortedCache.AddLast(Key); + + Cache[Key] = new CacheBucket(Value, Bucket.DataSize, Node); + return true; } diff --git a/Ryujinx.Graphics/Gal/OpenGL/OGLEnumConverter.cs b/Ryujinx.Graphics/Gal/OpenGL/OGLEnumConverter.cs index 3a81150d6..8f189d2b0 100644 --- a/Ryujinx.Graphics/Gal/OpenGL/OGLEnumConverter.cs +++ b/Ryujinx.Graphics/Gal/OpenGL/OGLEnumConverter.cs @@ -129,15 +129,16 @@ namespace Ryujinx.Graphics.Gal.OpenGL { switch (Format) { - case GalTextureFormat.R32G32B32A32: return (PixelFormat.Rgba, PixelType.Float); - case GalTextureFormat.R16G16B16A16: return (PixelFormat.Rgba, PixelType.HalfFloat); - case GalTextureFormat.A8B8G8R8: return (PixelFormat.Rgba, PixelType.UnsignedByte); - case GalTextureFormat.R32: return (PixelFormat.Red, PixelType.Float); - case GalTextureFormat.A1B5G5R5: return (PixelFormat.Rgba, PixelType.UnsignedShort5551); - case GalTextureFormat.B5G6R5: return (PixelFormat.Rgb, PixelType.UnsignedShort565); - case GalTextureFormat.G8R8: return (PixelFormat.Rg, PixelType.UnsignedByte); - case GalTextureFormat.R16: return (PixelFormat.Red, PixelType.HalfFloat); - case GalTextureFormat.R8: return (PixelFormat.Red, PixelType.UnsignedByte); + case GalTextureFormat.R32G32B32A32: return (PixelFormat.Rgba, PixelType.Float); + case GalTextureFormat.R16G16B16A16: return (PixelFormat.Rgba, PixelType.HalfFloat); + case GalTextureFormat.A8B8G8R8: return (PixelFormat.Rgba, PixelType.UnsignedByte); + case GalTextureFormat.R32: return (PixelFormat.Red, PixelType.Float); + case GalTextureFormat.A1B5G5R5: return (PixelFormat.Rgba, PixelType.UnsignedShort5551); + case GalTextureFormat.B5G6R5: return (PixelFormat.Rgb, PixelType.UnsignedShort565); + case GalTextureFormat.G8R8: return (PixelFormat.Rg, PixelType.UnsignedByte); + case GalTextureFormat.R16: return (PixelFormat.Red, PixelType.HalfFloat); + case GalTextureFormat.R8: return (PixelFormat.Red, PixelType.UnsignedByte); + case GalTextureFormat.ZF32: return (PixelFormat.DepthComponent, PixelType.Float); } throw new NotImplementedException(Format.ToString()); diff --git a/Ryujinx.Graphics/Gal/OpenGL/OGLRasterizer.cs b/Ryujinx.Graphics/Gal/OpenGL/OGLRasterizer.cs index a4ec7f87c..0dc56966b 100644 --- a/Ryujinx.Graphics/Gal/OpenGL/OGLRasterizer.cs +++ b/Ryujinx.Graphics/Gal/OpenGL/OGLRasterizer.cs @@ -71,6 +71,18 @@ namespace Ryujinx.Graphics.Gal.OpenGL IndexBuffer = new IbInfo(); } + public void LockCaches() + { + VboCache.Lock(); + IboCache.Lock(); + } + + public void UnlockCaches() + { + VboCache.Unlock(); + IboCache.Unlock(); + } + public void ClearBuffers(GalClearBufferFlags Flags) { ClearBufferMask Mask = ClearBufferMask.ColorBufferBit; @@ -223,7 +235,7 @@ namespace Ryujinx.Graphics.Gal.OpenGL GL.BufferData(BufferTarget.ElementArrayBuffer, Length, Buffer, BufferUsageHint.StreamDraw); } - public void SetVertexArray(int VbIndex, int Stride, long VboKey, GalVertexAttrib[] Attribs) + public void SetVertexArray(int Stride, long VboKey, GalVertexAttrib[] Attribs) { if (!VboCache.TryGetValue(VboKey, out int VboHandle)) { @@ -270,7 +282,7 @@ namespace Ryujinx.Graphics.Gal.OpenGL } } - public void SetIndexArray(long Key, int Size, GalIndexFormat Format) + public void SetIndexArray(int Size, GalIndexFormat Format) { IndexBuffer.Type = OGLEnumConverter.GetDrawElementsType(Format); diff --git a/Ryujinx.Graphics/Gal/OpenGL/OGLTexture.cs b/Ryujinx.Graphics/Gal/OpenGL/OGLTexture.cs index c50bdd71b..5caca6ecd 100644 --- a/Ryujinx.Graphics/Gal/OpenGL/OGLTexture.cs +++ b/Ryujinx.Graphics/Gal/OpenGL/OGLTexture.cs @@ -26,6 +26,16 @@ namespace Ryujinx.Graphics.Gal.OpenGL TextureCache = new OGLCachedResource(DeleteTexture); } + public void LockCache() + { + TextureCache.Lock(); + } + + public void UnlockCache() + { + TextureCache.Unlock(); + } + private static void DeleteTexture(TCE CachedTexture) { GL.DeleteTexture(CachedTexture.Handle); diff --git a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs index b9f9cc497..2bacd71b3 100644 --- a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs +++ b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs @@ -73,6 +73,8 @@ namespace Ryujinx.HLE.Gpu.Engines private void VertexEndGl(NvGpuVmm Vmm, NvGpuPBEntry PBEntry) { + LockCaches(); + SetFrameBuffer(Vmm, 0); long[] Keys = UploadShaders(Vmm); @@ -90,6 +92,20 @@ namespace Ryujinx.HLE.Gpu.Engines UploadTextures(Vmm, Keys); UploadUniforms(Vmm); UploadVertexArrays(Vmm); + + UnlockCaches(); + } + + private void LockCaches() + { + Gpu.Renderer.Rasterizer.LockCaches(); + Gpu.Renderer.Texture.LockCache(); + } + + private void UnlockCaches() + { + Gpu.Renderer.Rasterizer.UnlockCaches(); + Gpu.Renderer.Texture.UnlockCache(); } private void ClearBuffers(NvGpuVmm Vmm, NvGpuPBEntry PBEntry) @@ -570,7 +586,7 @@ namespace Ryujinx.HLE.Gpu.Engines Gpu.Renderer.Rasterizer.CreateIbo(IboKey, Data); } - Gpu.Renderer.Rasterizer.SetIndexArray(IboKey, IbSize, IndexFormat); + Gpu.Renderer.Rasterizer.SetIndexArray(IbSize, IndexFormat); } List[] Attribs = new List[32]; @@ -634,7 +650,7 @@ namespace Ryujinx.HLE.Gpu.Engines Gpu.Renderer.Rasterizer.CreateVbo(VboKey, Data); } - Gpu.Renderer.Rasterizer.SetVertexArray(Index, Stride, VboKey, Attribs[Index].ToArray()); + Gpu.Renderer.Rasterizer.SetVertexArray(Stride, VboKey, Attribs[Index].ToArray()); } GalPrimitiveType PrimType = (GalPrimitiveType)(PrimCtrl & 0xffff); diff --git a/Ryujinx.HLE/Gpu/Texture/TextureHelper.cs b/Ryujinx.HLE/Gpu/Texture/TextureHelper.cs index ac8f75c5f..3c633b692 100644 --- a/Ryujinx.HLE/Gpu/Texture/TextureHelper.cs +++ b/Ryujinx.HLE/Gpu/Texture/TextureHelper.cs @@ -28,15 +28,25 @@ namespace Ryujinx.HLE.Gpu.Texture { switch (Texture.Format) { - case GalTextureFormat.R32G32B32A32: return Texture.Width * Texture.Height * 16; - case GalTextureFormat.R16G16B16A16: return Texture.Width * Texture.Height * 8; - case GalTextureFormat.A8B8G8R8: return Texture.Width * Texture.Height * 4; - case GalTextureFormat.R32: return Texture.Width * Texture.Height * 4; - case GalTextureFormat.A1B5G5R5: return Texture.Width * Texture.Height * 2; - case GalTextureFormat.B5G6R5: return Texture.Width * Texture.Height * 2; - case GalTextureFormat.G8R8: return Texture.Width * Texture.Height * 2; - case GalTextureFormat.R16: return Texture.Width * Texture.Height * 2; - case GalTextureFormat.R8: return Texture.Width * Texture.Height; + case GalTextureFormat.R32G32B32A32: + return Texture.Width * Texture.Height * 16; + + case GalTextureFormat.R16G16B16A16: + return Texture.Width * Texture.Height * 8; + + case GalTextureFormat.A8B8G8R8: + case GalTextureFormat.R32: + case GalTextureFormat.ZF32: + return Texture.Width * Texture.Height * 4; + + case GalTextureFormat.A1B5G5R5: + case GalTextureFormat.B5G6R5: + case GalTextureFormat.G8R8: + case GalTextureFormat.R16: + return Texture.Width * Texture.Height * 2; + + case GalTextureFormat.R8: + return Texture.Width * Texture.Height; case GalTextureFormat.BC1: case GalTextureFormat.BC4: diff --git a/Ryujinx.HLE/Gpu/Texture/TextureReader.cs b/Ryujinx.HLE/Gpu/Texture/TextureReader.cs index 48bf1a90f..24bceffb1 100644 --- a/Ryujinx.HLE/Gpu/Texture/TextureReader.cs +++ b/Ryujinx.HLE/Gpu/Texture/TextureReader.cs @@ -25,6 +25,7 @@ namespace Ryujinx.HLE.Gpu.Texture case GalTextureFormat.BC3: return Read16Bpt4x4(Memory, Texture); case GalTextureFormat.BC4: return Read8Bpt4x4 (Memory, Texture); case GalTextureFormat.BC5: return Read16Bpt4x4(Memory, Texture); + case GalTextureFormat.ZF32: return Read4Bpp (Memory, Texture); case GalTextureFormat.Astc2D4x4: return Read16Bpt4x4(Memory, Texture); } From 09c53fe06fd9dd6ec6a9e585771f73d7e45f7148 Mon Sep 17 00:00:00 2001 From: Ac_K Date: Thu, 12 Jul 2018 00:08:20 +0200 Subject: [PATCH 06/23] Update README.md --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 71dad9ce2..f6bac98c8 100644 --- a/README.md +++ b/README.md @@ -85,6 +85,8 @@ If you have some homebrew that currently doesn't work within the emulator, you c For help, support, suggestions, or if you just want to get in touch with the team; join our Discord server! https://discord.gg/VkQYXAZ +For donation support, please take a look at our Patreon: https://www.patreon.com/ryujinx + **Running** To run this emulator, you need the .NET Core 2.1 (or higher) SDK *and* the OpenAL 11 Core SDK. @@ -92,6 +94,7 @@ Run `dotnet run -c Release -- path\to\homebrew.nro` inside the Ryujinx solution Run `dotnet run -c Release -- path\to\game_exefs_and_romfs_folder` to run official games (they need to be decrypted and extracted first!) **Compatibility** + You can check out the compatibility list within the Wiki. Only a handful of games actually work. **Latest build** From 37071285bcf14ef96b16b55cb6ed7c8c003489e0 Mon Sep 17 00:00:00 2001 From: David <25727384+ogniK5377@users.noreply.github.com> Date: Thu, 12 Jul 2018 11:41:35 +1000 Subject: [PATCH 07/23] NvGetConfig with production/non production swapping (#243) * GetConfig should return 0x30006 in production mode * GetConfig will now check settings only if nv!rmos_set_production_mode is set to "0" * Code formatting, TryGetValue * Slight fixup * dont forget the setting * Implemented non production mode setting grabbing * format issue * style changes --- .../Services/Nv/NvHostCtrl/NvHostCtrlIoctl.cs | 59 ++++++++++++++++--- Ryujinx.HLE/OsHle/Services/Nv/NvResult.cs | 17 +++--- 2 files changed, 60 insertions(+), 16 deletions(-) diff --git a/Ryujinx.HLE/OsHle/Services/Nv/NvHostCtrl/NvHostCtrlIoctl.cs b/Ryujinx.HLE/OsHle/Services/Nv/NvHostCtrl/NvHostCtrlIoctl.cs index a9fd9d3ab..7705a1f78 100644 --- a/Ryujinx.HLE/OsHle/Services/Nv/NvHostCtrl/NvHostCtrlIoctl.cs +++ b/Ryujinx.HLE/OsHle/Services/Nv/NvHostCtrl/NvHostCtrlIoctl.cs @@ -2,6 +2,7 @@ using ChocolArm64.Memory; using Ryujinx.HLE.Logging; using System; using System.Collections.Concurrent; +using System.Text; using System.Threading; namespace Ryujinx.HLE.OsHle.Services.Nv.NvHostCtrl @@ -10,9 +11,16 @@ namespace Ryujinx.HLE.OsHle.Services.Nv.NvHostCtrl { private static ConcurrentDictionary UserCtxs; + private static bool IsProductionMode = true; + static NvHostCtrlIoctl() { UserCtxs = new ConcurrentDictionary(); + + if (Set.NxSettings.Settings.TryGetValue("nv!rmos_set_production_mode", out object ProductionModeSetting)) + { + IsProductionMode = ((string)ProductionModeSetting) != "0"; // Default value is "" + } } public static int ProcessIoctl(ServiceCtx Context, int Cmd) @@ -71,17 +79,52 @@ namespace Ryujinx.HLE.OsHle.Services.Nv.NvHostCtrl private static int GetConfig(ServiceCtx Context) { - long InputPosition = Context.Request.GetBufferType0x21().Position; - long OutputPosition = Context.Request.GetBufferType0x22().Position; + if (!IsProductionMode) + { + long InputPosition = Context.Request.GetBufferType0x21().Position; + long OutputPosition = Context.Request.GetBufferType0x22().Position; - string Nv = AMemoryHelper.ReadAsciiString(Context.Memory, InputPosition + 0, 0x41); - string Name = AMemoryHelper.ReadAsciiString(Context.Memory, InputPosition + 0x41, 0x41); + string Domain = AMemoryHelper.ReadAsciiString(Context.Memory, InputPosition + 0, 0x41); + string Name = AMemoryHelper.ReadAsciiString(Context.Memory, InputPosition + 0x41, 0x41); - Context.Memory.WriteByte(OutputPosition + 0x82, 0); + if (Set.NxSettings.Settings.TryGetValue($"{Domain}!{Name}", out object NvSetting)) + { + byte[] SettingBuffer = new byte[0x101]; - Context.Ns.Log.PrintStub(LogClass.ServiceNv, "Stubbed."); + if (NvSetting is string StringValue) + { + if (StringValue.Length > 0x100) + { + Context.Ns.Log.PrintError(Logging.LogClass.ServiceNv, $"{Domain}!{Name} String value size is too big!"); + } + else + { + SettingBuffer = Encoding.ASCII.GetBytes(StringValue + "\0"); + } + } - return NvResult.Success; + if (NvSetting is int IntValue) + { + SettingBuffer = BitConverter.GetBytes(IntValue); + } + else if (NvSetting is bool BoolValue) + { + SettingBuffer[0] = BoolValue ? (byte)1 : (byte)0; + } + else + { + throw new NotImplementedException(NvSetting.GetType().Name); + } + + Context.Memory.WriteBytes(OutputPosition + 0x82, SettingBuffer); + + Context.Ns.Log.PrintDebug(Logging.LogClass.ServiceNv, $"Got setting {Domain}!{Name}"); + } + + return NvResult.Success; + } + + return NvResult.NotAvailableInProduction; } private static int EventWait(ServiceCtx Context) @@ -352,4 +395,4 @@ namespace Ryujinx.HLE.OsHle.Services.Nv.NvHostCtrl UserCtxs.TryRemove(Process, out _); } } -} \ No newline at end of file +} diff --git a/Ryujinx.HLE/OsHle/Services/Nv/NvResult.cs b/Ryujinx.HLE/OsHle/Services/Nv/NvResult.cs index 720f5ccf2..78ae5ae33 100644 --- a/Ryujinx.HLE/OsHle/Services/Nv/NvResult.cs +++ b/Ryujinx.HLE/OsHle/Services/Nv/NvResult.cs @@ -2,12 +2,13 @@ namespace Ryujinx.HLE.OsHle.Services.Nv { static class NvResult { - public const int Success = 0; - public const int TryAgain = -11; - public const int OutOfMemory = -12; - public const int InvalidInput = -22; - public const int NotSupported = -25; - public const int Restart = -85; - public const int TimedOut = -110; + public const int NotAvailableInProduction = 196614; + public const int Success = 0; + public const int TryAgain = -11; + public const int OutOfMemory = -12; + public const int InvalidInput = -22; + public const int NotSupported = -25; + public const int Restart = -85; + public const int TimedOut = -110; } -} \ No newline at end of file +} From cd18ab29dfacd1f7a3218d4ec73ce664bccc3887 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 12 Jul 2018 14:03:52 -0300 Subject: [PATCH 08/23] Loop GLScreen with custom method (#244) * Loop GLScreen with custom method * Fix deadlocks * Fix screen resizing * Change event to bool * Try to fix quitting error * Set title from main thread * Queue max 1 vsync, fix high FPS after a slowdown --- Ryujinx.HLE/Gpu/Engines/NvGpuFifo.cs | 7 ++ Ryujinx.HLE/Switch.cs | 5 ++ Ryujinx/Ui/GLScreen.cs | 110 ++++++++++++++++++++++++--- Ryujinx/Ui/Program.cs | 2 +- 4 files changed, 113 insertions(+), 11 deletions(-) diff --git a/Ryujinx.HLE/Gpu/Engines/NvGpuFifo.cs b/Ryujinx.HLE/Gpu/Engines/NvGpuFifo.cs index 0bc682a70..7b999eaed 100644 --- a/Ryujinx.HLE/Gpu/Engines/NvGpuFifo.cs +++ b/Ryujinx.HLE/Gpu/Engines/NvGpuFifo.cs @@ -1,5 +1,6 @@ using Ryujinx.HLE.Gpu.Memory; using System.Collections.Concurrent; +using System.Threading; namespace Ryujinx.HLE.Gpu.Engines { @@ -18,6 +19,8 @@ namespace Ryujinx.HLE.Gpu.Engines private NvGpuEngine[] SubChannels; + public AutoResetEvent Event { get; private set; } + private struct CachedMacro { public int Position { get; private set; } @@ -60,6 +63,8 @@ namespace Ryujinx.HLE.Gpu.Engines Macros = new CachedMacro[MacrosCount]; Mme = new int[MmeWords]; + + Event = new AutoResetEvent(false); } public void PushBuffer(NvGpuVmm Vmm, NvGpuPBEntry[] Buffer) @@ -68,6 +73,8 @@ namespace Ryujinx.HLE.Gpu.Engines { BufferQueue.Enqueue((Vmm, PBEntry)); } + + Event.Set(); } public void DispatchCalls() diff --git a/Ryujinx.HLE/Switch.cs b/Ryujinx.HLE/Switch.cs index f7b263cd0..1946b187b 100644 --- a/Ryujinx.HLE/Switch.cs +++ b/Ryujinx.HLE/Switch.cs @@ -71,6 +71,11 @@ namespace Ryujinx.HLE Os.LoadProgram(FileName); } + public bool WaitFifo() + { + return Gpu.Fifo.Event.WaitOne(8); + } + public void ProcessFrame() { Gpu.Fifo.DispatchCalls(); diff --git a/Ryujinx/Ui/GLScreen.cs b/Ryujinx/Ui/GLScreen.cs index 7a4e42e9e..9b5dda4f0 100644 --- a/Ryujinx/Ui/GLScreen.cs +++ b/Ryujinx/Ui/GLScreen.cs @@ -5,6 +5,9 @@ using Ryujinx.Graphics.Gal; using Ryujinx.HLE; using Ryujinx.HLE.Input; using System; +using System.Threading; + +using Stopwatch = System.Diagnostics.Stopwatch; namespace Ryujinx { @@ -16,6 +19,8 @@ namespace Ryujinx private const float TouchScreenRatioX = (float)TouchScreenWidth / TouchScreenHeight; private const float TouchScreenRatioY = (float)TouchScreenHeight / TouchScreenWidth; + private const int TargetFPS = 60; + private Switch Ns; private IGalRenderer Renderer; @@ -24,6 +29,14 @@ namespace Ryujinx private MouseState? Mouse = null; + private Thread RenderThread; + + private bool ResizeEvent; + + private bool TitleEvent; + + private string NewTitle; + public GLScreen(Switch Ns, IGalRenderer Renderer) : base(1280, 720, new GraphicsMode(), "Ryujinx", 0, @@ -36,13 +49,85 @@ namespace Ryujinx Location = new Point( (DisplayDevice.Default.Width / 2) - (Width / 2), (DisplayDevice.Default.Height / 2) - (Height / 2)); + + ResizeEvent = false; + + TitleEvent = false; } - protected override void OnLoad(EventArgs e) + private void RenderLoop() { - VSync = VSyncMode.On; + MakeCurrent(); + + Stopwatch Chrono = new Stopwatch(); + + Chrono.Start(); + + long TicksPerFrame = Stopwatch.Frequency / TargetFPS; + + long Ticks = 0; + + while (Exists && !IsExiting) + { + if (Ns.WaitFifo()) + { + Ns.ProcessFrame(); + } + + Renderer.RunActions(); + + if (ResizeEvent) + { + ResizeEvent = false; + + Renderer.FrameBuffer.SetWindowSize(Width, Height); + } + + Ticks += Chrono.ElapsedTicks; + + Chrono.Restart(); + + if (Ticks >= TicksPerFrame) + { + RenderFrame(); + + //Queue max. 1 vsync + Ticks = Math.Min(Ticks - TicksPerFrame, TicksPerFrame); + } + } + } + + public void MainLoop() + { + VSync = VSyncMode.Off; + + Visible = true; Renderer.FrameBuffer.SetWindowSize(Width, Height); + + Context.MakeCurrent(null); + + //OpenTK doesn't like sleeps in its thread, to avoid this a renderer thread is created + RenderThread = new Thread(RenderLoop); + + RenderThread.Start(); + + while (Exists && !IsExiting) + { + ProcessEvents(); + + if (!IsExiting) + { + UpdateFrame(); + + if (TitleEvent) + { + TitleEvent = false; + + Title = NewTitle; + } + } + } } private bool IsGamePadButtonPressedFromString(GamePadState GamePad, string Button) @@ -99,7 +184,7 @@ namespace Ryujinx } } - protected override void OnUpdateFrame(FrameEventArgs e) + private new void UpdateFrame() { HidControllerButtons CurrentButton = 0; HidJoystickPosition LeftJoystick; @@ -278,13 +363,9 @@ namespace Ryujinx CurrentButton, LeftJoystick, RightJoystick); - - Ns.ProcessFrame(); - - Renderer.RunActions(); } - protected override void OnRenderFrame(FrameEventArgs e) + private new void RenderFrame() { Renderer.FrameBuffer.Render(); @@ -293,16 +374,25 @@ namespace Ryujinx double HostFps = Ns.Statistics.GetSystemFrameRate(); double GameFps = Ns.Statistics.GetGameFrameRate(); - Title = $"Ryujinx | Host FPS: {HostFps:0.0} | Game FPS: {GameFps:0.0}"; + NewTitle = $"Ryujinx | Host FPS: {HostFps:0.0} | Game FPS: {GameFps:0.0}"; + + TitleEvent = true; SwapBuffers(); Ns.Os.SignalVsync(); } + protected override void OnUnload(EventArgs e) + { + RenderThread.Join(); + + base.OnUnload(e); + } + protected override void OnResize(EventArgs e) { - Renderer.FrameBuffer.SetWindowSize(Width, Height); + ResizeEvent = true; } protected override void OnKeyDown(KeyboardKeyEventArgs e) diff --git a/Ryujinx/Ui/Program.cs b/Ryujinx/Ui/Program.cs index b14897695..5cacc6228 100644 --- a/Ryujinx/Ui/Program.cs +++ b/Ryujinx/Ui/Program.cs @@ -67,7 +67,7 @@ namespace Ryujinx Screen.Exit(); }; - Screen.Run(0.0, 60.0); + Screen.MainLoop(); } Environment.Exit(0); From b233ae964fcaae900cdefa6ce51b0edb2892dfaf Mon Sep 17 00:00:00 2001 From: Merry Date: Thu, 12 Jul 2018 19:51:02 +0100 Subject: [PATCH 09/23] AInstEmitSimdCvt: Half-precision to single-precision conversion (#235) --- ChocolArm64/Instruction/AInstEmitSimdCvt.cs | 8 ++--- ChocolArm64/Instruction/ASoftFloat.cs | 36 +++++++++++++++++++ Ryujinx.Tests/Cpu/CpuTestSimdCvt.cs | 40 +++++++++++++++++++++ 3 files changed, 80 insertions(+), 4 deletions(-) create mode 100644 Ryujinx.Tests/Cpu/CpuTestSimdCvt.cs diff --git a/ChocolArm64/Instruction/AInstEmitSimdCvt.cs b/ChocolArm64/Instruction/AInstEmitSimdCvt.cs index 98bb972a2..da584743c 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdCvt.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdCvt.cs @@ -45,10 +45,10 @@ namespace ChocolArm64.Instruction { if (SizeF == 0) { - //TODO: This need the half precision floating point type, - //that is not yet supported on .NET. We should probably - //do our own implementation on the meantime. - throw new NotImplementedException(); + EmitVectorExtractZx(Context, Op.Rn, Part + Index, 1); + Context.Emit(OpCodes.Conv_U2); + + Context.EmitCall(typeof(ASoftFloat), nameof(ASoftFloat.ConvertHalfToSingle)); } else /* if (SizeF == 1) */ { diff --git a/ChocolArm64/Instruction/ASoftFloat.cs b/ChocolArm64/Instruction/ASoftFloat.cs index e63c82bee..27f4f7fb4 100644 --- a/ChocolArm64/Instruction/ASoftFloat.cs +++ b/ChocolArm64/Instruction/ASoftFloat.cs @@ -225,5 +225,41 @@ namespace ChocolArm64.Instruction return 2.0 + op1 * op2; } + + public static float ConvertHalfToSingle(ushort x) + { + uint x_sign = (uint)(x >> 15) & 0x0001; + uint x_exp = (uint)(x >> 10) & 0x001F; + uint x_mantissa = (uint)x & 0x03FF; + + if (x_exp == 0 && x_mantissa == 0) + { + // Zero + return BitConverter.Int32BitsToSingle((int)(x_sign << 31)); + } + + if (x_exp == 0x1F) + { + // NaN or Infinity + return BitConverter.Int32BitsToSingle((int)((x_sign << 31) | 0x7F800000 | (x_mantissa << 13))); + } + + int exponent = (int)x_exp - 15; + + if (x_exp == 0) + { + // Denormal + x_mantissa <<= 1; + while ((x_mantissa & 0x0400) == 0) + { + x_mantissa <<= 1; + exponent--; + } + x_mantissa &= 0x03FF; + } + + uint new_exp = (uint)((exponent + 127) & 0xFF) << 23; + return BitConverter.Int32BitsToSingle((int)((x_sign << 31) | new_exp | (x_mantissa << 13))); + } } } \ No newline at end of file diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdCvt.cs b/Ryujinx.Tests/Cpu/CpuTestSimdCvt.cs new file mode 100644 index 000000000..2d021616c --- /dev/null +++ b/Ryujinx.Tests/Cpu/CpuTestSimdCvt.cs @@ -0,0 +1,40 @@ +using ChocolArm64.State; + +using NUnit.Framework; + +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; + +namespace Ryujinx.Tests.Cpu +{ + public class CpuTestSimdCvt : CpuTest + { + [TestCase((ushort)0x0000, 0x00000000u)] // Positive Zero + [TestCase((ushort)0x8000, 0x80000000u)] // Negative Zero + [TestCase((ushort)0x3E00, 0x3FC00000u)] // +1.5 + [TestCase((ushort)0xBE00, 0xBFC00000u)] // -1.5 + [TestCase((ushort)0xFFFF, 0xFFFFE000u)] // -QNaN + [TestCase((ushort)0x7C00, 0x7F800000u)] // +Inf + [TestCase((ushort)0x3C00, 0x3F800000u)] // 1.0 + [TestCase((ushort)0x3C01, 0x3F802000u)] // 1.0009765625 + [TestCase((ushort)0xC000, 0xC0000000u)] // -2.0 + [TestCase((ushort)0x7BFF, 0x477FE000u)] // 65504.0 (Largest Normal) + [TestCase((ushort)0x03FF, 0x387FC000u)] // 0.00006097555 (Largest Subnormal) + [TestCase((ushort)0x0001, 0x33800000u)] // 5.96046448e-8 (Smallest Subnormal) + public void Fcvtl_V_f16(ushort Value, uint Result) + { + uint Opcode = 0x0E217801; + Vector128 V0 = Sse.StaticCast(Sse2.SetAllVector128(Value)); + + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0); + + Assert.Multiple(() => + { + Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V1), (byte)0), Is.EqualTo(Result)); + Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V1), (byte)1), Is.EqualTo(Result)); + Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V1), (byte)2), Is.EqualTo(Result)); + Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V1), (byte)3), Is.EqualTo(Result)); + }); + } + } +} From 3b00333b0ce21538e5ff361e3e41d89bee586d36 Mon Sep 17 00:00:00 2001 From: greggameplayer <33609333+greggameplayer@users.noreply.github.com> Date: Fri, 13 Jul 2018 02:27:59 +0200 Subject: [PATCH 10/23] Add return of Texture Size and Bytes Per Pixel of ASTC2D 5x5, 6x6, 8x8, 10x10 and 12x12 (#249) * return correct size of ASTC 5x5, 6x6, 8x8, 10x10 and 12x12 * return correct Bytes Per Pixel * Use method in order to get CompressedTextureSize * Add Read16BptCompressedTexture method * add Bpb integer argument --- Ryujinx.HLE/Gpu/Texture/TextureHelper.cs | 83 +++++++++++++++++++++--- Ryujinx.HLE/Gpu/Texture/TextureReader.cs | 53 +++++++++------ 2 files changed, 108 insertions(+), 28 deletions(-) diff --git a/Ryujinx.HLE/Gpu/Texture/TextureHelper.cs b/Ryujinx.HLE/Gpu/Texture/TextureHelper.cs index 3c633b692..6b9a30635 100644 --- a/Ryujinx.HLE/Gpu/Texture/TextureHelper.cs +++ b/Ryujinx.HLE/Gpu/Texture/TextureHelper.cs @@ -51,10 +51,7 @@ namespace Ryujinx.HLE.Gpu.Texture case GalTextureFormat.BC1: case GalTextureFormat.BC4: { - int W = (Texture.Width + 3) / 4; - int H = (Texture.Height + 3) / 4; - - return W * H * 8; + return CompressedTextureSize(Texture.Width, Texture.Height, 4, 4, 8); } case GalTextureFormat.BC7U: @@ -63,16 +60,86 @@ namespace Ryujinx.HLE.Gpu.Texture case GalTextureFormat.BC5: case GalTextureFormat.Astc2D4x4: { - int W = (Texture.Width + 3) / 4; - int H = (Texture.Height + 3) / 4; - - return W * H * 16; + return CompressedTextureSize(Texture.Width, Texture.Height, 4, 4, 16); + } + + case GalTextureFormat.Astc2D5x5: + { + return CompressedTextureSize(Texture.Width, Texture.Height, 5, 5, 16); + } + + case GalTextureFormat.Astc2D6x6: + { + return CompressedTextureSize(Texture.Width, Texture.Height, 6, 6, 16); + } + + case GalTextureFormat.Astc2D8x8: + { + return CompressedTextureSize(Texture.Width, Texture.Height, 8, 8, 16); + } + + case GalTextureFormat.Astc2D10x10: + { + return CompressedTextureSize(Texture.Width, Texture.Height, 10, 10, 16); + } + + case GalTextureFormat.Astc2D12x12: + { + return CompressedTextureSize(Texture.Width, Texture.Height, 12, 12, 16); + } + + case GalTextureFormat.Astc2D5x4: + { + return CompressedTextureSize(Texture.Width, Texture.Height, 5, 4, 16); + } + + case GalTextureFormat.Astc2D6x5: + { + return CompressedTextureSize(Texture.Width, Texture.Height, 6, 5, 16); + } + + case GalTextureFormat.Astc2D8x6: + { + return CompressedTextureSize(Texture.Width, Texture.Height, 8, 6, 16); + } + + case GalTextureFormat.Astc2D10x8: + { + return CompressedTextureSize(Texture.Width, Texture.Height, 10, 8, 16); + } + + case GalTextureFormat.Astc2D12x10: + { + return CompressedTextureSize(Texture.Width, Texture.Height, 12, 10, 16); + } + + case GalTextureFormat.Astc2D8x5: + { + return CompressedTextureSize(Texture.Width, Texture.Height, 8, 5, 16); + } + + case GalTextureFormat.Astc2D10x5: + { + return CompressedTextureSize(Texture.Width, Texture.Height, 10, 5, 16); + } + + case GalTextureFormat.Astc2D10x6: + { + return CompressedTextureSize(Texture.Width, Texture.Height, 10, 6, 16); } } throw new NotImplementedException(Texture.Format.ToString()); } + public static int CompressedTextureSize(int TextureWidth, int TextureHeight, int BlockWidth, int BlockHeight, int Bpb) + { + int W = (TextureWidth + (BlockWidth - 1)) / BlockWidth; + int H = (TextureHeight + (BlockHeight - 1)) / BlockHeight; + + return W * H * Bpb; + } + public static (AMemory Memory, long Position) GetMemoryAndPosition( IAMemory Memory, long Position) diff --git a/Ryujinx.HLE/Gpu/Texture/TextureReader.cs b/Ryujinx.HLE/Gpu/Texture/TextureReader.cs index 24bceffb1..8bd4dbcba 100644 --- a/Ryujinx.HLE/Gpu/Texture/TextureReader.cs +++ b/Ryujinx.HLE/Gpu/Texture/TextureReader.cs @@ -10,23 +10,36 @@ namespace Ryujinx.HLE.Gpu.Texture { switch (Texture.Format) { - case GalTextureFormat.R32G32B32A32: return Read16Bpp (Memory, Texture); - case GalTextureFormat.R16G16B16A16: return Read8Bpp (Memory, Texture); - case GalTextureFormat.A8B8G8R8: return Read4Bpp (Memory, Texture); - case GalTextureFormat.R32: return Read4Bpp (Memory, Texture); - case GalTextureFormat.A1B5G5R5: return Read5551 (Memory, Texture); - case GalTextureFormat.B5G6R5: return Read565 (Memory, Texture); - case GalTextureFormat.G8R8: return Read2Bpp (Memory, Texture); - case GalTextureFormat.R16: return Read2Bpp (Memory, Texture); - case GalTextureFormat.R8: return Read1Bpp (Memory, Texture); - case GalTextureFormat.BC7U: return Read16Bpt4x4(Memory, Texture); - case GalTextureFormat.BC1: return Read8Bpt4x4 (Memory, Texture); - case GalTextureFormat.BC2: return Read16Bpt4x4(Memory, Texture); - case GalTextureFormat.BC3: return Read16Bpt4x4(Memory, Texture); - case GalTextureFormat.BC4: return Read8Bpt4x4 (Memory, Texture); - case GalTextureFormat.BC5: return Read16Bpt4x4(Memory, Texture); - case GalTextureFormat.ZF32: return Read4Bpp (Memory, Texture); - case GalTextureFormat.Astc2D4x4: return Read16Bpt4x4(Memory, Texture); + case GalTextureFormat.R32G32B32A32: return Read16Bpp (Memory, Texture); + case GalTextureFormat.R16G16B16A16: return Read8Bpp (Memory, Texture); + case GalTextureFormat.A8B8G8R8: return Read4Bpp (Memory, Texture); + case GalTextureFormat.R32: return Read4Bpp (Memory, Texture); + case GalTextureFormat.A1B5G5R5: return Read5551 (Memory, Texture); + case GalTextureFormat.B5G6R5: return Read565 (Memory, Texture); + case GalTextureFormat.G8R8: return Read2Bpp (Memory, Texture); + case GalTextureFormat.R16: return Read2Bpp (Memory, Texture); + case GalTextureFormat.R8: return Read1Bpp (Memory, Texture); + case GalTextureFormat.BC7U: return Read16BptCompressedTexture(Memory, Texture, 4, 4); + case GalTextureFormat.BC1: return Read8Bpt4x4 (Memory, Texture); + case GalTextureFormat.BC2: return Read16BptCompressedTexture(Memory, Texture, 4, 4); + case GalTextureFormat.BC3: return Read16BptCompressedTexture(Memory, Texture, 4, 4); + case GalTextureFormat.BC4: return Read8Bpt4x4 (Memory, Texture); + case GalTextureFormat.BC5: return Read16BptCompressedTexture(Memory, Texture, 4, 4); + case GalTextureFormat.ZF32: return Read4Bpp (Memory, Texture); + case GalTextureFormat.Astc2D4x4: return Read16BptCompressedTexture(Memory, Texture, 4, 4); + case GalTextureFormat.Astc2D5x5: return Read16BptCompressedTexture(Memory, Texture, 5, 5); + case GalTextureFormat.Astc2D6x6: return Read16BptCompressedTexture(Memory, Texture, 6, 6); + case GalTextureFormat.Astc2D8x8: return Read16BptCompressedTexture(Memory, Texture, 8, 8); + case GalTextureFormat.Astc2D10x10: return Read16BptCompressedTexture(Memory, Texture, 10, 10); + case GalTextureFormat.Astc2D12x12: return Read16BptCompressedTexture(Memory, Texture, 12, 12); + case GalTextureFormat.Astc2D5x4: return Read16BptCompressedTexture(Memory, Texture, 5, 4); + case GalTextureFormat.Astc2D6x5: return Read16BptCompressedTexture(Memory, Texture, 6, 5); + case GalTextureFormat.Astc2D8x6: return Read16BptCompressedTexture(Memory, Texture, 8, 6); + case GalTextureFormat.Astc2D10x8: return Read16BptCompressedTexture(Memory, Texture, 10, 8); + case GalTextureFormat.Astc2D12x10: return Read16BptCompressedTexture(Memory, Texture, 12, 10); + case GalTextureFormat.Astc2D8x5: return Read16BptCompressedTexture(Memory, Texture, 8, 5); + case GalTextureFormat.Astc2D10x5: return Read16BptCompressedTexture(Memory, Texture, 10, 5); + case GalTextureFormat.Astc2D10x6: return Read16BptCompressedTexture(Memory, Texture, 10, 6); } throw new NotImplementedException(Texture.Format.ToString()); @@ -307,10 +320,10 @@ namespace Ryujinx.HLE.Gpu.Texture return Output; } - private unsafe static byte[] Read16Bpt4x4(IAMemory Memory, TextureInfo Texture) + private unsafe static byte[] Read16BptCompressedTexture(IAMemory Memory, TextureInfo Texture, int BlockWidth, int BlockHeight) { - int Width = (Texture.Width + 3) / 4; - int Height = (Texture.Height + 3) / 4; + int Width = (Texture.Width + (BlockWidth - 1)) / BlockWidth; + int Height = (Texture.Height + (BlockHeight - 1)) / BlockHeight; byte[] Output = new byte[Width * Height * 16]; From 37bf02f0572ff5535695ffa9527b1e651d0a1b7d Mon Sep 17 00:00:00 2001 From: Thomas Guillemard Date: Fri, 13 Jul 2018 23:35:19 +0200 Subject: [PATCH 11/23] TimeZone implements cmd 0, 1, 2, 3, 4 and 100 (#250) The implementation of the TimezoneRule isn't matching hardware but doesn't need to be accurate (games are only passing the value) --- .../OsHle/Services/Time/ITimeZoneService.cs | 174 +++++++++++++++--- 1 file changed, 146 insertions(+), 28 deletions(-) diff --git a/Ryujinx.HLE/OsHle/Services/Time/ITimeZoneService.cs b/Ryujinx.HLE/OsHle/Services/Time/ITimeZoneService.cs index 39454d433..a2206a126 100644 --- a/Ryujinx.HLE/OsHle/Services/Time/ITimeZoneService.cs +++ b/Ryujinx.HLE/OsHle/Services/Time/ITimeZoneService.cs @@ -2,6 +2,7 @@ using Ryujinx.HLE.Logging; using Ryujinx.HLE.OsHle.Ipc; using System; using System.Collections.Generic; +using System.Text; namespace Ryujinx.HLE.OsHle.Services.Time { @@ -13,20 +14,31 @@ namespace Ryujinx.HLE.OsHle.Services.Time private static readonly DateTime Epoch = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc); + private TimeZoneInfo TimeZone = TimeZoneInfo.Local; + public ITimeZoneService() { m_Commands = new Dictionary() { - { 0, GetDeviceLocationName }, - { 101, ToCalendarTimeWithMyRule } + { 0, GetDeviceLocationName }, + { 1, SetDeviceLocationName }, + { 2, GetTotalLocationNameCount }, + { 3, LoadLocationNameList }, + { 4, LoadTimeZoneRule }, + { 100, ToCalendarTime }, + { 101, ToCalendarTimeWithMyRule } }; } public long GetDeviceLocationName(ServiceCtx Context) { - Context.Ns.Log.PrintStub(LogClass.ServiceTime, "Stubbed."); + char[] TzName = TimeZone.Id.ToCharArray(); - for (int Index = 0; Index < 0x24; Index++) + Context.ResponseData.Write(TzName); + + int Padding = 0x24 - TzName.Length; + + for (int Index = 0; Index < Padding; Index++) { Context.ResponseData.Write((byte)0); } @@ -34,11 +46,94 @@ namespace Ryujinx.HLE.OsHle.Services.Time return 0; } - public long ToCalendarTimeWithMyRule(ServiceCtx Context) + public long SetDeviceLocationName(ServiceCtx Context) { - long PosixTime = Context.RequestData.ReadInt64(); + byte[] LocationName = Context.RequestData.ReadBytes(0x24); + string TzID = Encoding.ASCII.GetString(LocationName).TrimEnd('\0'); - DateTime CurrentTime = Epoch.AddSeconds(PosixTime).ToLocalTime(); + long ResultCode = 0; + + try + { + TimeZone = TimeZoneInfo.FindSystemTimeZoneById(TzID); + } + catch (TimeZoneNotFoundException e) + { + ResultCode = 0x7BA74; + } + + return ResultCode; + } + + public long GetTotalLocationNameCount(ServiceCtx Context) + { + Context.ResponseData.Write(TimeZoneInfo.GetSystemTimeZones().Count); + + return 0; + } + + public long LoadLocationNameList(ServiceCtx Context) + { + long BufferPosition = Context.Response.SendBuff[0].Position; + long BufferSize = Context.Response.SendBuff[0].Size; + + int i = 0; + foreach (TimeZoneInfo info in TimeZoneInfo.GetSystemTimeZones()) + { + byte[] TzData = Encoding.ASCII.GetBytes(info.Id); + + Context.Memory.WriteBytes(BufferPosition + i, TzData); + + int Padding = 0x24 - TzData.Length; + + for (int Index = 0; Index < Padding; Index++) + { + Context.ResponseData.Write((byte)0); + } + + i += 0x24; + } + return 0; + } + + public long LoadTimeZoneRule(ServiceCtx Context) + { + long BufferPosition = Context.Request.ReceiveBuff[0].Position; + long BufferSize = Context.Request.ReceiveBuff[0].Size; + + if (BufferSize != 0x4000) + { + Context.Ns.Log.PrintWarning(LogClass.ServiceTime, $"TimeZoneRule buffer size is 0x{BufferSize:x} (expected 0x4000)"); + } + + long ResultCode = 0; + + byte[] LocationName = Context.RequestData.ReadBytes(0x24); + string TzID = Encoding.ASCII.GetString(LocationName).TrimEnd('\0'); + + // Check if the Time Zone exists, otherwise error out. + try + { + TimeZoneInfo Info = TimeZoneInfo.FindSystemTimeZoneById(TzID); + byte[] TzData = Encoding.ASCII.GetBytes(Info.Id); + + // FIXME: This is not in ANY cases accurate, but the games don't about the content of the buffer, they only pass it. + // TODO: Reverse the TZif2 conversion in PCV to make this match with real hardware. + Context.Memory.WriteBytes(BufferPosition, TzData); + } + catch (TimeZoneNotFoundException e) + { + Context.Ns.Log.PrintWarning(LogClass.ServiceTime, $"Timezone not found for string: {TzID} (len: {TzID.Length})"); + ResultCode = 0x7BA74; + } + + return ResultCode; + } + + private long ToCalendarTimeWithTz(ServiceCtx Context, long PosixTime, TimeZoneInfo Info) + { + DateTime CurrentTime = Epoch.AddSeconds(PosixTime); + CurrentTime = TimeZoneInfo.ConvertTimeFromUtc(CurrentTime, Info); Context.ResponseData.Write((ushort)CurrentTime.Year); Context.ResponseData.Write((byte)CurrentTime.Month); @@ -46,31 +141,54 @@ namespace Ryujinx.HLE.OsHle.Services.Time Context.ResponseData.Write((byte)CurrentTime.Hour); Context.ResponseData.Write((byte)CurrentTime.Minute); Context.ResponseData.Write((byte)CurrentTime.Second); - Context.ResponseData.Write((byte)0); - - /* Thanks to TuxSH - struct CalendarAdditionalInfo { - u32 tm_wday; //day of week [0,6] (Sunday = 0) - s32 tm_yday; //day of year [0,365] - struct timezone { - char[8] tz_name; - bool isDaylightSavingTime; - s32 utcOffsetSeconds; - }; - }; - */ + Context.ResponseData.Write((byte)0); //MilliSecond ? Context.ResponseData.Write((int)CurrentTime.DayOfWeek); - Context.ResponseData.Write(CurrentTime.DayOfYear - 1); - - //TODO: Find out the names used. - Context.ResponseData.Write(new byte[8]); - + Context.ResponseData.Write(new byte[8]); //TODO: Find out the names used. Context.ResponseData.Write((byte)(CurrentTime.IsDaylightSavingTime() ? 1 : 0)); - - Context.ResponseData.Write((int)TimeZoneInfo.Local.GetUtcOffset(CurrentTime).TotalSeconds); + Context.ResponseData.Write((int)Info.GetUtcOffset(CurrentTime).TotalSeconds); return 0; } + + public long ToCalendarTime(ServiceCtx Context) + { + long PosixTime = Context.RequestData.ReadInt64(); + long BufferPosition = Context.Request.SendBuff[0].Position; + long BufferSize = Context.Request.SendBuff[0].Size; + + if (BufferSize != 0x4000) + { + Context.Ns.Log.PrintWarning(LogClass.ServiceTime, $"TimeZoneRule buffer size is 0x{BufferSize:x} (expected 0x4000)"); + } + + // TODO: Reverse the TZif2 conversion in PCV to make this match with real hardware. + byte[] TzData = Context.Memory.ReadBytes(BufferPosition, 0x24); + string TzID = Encoding.ASCII.GetString(TzData).TrimEnd('\0'); + + long ResultCode = 0; + + // Check if the Time Zone exists, otherwise error out. + try + { + TimeZoneInfo Info = TimeZoneInfo.FindSystemTimeZoneById(TzID); + + ResultCode = ToCalendarTimeWithTz(Context, PosixTime, Info); + } + catch (TimeZoneNotFoundException e) + { + Context.Ns.Log.PrintWarning(LogClass.ServiceTime, $"Timezone not found for string: {TzID} (len: {TzID.Length})"); + ResultCode = 0x7BA74; + } + + return ResultCode; + } + + public long ToCalendarTimeWithMyRule(ServiceCtx Context) + { + long PosixTime = Context.RequestData.ReadInt64(); + + return ToCalendarTimeWithTz(Context, PosixTime, TimeZone); + } } -} \ No newline at end of file +} From 494f8f0248e7daf3fdfb89a6d90f1598232b6a87 Mon Sep 17 00:00:00 2001 From: Starlet Date: Fri, 13 Jul 2018 16:36:57 -0500 Subject: [PATCH 12/23] Implement CSRNG (Cryptographically Secure Random Bytes) (#216) * Implement CSRNG (Cryptographically Secure Random Bytes) * Compliant with review. * Dispose Rng --- Ryujinx.HLE/OsHle/Services/ServiceFactory.cs | 4 ++ .../OsHle/Services/Spl/IRandomInterface.cs | 50 +++++++++++++++++++ 2 files changed, 54 insertions(+) create mode 100644 Ryujinx.HLE/OsHle/Services/Spl/IRandomInterface.cs diff --git a/Ryujinx.HLE/OsHle/Services/ServiceFactory.cs b/Ryujinx.HLE/OsHle/Services/ServiceFactory.cs index 914c84490..b69fc9f88 100644 --- a/Ryujinx.HLE/OsHle/Services/ServiceFactory.cs +++ b/Ryujinx.HLE/OsHle/Services/ServiceFactory.cs @@ -18,6 +18,7 @@ using Ryujinx.HLE.OsHle.Services.Prepo; using Ryujinx.HLE.OsHle.Services.Set; using Ryujinx.HLE.OsHle.Services.Sfdnsres; using Ryujinx.HLE.OsHle.Services.Sm; +using Ryujinx.HLE.OsHle.Services.Spl; using Ryujinx.HLE.OsHle.Services.Ssl; using Ryujinx.HLE.OsHle.Services.Vi; using System; @@ -66,6 +67,9 @@ namespace Ryujinx.HLE.OsHle.Services case "caps:ss": return new IScreenshotService(); + case "csrng": + return new IRandomInterface(); + case "friend:a": return new IServiceCreator(); diff --git a/Ryujinx.HLE/OsHle/Services/Spl/IRandomInterface.cs b/Ryujinx.HLE/OsHle/Services/Spl/IRandomInterface.cs new file mode 100644 index 000000000..489ca52ce --- /dev/null +++ b/Ryujinx.HLE/OsHle/Services/Spl/IRandomInterface.cs @@ -0,0 +1,50 @@ +using Ryujinx.HLE.OsHle.Ipc; +using System; +using System.Collections.Generic; +using System.Security.Cryptography; + +namespace Ryujinx.HLE.OsHle.Services.Spl +{ + class IRandomInterface : IpcService, IDisposable + { + private Dictionary m_Commands; + + public override IReadOnlyDictionary Commands => m_Commands; + + private RNGCryptoServiceProvider Rng; + + public IRandomInterface() + { + m_Commands = new Dictionary() + { + { 0, GetRandomBytes } + }; + + Rng = new RNGCryptoServiceProvider(); + } + + public long GetRandomBytes(ServiceCtx Context) + { + byte[] RandomBytes = new byte[Context.Request.ReceiveBuff[0].Size]; + + Rng.GetBytes(RandomBytes); + + Context.Memory.WriteBytes(Context.Request.ReceiveBuff[0].Position, RandomBytes); + + return 0; + } + + public void Dispose() + { + Dispose(true); + } + + protected virtual void Dispose(bool Disposing) + { + if (Disposing) + { + Rng.Dispose(); + } + } + } +} \ No newline at end of file From 2f37583ab3b49aa5064a72c8d3b4e8245ebb6b5b Mon Sep 17 00:00:00 2001 From: gdkchan Date: Sat, 14 Jul 2018 13:08:39 -0300 Subject: [PATCH 13/23] Some small shader related fixes (#258) * Some small shader related fixes * Address PR feedback --- Ryujinx.Graphics/Gal/IGalShader.cs | 2 ++ Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs | 12 +++++++ Ryujinx.Graphics/Gal/Shader/GlslDecompiler.cs | 31 +++++++++++++------ Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs | 10 +++--- Ryujinx.HLE/Gpu/Texture/TextureHelper.cs | 28 ++++++++--------- Ryujinx.HLE/Gpu/Texture/TextureReader.cs | 2 +- 6 files changed, 57 insertions(+), 28 deletions(-) diff --git a/Ryujinx.Graphics/Gal/IGalShader.cs b/Ryujinx.Graphics/Gal/IGalShader.cs index 06f3fac97..9adaceaf5 100644 --- a/Ryujinx.Graphics/Gal/IGalShader.cs +++ b/Ryujinx.Graphics/Gal/IGalShader.cs @@ -18,6 +18,8 @@ namespace Ryujinx.Graphics.Gal void Bind(long Key); + void Unbind(GalShaderType Type); + void BindProgram(); } } \ No newline at end of file diff --git a/Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs b/Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs index 3c5c874ea..c55a758b4 100644 --- a/Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs +++ b/Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs @@ -203,6 +203,18 @@ namespace Ryujinx.Graphics.Gal.OpenGL } } + public void Unbind(GalShaderType Type) + { + switch (Type) + { + case GalShaderType.Vertex: Current.Vertex = null; break; + case GalShaderType.TessControl: Current.TessControl = null; break; + case GalShaderType.TessEvaluation: Current.TessEvaluation = null; break; + case GalShaderType.Geometry: Current.Geometry = null; break; + case GalShaderType.Fragment: Current.Fragment = null; break; + } + } + public void BindProgram() { if (Current.Vertex == null || diff --git a/Ryujinx.Graphics/Gal/Shader/GlslDecompiler.cs b/Ryujinx.Graphics/Gal/Shader/GlslDecompiler.cs index f3075a504..575fb72f9 100644 --- a/Ryujinx.Graphics/Gal/Shader/GlslDecompiler.cs +++ b/Ryujinx.Graphics/Gal/Shader/GlslDecompiler.cs @@ -216,7 +216,7 @@ namespace Ryujinx.Graphics.Gal.Shader private void PrintDeclOutAttributes() { - if (Decl.ShaderType == GalShaderType.Vertex) + if (Decl.ShaderType != GalShaderType.Fragment) { SB.AppendLine("layout (location = " + GlslDecl.PositionOutAttrLocation + ") out vec4 " + GlslDecl.PositionOutAttrName + ";"); } @@ -337,7 +337,10 @@ namespace Ryujinx.Graphics.Gal.Shader if (Decl.ShaderType == GalShaderType.Vertex) { SB.AppendLine(IdentationStr + "gl_Position.xy *= " + GlslDecl.FlipUniformName + ";"); + } + if (Decl.ShaderType != GalShaderType.Fragment) + { SB.AppendLine(IdentationStr + GlslDecl.PositionOutAttrName + " = gl_Position;"); SB.AppendLine(IdentationStr + GlslDecl.PositionOutAttrName + ".w = 1;"); } @@ -598,9 +601,6 @@ namespace Ryujinx.Graphics.Gal.Shader { switch (Op.Inst) { - case ShaderIrInst.Frcp: - return true; - case ShaderIrInst.Ipa: case ShaderIrInst.Texq: case ShaderIrInst.Texs: @@ -608,8 +608,7 @@ namespace Ryujinx.Graphics.Gal.Shader return false; } - return Op.OperandB != null || - Op.OperandC != null; + return true; } private string GetName(ShaderIrOperCbuf Cbuf) @@ -711,13 +710,13 @@ namespace Ryujinx.Graphics.Gal.Shader } else { - return Imm.Value.ToString(CultureInfo.InvariantCulture); + return GetIntConst(Imm.Value); } } private string GetValue(ShaderIrOperImmf Immf) { - return Immf.Value.ToString(CultureInfo.InvariantCulture); + return GetFloatConst(Immf.Value); } private string GetName(ShaderIrOperPred Pred) @@ -1047,7 +1046,7 @@ namespace Ryujinx.Graphics.Gal.Shader if (!float.IsNaN(Value) && !float.IsInfinity(Value)) { - return Value.ToString(CultureInfo.InvariantCulture); + return GetFloatConst(Value); } } break; @@ -1064,6 +1063,20 @@ namespace Ryujinx.Graphics.Gal.Shader return Expr; } + private static string GetIntConst(int Value) + { + string Expr = Value.ToString(CultureInfo.InvariantCulture); + + return Value < 0 ? "(" + Expr + ")" : Expr; + } + + private static string GetFloatConst(float Value) + { + string Expr = Value.ToString(CultureInfo.InvariantCulture); + + return Value < 0 ? "(" + Expr + ")" : Expr; + } + private static OperType GetDstNodeType(ShaderIrNode Node) { //Special case instructions with the result type different diff --git a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs index 2bacd71b3..5c474ab0b 100644 --- a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs +++ b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs @@ -172,6 +172,8 @@ namespace Ryujinx.HLE.Gpu.Engines for (; Index < 6; Index++) { + GalShaderType Type = GetTypeFromProgram(Index); + int Control = ReadRegister(NvGpuEngine3dReg.ShaderNControl + Index * 0x10); int Offset = ReadRegister(NvGpuEngine3dReg.ShaderNOffset + Index * 0x10); @@ -180,16 +182,16 @@ namespace Ryujinx.HLE.Gpu.Engines if (!Enable) { + Gpu.Renderer.Shader.Unbind(Type); + continue; } long Key = BasePosition + (uint)Offset; - GalShaderType ShaderType = GetTypeFromProgram(Index); + Keys[(int)Type] = Key; - Keys[(int)ShaderType] = Key; - - Gpu.Renderer.Shader.Create(Vmm, Key, ShaderType); + Gpu.Renderer.Shader.Create(Vmm, Key, Type); Gpu.Renderer.Shader.Bind(Key); } diff --git a/Ryujinx.HLE/Gpu/Texture/TextureHelper.cs b/Ryujinx.HLE/Gpu/Texture/TextureHelper.cs index 6b9a30635..c0749d6a2 100644 --- a/Ryujinx.HLE/Gpu/Texture/TextureHelper.cs +++ b/Ryujinx.HLE/Gpu/Texture/TextureHelper.cs @@ -62,67 +62,67 @@ namespace Ryujinx.HLE.Gpu.Texture { return CompressedTextureSize(Texture.Width, Texture.Height, 4, 4, 16); } - + case GalTextureFormat.Astc2D5x5: { return CompressedTextureSize(Texture.Width, Texture.Height, 5, 5, 16); } - + case GalTextureFormat.Astc2D6x6: { return CompressedTextureSize(Texture.Width, Texture.Height, 6, 6, 16); } - + case GalTextureFormat.Astc2D8x8: { return CompressedTextureSize(Texture.Width, Texture.Height, 8, 8, 16); } - + case GalTextureFormat.Astc2D10x10: { return CompressedTextureSize(Texture.Width, Texture.Height, 10, 10, 16); } - + case GalTextureFormat.Astc2D12x12: { return CompressedTextureSize(Texture.Width, Texture.Height, 12, 12, 16); } - + case GalTextureFormat.Astc2D5x4: { return CompressedTextureSize(Texture.Width, Texture.Height, 5, 4, 16); } - + case GalTextureFormat.Astc2D6x5: { return CompressedTextureSize(Texture.Width, Texture.Height, 6, 5, 16); } - + case GalTextureFormat.Astc2D8x6: { return CompressedTextureSize(Texture.Width, Texture.Height, 8, 6, 16); } - + case GalTextureFormat.Astc2D10x8: { return CompressedTextureSize(Texture.Width, Texture.Height, 10, 8, 16); } - + case GalTextureFormat.Astc2D12x10: { return CompressedTextureSize(Texture.Width, Texture.Height, 12, 10, 16); } - + case GalTextureFormat.Astc2D8x5: { return CompressedTextureSize(Texture.Width, Texture.Height, 8, 5, 16); } - + case GalTextureFormat.Astc2D10x5: { return CompressedTextureSize(Texture.Width, Texture.Height, 10, 5, 16); } - + case GalTextureFormat.Astc2D10x6: { return CompressedTextureSize(Texture.Width, Texture.Height, 10, 6, 16); @@ -139,7 +139,7 @@ namespace Ryujinx.HLE.Gpu.Texture return W * H * Bpb; } - + public static (AMemory Memory, long Position) GetMemoryAndPosition( IAMemory Memory, long Position) diff --git a/Ryujinx.HLE/Gpu/Texture/TextureReader.cs b/Ryujinx.HLE/Gpu/Texture/TextureReader.cs index 8bd4dbcba..6c08cd6c4 100644 --- a/Ryujinx.HLE/Gpu/Texture/TextureReader.cs +++ b/Ryujinx.HLE/Gpu/Texture/TextureReader.cs @@ -30,7 +30,7 @@ namespace Ryujinx.HLE.Gpu.Texture case GalTextureFormat.Astc2D5x5: return Read16BptCompressedTexture(Memory, Texture, 5, 5); case GalTextureFormat.Astc2D6x6: return Read16BptCompressedTexture(Memory, Texture, 6, 6); case GalTextureFormat.Astc2D8x8: return Read16BptCompressedTexture(Memory, Texture, 8, 8); - case GalTextureFormat.Astc2D10x10: return Read16BptCompressedTexture(Memory, Texture, 10, 10); + case GalTextureFormat.Astc2D10x10: return Read16BptCompressedTexture(Memory, Texture, 10, 10); case GalTextureFormat.Astc2D12x12: return Read16BptCompressedTexture(Memory, Texture, 12, 12); case GalTextureFormat.Astc2D5x4: return Read16BptCompressedTexture(Memory, Texture, 5, 4); case GalTextureFormat.Astc2D6x5: return Read16BptCompressedTexture(Memory, Texture, 6, 5); From 514218ab98acc1f0ace2e2cc0b8c1091ffccc6ce Mon Sep 17 00:00:00 2001 From: gdkchan Date: Sat, 14 Jul 2018 13:13:02 -0300 Subject: [PATCH 14/23] Add SMLSL, SQRSHRN and SRSHR (Vector) cpu instructions, nits (#225) * Add SMLSL, SQRSHRN and SRSHR (Vector) cpu instructions * Address PR feedback * Address PR feedback * Remove another useless temp var * nit: Alignment * Replace Context.CurrOp.GetBitsCount() with Op.GetBitsCount() * Fix encodings and move flag bit test out of the loop --- ChocolArm64/AOpCodeTable.cs | 20 ++- .../Instruction/AInstEmitSimdArithmetic.cs | 118 ++++----------- ChocolArm64/Instruction/AInstEmitSimdCmp.cs | 6 +- ChocolArm64/Instruction/AInstEmitSimdCvt.cs | 4 +- .../Instruction/AInstEmitSimdHelper.cs | 135 ++++++++++++++++-- .../Instruction/AInstEmitSimdLogical.cs | 4 +- .../Instruction/AInstEmitSimdMemory.cs | 5 +- ChocolArm64/Instruction/AInstEmitSimdMove.cs | 18 +-- ChocolArm64/Instruction/AInstEmitSimdShift.cs | 108 ++++++++++---- 9 files changed, 265 insertions(+), 153 deletions(-) diff --git a/ChocolArm64/AOpCodeTable.cs b/ChocolArm64/AOpCodeTable.cs index fb4763ef8..0e979aa44 100644 --- a/ChocolArm64/AOpCodeTable.cs +++ b/ChocolArm64/AOpCodeTable.cs @@ -371,16 +371,22 @@ namespace ChocolArm64 SetA64("0x001110<<1xxxxx011011xxxxxxxxxx", AInstEmit.Smin_V, typeof(AOpCodeSimdReg)); SetA64("0x001110<<1xxxxx101011xxxxxxxxxx", AInstEmit.Sminp_V, typeof(AOpCodeSimdReg)); SetA64("0x001110<<1xxxxx100000xxxxxxxxxx", AInstEmit.Smlal_V, typeof(AOpCodeSimdReg)); + SetA64("0x001110<<1xxxxx101000xxxxxxxxxx", AInstEmit.Smlsl_V, typeof(AOpCodeSimdReg)); SetA64("0x001110<<1xxxxx110000xxxxxxxxxx", AInstEmit.Smull_V, typeof(AOpCodeSimdReg)); + SetA64("0x00111100>>>xxx100111xxxxxxxxxx", AInstEmit.Sqrshrn_V, typeof(AOpCodeSimdShImm)); SetA64("01011110<<100001010010xxxxxxxxxx", AInstEmit.Sqxtn_S, typeof(AOpCodeSimd)); SetA64("0x001110<<100001010010xxxxxxxxxx", AInstEmit.Sqxtn_V, typeof(AOpCodeSimd)); SetA64("01111110<<100001001010xxxxxxxxxx", AInstEmit.Sqxtun_S, typeof(AOpCodeSimd)); SetA64("0x101110<<100001001010xxxxxxxxxx", AInstEmit.Sqxtun_V, typeof(AOpCodeSimd)); + SetA64("0x00111100>>>xxx001001xxxxxxxxxx", AInstEmit.Srshr_V, typeof(AOpCodeSimdShImm)); + SetA64("0100111101xxxxxx001001xxxxxxxxxx", AInstEmit.Srshr_V, typeof(AOpCodeSimdShImm)); SetA64("0>001110<<1xxxxx010001xxxxxxxxxx", AInstEmit.Sshl_V, typeof(AOpCodeSimdReg)); SetA64("0x00111100>>>xxx101001xxxxxxxxxx", AInstEmit.Sshll_V, typeof(AOpCodeSimdShImm)); - SetA64("010111110>>>>xxx000001xxxxxxxxxx", AInstEmit.Sshr_S, typeof(AOpCodeSimdShImm)); - SetA64("0x0011110>>>>xxx000001xxxxxxxxxx", AInstEmit.Sshr_V, typeof(AOpCodeSimdShImm)); - SetA64("0x0011110>>>>xxx000101xxxxxxxxxx", AInstEmit.Ssra_V, typeof(AOpCodeSimdShImm)); + SetA64("0101111101xxxxxx000001xxxxxxxxxx", AInstEmit.Sshr_S, typeof(AOpCodeSimdShImm)); + SetA64("0x00111100>>>xxx000001xxxxxxxxxx", AInstEmit.Sshr_V, typeof(AOpCodeSimdShImm)); + SetA64("0100111101xxxxxx000001xxxxxxxxxx", AInstEmit.Sshr_V, typeof(AOpCodeSimdShImm)); + SetA64("0x00111100>>>xxx000101xxxxxxxxxx", AInstEmit.Ssra_V, typeof(AOpCodeSimdShImm)); + SetA64("0100111101xxxxxx000101xxxxxxxxxx", AInstEmit.Ssra_V, typeof(AOpCodeSimdShImm)); SetA64("0x00110000000000xxxxxxxxxxxxxxxx", AInstEmit.St__Vms, typeof(AOpCodeSimdMemMs)); SetA64("0x001100100xxxxxxxxxxxxxxxxxxxxx", AInstEmit.St__Vms, typeof(AOpCodeSimdMemMs)); SetA64("0x00110100x00000xxxxxxxxxxxxxxxx", AInstEmit.St__Vss, typeof(AOpCodeSimdMemSs)); @@ -419,9 +425,11 @@ namespace ChocolArm64 SetA64("0x101110<<100001010010xxxxxxxxxx", AInstEmit.Uqxtn_V, typeof(AOpCodeSimd)); SetA64("0>101110<<1xxxxx010001xxxxxxxxxx", AInstEmit.Ushl_V, typeof(AOpCodeSimdReg)); SetA64("0x10111100>>>xxx101001xxxxxxxxxx", AInstEmit.Ushll_V, typeof(AOpCodeSimdShImm)); - SetA64("011111110>>>>xxx000001xxxxxxxxxx", AInstEmit.Ushr_S, typeof(AOpCodeSimdShImm)); - SetA64("0x1011110>>>>xxx000001xxxxxxxxxx", AInstEmit.Ushr_V, typeof(AOpCodeSimdShImm)); - SetA64("0x1011110>>>>xxx000101xxxxxxxxxx", AInstEmit.Usra_V, typeof(AOpCodeSimdShImm)); + SetA64("0111111101xxxxxx000001xxxxxxxxxx", AInstEmit.Ushr_S, typeof(AOpCodeSimdShImm)); + SetA64("0x10111100>>>xxx000001xxxxxxxxxx", AInstEmit.Ushr_V, typeof(AOpCodeSimdShImm)); + SetA64("0110111101xxxxxx000001xxxxxxxxxx", AInstEmit.Ushr_V, typeof(AOpCodeSimdShImm)); + SetA64("0x10111100>>>xxx000101xxxxxxxxxx", AInstEmit.Usra_V, typeof(AOpCodeSimdShImm)); + SetA64("0110111101xxxxxx000101xxxxxxxxxx", AInstEmit.Usra_V, typeof(AOpCodeSimdShImm)); SetA64("0>001110<<0xxxxx000110xxxxxxxxxx", AInstEmit.Uzp1_V, typeof(AOpCodeSimdReg)); SetA64("0>001110<<0xxxxx010110xxxxxxxxxx", AInstEmit.Uzp2_V, typeof(AOpCodeSimdReg)); SetA64("0x001110<<100001001010xxxxxxxxxx", AInstEmit.Xtn_V, typeof(AOpCodeSimd)); diff --git a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs index 39331f965..2fc8f178d 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs @@ -65,11 +65,12 @@ namespace ChocolArm64.Instruction { AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; + int Elems = Bytes >> Op.Size; EmitVectorExtractZx(Context, Op.Rn, 0, Op.Size); - for (int Index = 1; Index < (Bytes >> Op.Size); Index++) + for (int Index = 1; Index < Elems; Index++) { EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size); @@ -97,9 +98,10 @@ namespace ChocolArm64.Instruction { AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; + int Elems = Bytes >> Op.Size; - for (int Index = 0; Index < (Bytes >> Op.Size); Index++) + for (int Index = 0; Index < Elems; Index++) { EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size); @@ -190,84 +192,6 @@ namespace ChocolArm64.Instruction } } - private static void EmitSaturatingExtNarrow(AILEmitterCtx Context, bool SignedSrc, bool SignedDst, bool Scalar) - { - AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; - - int Elems = (!Scalar ? 8 >> Op.Size : 1); - int ESize = 8 << Op.Size; - - int Part = (!Scalar & (Op.RegisterSize == ARegisterSize.SIMD128) ? Elems : 0); - - int TMaxValue = (SignedDst ? (1 << (ESize - 1)) - 1 : (int)((1L << ESize) - 1L)); - int TMinValue = (SignedDst ? -((1 << (ESize - 1))) : 0); - - Context.EmitLdc_I8(0L); - Context.EmitSttmp(); - - for (int Index = 0; Index < Elems; Index++) - { - AILLabel LblLe = new AILLabel(); - AILLabel LblGeEnd = new AILLabel(); - - EmitVectorExtract(Context, Op.Rn, Index, Op.Size + 1, SignedSrc); - - Context.Emit(OpCodes.Dup); - - Context.EmitLdc_I4(TMaxValue); - Context.Emit(OpCodes.Conv_U8); - - Context.Emit(SignedSrc ? OpCodes.Ble_S : OpCodes.Ble_Un_S, LblLe); - - Context.Emit(OpCodes.Pop); - - Context.EmitLdc_I4(TMaxValue); - - Context.EmitLdc_I8(0x8000000L); - Context.EmitSttmp(); - - Context.Emit(OpCodes.Br_S, LblGeEnd); - - Context.MarkLabel(LblLe); - - Context.Emit(OpCodes.Dup); - - Context.EmitLdc_I4(TMinValue); - Context.Emit(OpCodes.Conv_I8); - - Context.Emit(SignedSrc ? OpCodes.Bge_S : OpCodes.Bge_Un_S, LblGeEnd); - - Context.Emit(OpCodes.Pop); - - Context.EmitLdc_I4(TMinValue); - - Context.EmitLdc_I8(0x8000000L); - Context.EmitSttmp(); - - Context.MarkLabel(LblGeEnd); - - if (Scalar) - { - EmitVectorZeroLower(Context, Op.Rd); - } - - EmitVectorInsert(Context, Op.Rd, Part + Index, Op.Size); - } - - if (Part == 0) - { - EmitVectorZeroUpper(Context, Op.Rd); - } - - Context.EmitLdarg(ATranslatedSub.StateArgIdx); - Context.EmitLdarg(ATranslatedSub.StateArgIdx); - Context.EmitCallPropGet(typeof(AThreadState), nameof(AThreadState.Fpsr)); - Context.EmitLdtmp(); - Context.Emit(OpCodes.Conv_I4); - Context.Emit(OpCodes.Or); - Context.EmitCallPropSet(typeof(AThreadState), nameof(AThreadState.Fpsr)); - } - public static void Fabd_S(AILEmitterCtx Context) { EmitScalarBinaryOpF(Context, () => @@ -338,7 +262,7 @@ namespace ChocolArm64.Instruction int SizeF = Op.Size & 1; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; int Elems = Bytes >> SizeF + 2; int Half = Elems >> 1; @@ -870,7 +794,7 @@ namespace ChocolArm64.Instruction int SizeF = Op.Size & 1; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; for (int Index = 0; Index < Bytes >> SizeF + 2; Index++) { @@ -1102,6 +1026,15 @@ namespace ChocolArm64.Instruction }); } + public static void Smlsl_V(AILEmitterCtx Context) + { + EmitVectorWidenRnRmTernaryOpSx(Context, () => + { + Context.Emit(OpCodes.Mul); + Context.Emit(OpCodes.Sub); + }); + } + public static void Smull_V(AILEmitterCtx Context) { EmitVectorWidenRnRmBinaryOpSx(Context, () => Context.Emit(OpCodes.Mul)); @@ -1109,22 +1042,22 @@ namespace ChocolArm64.Instruction public static void Sqxtn_S(AILEmitterCtx Context) { - EmitSaturatingExtNarrow(Context, SignedSrc: true, SignedDst: true, Scalar: true); + EmitScalarSaturatingNarrowOpSxSx(Context, () => { }); } public static void Sqxtn_V(AILEmitterCtx Context) { - EmitSaturatingExtNarrow(Context, SignedSrc: true, SignedDst: true, Scalar: false); + EmitVectorSaturatingNarrowOpSxSx(Context, () => { }); } public static void Sqxtun_S(AILEmitterCtx Context) { - EmitSaturatingExtNarrow(Context, SignedSrc: true, SignedDst: false, Scalar: true); + EmitScalarSaturatingNarrowOpSxZx(Context, () => { }); } public static void Sqxtun_V(AILEmitterCtx Context) { - EmitSaturatingExtNarrow(Context, SignedSrc: true, SignedDst: false, Scalar: false); + EmitVectorSaturatingNarrowOpSxZx(Context, () => { }); } public static void Sub_S(AILEmitterCtx Context) @@ -1198,11 +1131,12 @@ namespace ChocolArm64.Instruction { AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; + int Elems = Bytes >> Op.Size; EmitVectorExtractZx(Context, Op.Rn, 0, Op.Size); - for (int Index = 1; Index < (Bytes >> Op.Size); Index++) + for (int Index = 1; Index < Elems; Index++) { EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size); @@ -1272,12 +1206,12 @@ namespace ChocolArm64.Instruction public static void Uqxtn_S(AILEmitterCtx Context) { - EmitSaturatingExtNarrow(Context, SignedSrc: false, SignedDst: false, Scalar: true); + EmitScalarSaturatingNarrowOpZxZx(Context, () => { }); } public static void Uqxtn_V(AILEmitterCtx Context) { - EmitSaturatingExtNarrow(Context, SignedSrc: false, SignedDst: false, Scalar: false); + EmitVectorSaturatingNarrowOpZxZx(Context, () => { }); } } } diff --git a/ChocolArm64/Instruction/AInstEmitSimdCmp.cs b/ChocolArm64/Instruction/AInstEmitSimdCmp.cs index 68a7ab880..773d98944 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdCmp.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdCmp.cs @@ -363,7 +363,7 @@ namespace ChocolArm64.Instruction { AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; int Elems = (!Scalar ? Bytes >> Op.Size : 1); ulong SzMask = ulong.MaxValue >> (64 - (8 << Op.Size)); @@ -407,7 +407,7 @@ namespace ChocolArm64.Instruction { AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; int Elems = (!Scalar ? Bytes >> Op.Size : 1); ulong SzMask = ulong.MaxValue >> (64 - (8 << Op.Size)); @@ -454,7 +454,7 @@ namespace ChocolArm64.Instruction int SizeF = Op.Size & 1; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; for (int Index = 0; Index < Bytes >> SizeF + 2; Index++) { diff --git a/ChocolArm64/Instruction/AInstEmitSimdCvt.cs b/ChocolArm64/Instruction/AInstEmitSimdCvt.cs index da584743c..7b355494d 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdCvt.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdCvt.cs @@ -337,7 +337,7 @@ namespace ChocolArm64.Instruction int FBits = GetFBits(Context); - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; for (int Index = 0; Index < (Bytes >> SizeI); Index++) { @@ -426,7 +426,7 @@ namespace ChocolArm64.Instruction int FBits = GetFBits(Context); - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; for (int Index = 0; Index < (Bytes >> SizeI); Index++) { diff --git a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs index d895ec9c7..1f7a2dad1 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs @@ -3,6 +3,7 @@ using ChocolArm64.State; using ChocolArm64.Translation; using System; using System.Reflection; +using System.Reflection.Emit; using System.Runtime.CompilerServices; using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; @@ -417,7 +418,7 @@ namespace ChocolArm64.Instruction int SizeF = Op.Size & 1; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; for (int Index = 0; Index < (Bytes >> SizeF + 2); Index++) { @@ -467,7 +468,7 @@ namespace ChocolArm64.Instruction int SizeF = Op.Size & 1; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; for (int Index = 0; Index < (Bytes >> SizeF + 2); Index++) { @@ -527,9 +528,10 @@ namespace ChocolArm64.Instruction { AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; + int Elems = Bytes >> Op.Size; - for (int Index = 0; Index < (Bytes >> Op.Size); Index++) + for (int Index = 0; Index < Elems; Index++) { if (Opers.HasFlag(OperFlags.Rd)) { @@ -582,9 +584,10 @@ namespace ChocolArm64.Instruction { AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; + int Elems = Bytes >> Op.Size; - for (int Index = 0; Index < (Bytes >> Op.Size); Index++) + for (int Index = 0; Index < Elems; Index++) { if (Ternary) { @@ -622,9 +625,10 @@ namespace ChocolArm64.Instruction { AOpCodeSimdImm Op = (AOpCodeSimdImm)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; + int Elems = Bytes >> Op.Size; - for (int Index = 0; Index < (Bytes >> Op.Size); Index++) + for (int Index = 0; Index < Elems; Index++) { if (Binary) { @@ -739,11 +743,11 @@ namespace ChocolArm64.Instruction EmitVectorPairwiseOp(Context, Emit, false); } - private static void EmitVectorPairwiseOp(AILEmitterCtx Context, Action Emit, bool Signed) + public static void EmitVectorPairwiseOp(AILEmitterCtx Context, Action Emit, bool Signed) { AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; int Elems = Bytes >> Op.Size; int Half = Elems >> 1; @@ -769,6 +773,117 @@ namespace ChocolArm64.Instruction } } + public static void EmitScalarSaturatingNarrowOpSxSx(AILEmitterCtx Context, Action Emit) + { + EmitSaturatingNarrowOp(Context, Emit, true, true, true); + } + + public static void EmitScalarSaturatingNarrowOpSxZx(AILEmitterCtx Context, Action Emit) + { + EmitSaturatingNarrowOp(Context, Emit, true, false, true); + } + + public static void EmitScalarSaturatingNarrowOpZxZx(AILEmitterCtx Context, Action Emit) + { + EmitSaturatingNarrowOp(Context, Emit, false, false, true); + } + + public static void EmitVectorSaturatingNarrowOpSxSx(AILEmitterCtx Context, Action Emit) + { + EmitSaturatingNarrowOp(Context, Emit, true, true, false); + } + + public static void EmitVectorSaturatingNarrowOpSxZx(AILEmitterCtx Context, Action Emit) + { + EmitSaturatingNarrowOp(Context, Emit, true, false, false); + } + + public static void EmitVectorSaturatingNarrowOpZxZx(AILEmitterCtx Context, Action Emit) + { + EmitSaturatingNarrowOp(Context, Emit, false, false, false); + } + + public static void EmitSaturatingNarrowOp( + AILEmitterCtx Context, + Action Emit, + bool SignedSrc, + bool SignedDst, + bool Scalar) + { + AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; + + int Elems = !Scalar ? 8 >> Op.Size : 1; + int ESize = 8 << Op.Size; + + int Part = !Scalar && (Op.RegisterSize == ARegisterSize.SIMD128) ? Elems : 0; + + long TMaxValue = SignedDst ? (1 << (ESize - 1)) - 1 : (1L << ESize) - 1L; + long TMinValue = SignedDst ? -((1 << (ESize - 1))) : 0; + + Context.EmitLdc_I8(0L); + Context.EmitSttmp(); + + for (int Index = 0; Index < Elems; Index++) + { + AILLabel LblLe = new AILLabel(); + AILLabel LblGeEnd = new AILLabel(); + + EmitVectorExtract(Context, Op.Rn, Index, Op.Size + 1, SignedSrc); + + Emit(); + + Context.Emit(OpCodes.Dup); + + Context.EmitLdc_I8(TMaxValue); + + Context.Emit(SignedSrc ? OpCodes.Ble_S : OpCodes.Ble_Un_S, LblLe); + + Context.Emit(OpCodes.Pop); + + Context.EmitLdc_I8(TMaxValue); + Context.EmitLdc_I8(0x8000000L); + Context.EmitSttmp(); + + Context.Emit(OpCodes.Br_S, LblGeEnd); + + Context.MarkLabel(LblLe); + + Context.Emit(OpCodes.Dup); + + Context.EmitLdc_I8(TMinValue); + + Context.Emit(SignedSrc ? OpCodes.Bge_S : OpCodes.Bge_Un_S, LblGeEnd); + + Context.Emit(OpCodes.Pop); + + Context.EmitLdc_I8(TMinValue); + Context.EmitLdc_I8(0x8000000L); + Context.EmitSttmp(); + + Context.MarkLabel(LblGeEnd); + + if (Scalar) + { + EmitVectorZeroLower(Context, Op.Rd); + } + + EmitVectorInsert(Context, Op.Rd, Part + Index, Op.Size); + } + + if (Part == 0) + { + EmitVectorZeroUpper(Context, Op.Rd); + } + + Context.EmitLdarg(ATranslatedSub.StateArgIdx); + Context.EmitLdarg(ATranslatedSub.StateArgIdx); + Context.EmitCallPropGet(typeof(AThreadState), nameof(AThreadState.Fpsr)); + Context.EmitLdtmp(); + Context.Emit(OpCodes.Conv_I4); + Context.Emit(OpCodes.Or); + Context.EmitCallPropSet(typeof(AThreadState), nameof(AThreadState.Fpsr)); + } + public static void EmitScalarSet(AILEmitterCtx Context, int Reg, int Size) { EmitVectorZeroAll(Context, Reg); diff --git a/ChocolArm64/Instruction/AInstEmitSimdLogical.cs b/ChocolArm64/Instruction/AInstEmitSimdLogical.cs index 8475a8a47..9f5af96cb 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdLogical.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdLogical.cs @@ -55,7 +55,7 @@ namespace ChocolArm64.Instruction { AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; int Elems = Bytes >> Op.Size; for (int Index = 0; Index < Elems; Index++) @@ -195,7 +195,7 @@ namespace ChocolArm64.Instruction throw new InvalidOperationException(); } - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; int Elems = Bytes >> Op.Size; int ContainerMask = (1 << (ContainerSize - Op.Size)) - 1; diff --git a/ChocolArm64/Instruction/AInstEmitSimdMemory.cs b/ChocolArm64/Instruction/AInstEmitSimdMemory.cs index d98ec012e..368b014fb 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdMemory.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdMemory.cs @@ -105,13 +105,14 @@ namespace ChocolArm64.Instruction throw new InvalidOperationException(); } - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; + int Elems = Bytes >> Op.Size; for (int SElem = 0; SElem < Op.SElems; SElem++) { int Rt = (Op.Rt + SElem) & 0x1f; - for (int Index = 0; Index < (Bytes >> Op.Size); Index++) + for (int Index = 0; Index < Elems; Index++) { EmitMemAddress(); diff --git a/ChocolArm64/Instruction/AInstEmitSimdMove.cs b/ChocolArm64/Instruction/AInstEmitSimdMove.cs index d67946a97..739f01c62 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdMove.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdMove.cs @@ -14,9 +14,10 @@ namespace ChocolArm64.Instruction { AOpCodeSimdIns Op = (AOpCodeSimdIns)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; + int Elems = Bytes >> Op.Size; - for (int Index = 0; Index < (Bytes >> Op.Size); Index++) + for (int Index = 0; Index < Elems; Index++) { Context.EmitLdintzr(Op.Rn); @@ -42,9 +43,10 @@ namespace ChocolArm64.Instruction { AOpCodeSimdIns Op = (AOpCodeSimdIns)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; + int Elems = Bytes >> Op.Size; - for (int Index = 0; Index < (Bytes >> Op.Size); Index++) + for (int Index = 0; Index < Elems; Index++) { EmitVectorExtractZx(Context, Op.Rn, Op.DstIndex, Op.Size); @@ -64,7 +66,7 @@ namespace ChocolArm64.Instruction Context.EmitLdvec(Op.Rd); Context.EmitStvectmp(); - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; int Position = Op.Imm4; @@ -329,7 +331,7 @@ namespace ChocolArm64.Instruction { AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; int Elems = Bytes >> Op.Size; @@ -355,7 +357,7 @@ namespace ChocolArm64.Instruction { AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; int Elems = Bytes >> Op.Size; int Half = Elems >> 1; @@ -382,7 +384,7 @@ namespace ChocolArm64.Instruction { AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; int Elems = Bytes >> Op.Size; int Half = Elems >> 1; diff --git a/ChocolArm64/Instruction/AInstEmitSimdShift.cs b/ChocolArm64/Instruction/AInstEmitSimdShift.cs index 24d35abe4..6f6b56068 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdShift.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdShift.cs @@ -27,9 +27,7 @@ namespace ChocolArm64.Instruction { AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; - int Shift = Op.Imm - (8 << Op.Size); - - EmitVectorShImmBinaryZx(Context, () => Context.Emit(OpCodes.Shl), Shift); + EmitVectorShImmBinaryZx(Context, () => Context.Emit(OpCodes.Shl), GetImmShl(Op)); } public static void Shll_V(AILEmitterCtx Context) @@ -45,22 +43,21 @@ namespace ChocolArm64.Instruction { AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; - int Shift = (8 << (Op.Size + 1)) - Op.Imm; - - EmitVectorShImmNarrowBinaryZx(Context, () => Context.Emit(OpCodes.Shr_Un), Shift); + EmitVectorShImmNarrowBinaryZx(Context, () => Context.Emit(OpCodes.Shr_Un), GetImmShr(Op)); } public static void Sli_V(AILEmitterCtx Context) { AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; + int Elems = Bytes >> Op.Size; - int Shift = Op.Imm - (8 << Op.Size); + int Shift = GetImmShl(Op); - ulong Mask = Shift != 0 ? ulong.MaxValue >> (64 - Shift) : 0; + ulong Mask = Shift != 0 ? ulong.MaxValue >> (64 - Shift) : 0; - for (int Index = 0; Index < (Bytes >> Op.Size); Index++) + for (int Index = 0; Index < Elems; Index++) { EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size); @@ -84,6 +81,39 @@ namespace ChocolArm64.Instruction } } + public static void Sqrshrn_V(AILEmitterCtx Context) + { + AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; + + int Shift = GetImmShr(Op); + + long RoundConst = 1L << (Shift - 1); + + Action Emit = () => + { + Context.EmitLdc_I8(RoundConst); + + Context.Emit(OpCodes.Add); + + Context.EmitLdc_I4(Shift); + + Context.Emit(OpCodes.Shr); + }; + + EmitVectorSaturatingNarrowOpSxSx(Context, Emit); + } + + public static void Srshr_V(AILEmitterCtx Context) + { + AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; + + int Shift = GetImmShr(Op); + + long RoundConst = 1L << (Shift - 1); + + EmitVectorRoundShImmBinarySx(Context, () => Context.Emit(OpCodes.Shr), Shift, RoundConst); + } + public static void Sshl_V(AILEmitterCtx Context) { EmitVectorShl(Context, Signed: true); @@ -93,9 +123,7 @@ namespace ChocolArm64.Instruction { AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; - int Shift = Op.Imm - (8 << Op.Size); - - EmitVectorShImmWidenBinarySx(Context, () => Context.Emit(OpCodes.Shl), Shift); + EmitVectorShImmWidenBinarySx(Context, () => Context.Emit(OpCodes.Shl), GetImmShl(Op)); } public static void Sshr_S(AILEmitterCtx Context) @@ -115,24 +143,20 @@ namespace ChocolArm64.Instruction { AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; - int Shift = (8 << (Op.Size + 1)) - Op.Imm; - - EmitVectorShImmBinarySx(Context, () => Context.Emit(OpCodes.Shr), Shift); + EmitVectorShImmBinarySx(Context, () => Context.Emit(OpCodes.Shr), GetImmShr(Op)); } public static void Ssra_V(AILEmitterCtx Context) { AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; - int Shift = (8 << (Op.Size + 1)) - Op.Imm; - Action Emit = () => { Context.Emit(OpCodes.Shr); Context.Emit(OpCodes.Add); }; - EmitVectorShImmTernarySx(Context, Emit, Shift); + EmitVectorShImmTernarySx(Context, Emit, GetImmShr(Op)); } public static void Ushl_V(AILEmitterCtx Context) @@ -144,9 +168,7 @@ namespace ChocolArm64.Instruction { AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; - int Shift = Op.Imm - (8 << Op.Size); - - EmitVectorShImmWidenBinaryZx(Context, () => Context.Emit(OpCodes.Shl), Shift); + EmitVectorShImmWidenBinaryZx(Context, () => Context.Emit(OpCodes.Shl), GetImmShl(Op)); } public static void Ushr_S(AILEmitterCtx Context) @@ -251,28 +273,51 @@ namespace ChocolArm64.Instruction } } + [Flags] + private enum ShImmFlags + { + None = 0, + + Signed = 1 << 0, + Ternary = 1 << 1, + Rounded = 1 << 2, + + SignedTernary = Signed | Ternary, + SignedRounded = Signed | Rounded + } + private static void EmitVectorShImmBinarySx(AILEmitterCtx Context, Action Emit, int Imm) { - EmitVectorShImmOp(Context, Emit, Imm, false, true); + EmitVectorShImmOp(Context, Emit, Imm, ShImmFlags.Signed); } private static void EmitVectorShImmTernarySx(AILEmitterCtx Context, Action Emit, int Imm) { - EmitVectorShImmOp(Context, Emit, Imm, true, true); + EmitVectorShImmOp(Context, Emit, Imm, ShImmFlags.SignedTernary); } private static void EmitVectorShImmBinaryZx(AILEmitterCtx Context, Action Emit, int Imm) { - EmitVectorShImmOp(Context, Emit, Imm, false, false); + EmitVectorShImmOp(Context, Emit, Imm, ShImmFlags.None); } - private static void EmitVectorShImmOp(AILEmitterCtx Context, Action Emit, int Imm, bool Ternary, bool Signed) + private static void EmitVectorRoundShImmBinarySx(AILEmitterCtx Context, Action Emit, int Imm, long Rc) + { + EmitVectorShImmOp(Context, Emit, Imm, ShImmFlags.SignedRounded, Rc); + } + + private static void EmitVectorShImmOp(AILEmitterCtx Context, Action Emit, int Imm, ShImmFlags Flags, long Rc = 0) { AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; - int Bytes = Context.CurrOp.GetBitsCount() >> 3; + int Bytes = Op.GetBitsCount() >> 3; + int Elems = Bytes >> Op.Size; - for (int Index = 0; Index < (Bytes >> Op.Size); Index++) + bool Signed = (Flags & ShImmFlags.Signed) != 0; + bool Ternary = (Flags & ShImmFlags.Ternary) != 0; + bool Rounded = (Flags & ShImmFlags.Rounded) != 0; + + for (int Index = 0; Index < Elems; Index++) { if (Ternary) { @@ -281,6 +326,13 @@ namespace ChocolArm64.Instruction EmitVectorExtract(Context, Op.Rn, Index, Op.Size, Signed); + if (Rounded) + { + Context.EmitLdc_I8(Rc); + + Context.Emit(OpCodes.Add); + } + Context.EmitLdc_I4(Imm); Emit(); From fc12fca96237c9aadc78436dc7c77480fa83fa09 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Sat, 14 Jul 2018 13:53:44 -0300 Subject: [PATCH 15/23] Allow using ulong max value as yield (#263) --- Ryujinx.HLE/OsHle/Kernel/SvcThread.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Ryujinx.HLE/OsHle/Kernel/SvcThread.cs b/Ryujinx.HLE/OsHle/Kernel/SvcThread.cs index b0a7490a3..8702203e4 100644 --- a/Ryujinx.HLE/OsHle/Kernel/SvcThread.cs +++ b/Ryujinx.HLE/OsHle/Kernel/SvcThread.cs @@ -87,7 +87,7 @@ namespace Ryujinx.HLE.OsHle.Kernel KThread CurrThread = Process.GetThread(ThreadState.Tpidr); - if (TimeoutNs == 0) + if (TimeoutNs == 0 || TimeoutNs == ulong.MaxValue) { Process.Scheduler.Yield(CurrThread); } From be31f5b46d16f0f8730d9a9ec71f938eee97524a Mon Sep 17 00:00:00 2001 From: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> Date: Sat, 14 Jul 2018 20:07:44 +0200 Subject: [PATCH 16/23] Improve CountLeadingZeros() algorithm, nits. (#219) * Update AInstEmitSimdArithmetic.cs * Update ASoftFallback.cs * Update ASoftFallback.cs * Update ASoftFallback.cs * Update AInstEmitSimdArithmetic.cs --- .../Instruction/AInstEmitSimdArithmetic.cs | 4 +- ChocolArm64/Instruction/ASoftFallback.cs | 46 ++++++++++++++----- 2 files changed, 37 insertions(+), 13 deletions(-) diff --git a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs index 2fc8f178d..a39ffc093 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs @@ -101,11 +101,13 @@ namespace ChocolArm64.Instruction int Bytes = Op.GetBitsCount() >> 3; int Elems = Bytes >> Op.Size; + int ESize = 8 << Op.Size; + for (int Index = 0; Index < Elems; Index++) { EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size); - Context.EmitLdc_I4(8 << Op.Size); + Context.EmitLdc_I4(ESize); Emit(); diff --git a/ChocolArm64/Instruction/ASoftFallback.cs b/ChocolArm64/Instruction/ASoftFallback.cs index 5c0a9c8e3..ae3994b02 100644 --- a/ChocolArm64/Instruction/ASoftFallback.cs +++ b/ChocolArm64/Instruction/ASoftFallback.cs @@ -12,22 +12,42 @@ namespace ChocolArm64.Instruction public static ulong CountLeadingSigns(ulong Value, int Size) { - return CountLeadingZeros((Value >> 1) ^ Value, Size - 1); - } + Value ^= Value >> 1; - public static ulong CountLeadingZeros(ulong Value, int Size) - { - int HighBit = Size - 1; + int HighBit = Size - 2; for (int Bit = HighBit; Bit >= 0; Bit--) { - if (((Value >> Bit) & 1) != 0) + if (((Value >> Bit) & 0b1) != 0) { return (ulong)(HighBit - Bit); } } - return (ulong)Size; + return (ulong)(Size - 1); + } + + private static readonly byte[] ClzNibbleTbl = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 }; + + public static ulong CountLeadingZeros(ulong Value, int Size) + { + if (Value == 0) + { + return (ulong)Size; + } + + int NibbleIdx = Size; + int PreCount, Count = 0; + + do + { + NibbleIdx -= 4; + PreCount = ClzNibbleTbl[(Value >> NibbleIdx) & 0b1111]; + Count += PreCount; + } + while (PreCount == 4); + + return (ulong)Count; } public static uint CountSetBits8(uint Value) @@ -61,8 +81,8 @@ namespace ChocolArm64.Instruction private static uint Crc32w(uint Crc, uint Poly, uint Val) { - Crc = Crc32(Crc, Poly, (byte)(Val >> 0)); - Crc = Crc32(Crc, Poly, (byte)(Val >> 8)); + Crc = Crc32(Crc, Poly, (byte)(Val >> 0 )); + Crc = Crc32(Crc, Poly, (byte)(Val >> 8 )); Crc = Crc32(Crc, Poly, (byte)(Val >> 16)); Crc = Crc32(Crc, Poly, (byte)(Val >> 24)); @@ -71,8 +91,8 @@ namespace ChocolArm64.Instruction private static uint Crc32x(uint Crc, uint Poly, ulong Val) { - Crc = Crc32(Crc, Poly, (byte)(Val >> 0)); - Crc = Crc32(Crc, Poly, (byte)(Val >> 8)); + Crc = Crc32(Crc, Poly, (byte)(Val >> 0 )); + Crc = Crc32(Crc, Poly, (byte)(Val >> 8 )); Crc = Crc32(Crc, Poly, (byte)(Val >> 16)); Crc = Crc32(Crc, Poly, (byte)(Val >> 24)); Crc = Crc32(Crc, Poly, (byte)(Val >> 32)); @@ -168,9 +188,10 @@ namespace ChocolArm64.Instruction public static long SMulHi128(long LHS, long RHS) { - long Result = (long)UMulHi128((ulong)(LHS), (ulong)(RHS)); + long Result = (long)UMulHi128((ulong)LHS, (ulong)RHS); if (LHS < 0) Result -= RHS; if (RHS < 0) Result -= LHS; + return Result; } @@ -187,6 +208,7 @@ namespace ChocolArm64.Instruction ulong Z1 = T & 0xFFFFFFFF; ulong Z0 = T >> 32; Z1 += LLow * RHigh; + return LHigh * RHigh + Z0 + (Z1 >> 32); } } From 98c6ceede564eda4aed528e51219a9b0d6bea1c4 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Sat, 14 Jul 2018 23:57:41 -0300 Subject: [PATCH 17/23] Audio Renderer improvements (#210) * Partial voice implementation on audio renderer * Implemented audren resampler (based on original impl) * Fix BiquadFilter struct * Pause audio playback on last stream buffer * Split audren/audout files into separate folders, some minor cleanup * Use AudioRendererParameter on GetWorkBufferSize aswell * Bump audren version to REV4, name a few things, increase sample buffer size * Remove useless new lines --- Ryujinx.Audio/Adpcm/AdpcmDecoder.cs | 91 +++++ Ryujinx.Audio/Adpcm/AdpcmDecoderContext.cs | 10 + Ryujinx.Audio/DspUtils.cs | 16 + Ryujinx.Audio/IAalOutput.cs | 8 +- Ryujinx.Audio/OpenAL/OpenALAudioOut.cs | 53 +-- Ryujinx.HLE/OsHle/Services/Aud/AudErr.cs | 1 + .../Aud/{ => AudioOut}/AudioOutData.cs | 2 +- .../Services/Aud/{ => AudioOut}/IAudioOut.cs | 3 +- .../Services/Aud/AudioRenderer/AudioConsts.cs | 8 + .../Services/Aud/AudioRenderer/BehaviorIn.cs | 11 + .../Aud/AudioRenderer/BiquadFilter.cs | 16 + .../Aud/AudioRenderer/IAudioRenderer.cs | 316 ++++++++++++++++++ .../Aud/AudioRenderer/MemoryPoolContext.cs | 12 + .../Aud/AudioRenderer/MemoryPoolIn.cs | 14 + .../Aud/AudioRenderer/MemoryPoolOut.cs | 12 + .../{ => AudioRenderer}/MemoryPoolState.cs | 2 +- .../Services/Aud/AudioRenderer/PlayState.cs | 9 + .../Services/Aud/AudioRenderer/Resampler.cs | 191 +++++++++++ .../{ => AudioRenderer}/UpdateDataHeader.cs | 12 +- .../AudioRenderer/VoiceChannelResourceIn.cs | 10 + .../Aud/AudioRenderer/VoiceContext.cs | 188 +++++++++++ .../Services/Aud/AudioRenderer/VoiceIn.cs | 49 +++ .../Services/Aud/AudioRenderer/VoiceOut.cs | 12 + .../Services/Aud/AudioRenderer/WaveBuffer.cs | 20 ++ .../Services/Aud/AudioRendererParameter.cs | 8 +- .../OsHle/Services/Aud/IAudioOutManager.cs | 5 +- .../OsHle/Services/Aud/IAudioRenderer.cs | 136 -------- .../Services/Aud/IAudioRendererManager.cs | 129 +++---- .../OsHle/Services/Aud/SampleFormat.cs | 8 +- Ryujinx.HLE/OsHle/Services/Aud/VoiceState.cs | 9 - .../OsHle/Services/Nv/NvMap/NvMapIoctl.cs | 4 +- Ryujinx.HLE/OsHle/Utilities/IntUtils.cs | 4 +- Ryujinx.HLE/OsHle/Utilities/StructReader.cs | 45 +++ Ryujinx.HLE/OsHle/Utilities/StructWriter.cs | 25 ++ 34 files changed, 1168 insertions(+), 271 deletions(-) create mode 100644 Ryujinx.Audio/Adpcm/AdpcmDecoder.cs create mode 100644 Ryujinx.Audio/Adpcm/AdpcmDecoderContext.cs create mode 100644 Ryujinx.Audio/DspUtils.cs rename Ryujinx.HLE/OsHle/Services/Aud/{ => AudioOut}/AudioOutData.cs (86%) rename Ryujinx.HLE/OsHle/Services/Aud/{ => AudioOut}/IAudioOut.cs (98%) create mode 100644 Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/AudioConsts.cs create mode 100644 Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/BehaviorIn.cs create mode 100644 Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/BiquadFilter.cs create mode 100644 Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/IAudioRenderer.cs create mode 100644 Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/MemoryPoolContext.cs create mode 100644 Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/MemoryPoolIn.cs create mode 100644 Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/MemoryPoolOut.cs rename Ryujinx.HLE/OsHle/Services/Aud/{ => AudioRenderer}/MemoryPoolState.cs (80%) create mode 100644 Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/PlayState.cs create mode 100644 Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/Resampler.cs rename Ryujinx.HLE/OsHle/Services/Aud/{ => AudioRenderer}/UpdateDataHeader.cs (65%) create mode 100644 Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/VoiceChannelResourceIn.cs create mode 100644 Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/VoiceContext.cs create mode 100644 Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/VoiceIn.cs create mode 100644 Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/VoiceOut.cs create mode 100644 Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/WaveBuffer.cs delete mode 100644 Ryujinx.HLE/OsHle/Services/Aud/IAudioRenderer.cs rename Ryujinx.Audio/AudioFormat.cs => Ryujinx.HLE/OsHle/Services/Aud/SampleFormat.cs (51%) delete mode 100644 Ryujinx.HLE/OsHle/Services/Aud/VoiceState.cs create mode 100644 Ryujinx.HLE/OsHle/Utilities/StructReader.cs create mode 100644 Ryujinx.HLE/OsHle/Utilities/StructWriter.cs diff --git a/Ryujinx.Audio/Adpcm/AdpcmDecoder.cs b/Ryujinx.Audio/Adpcm/AdpcmDecoder.cs new file mode 100644 index 000000000..24455b418 --- /dev/null +++ b/Ryujinx.Audio/Adpcm/AdpcmDecoder.cs @@ -0,0 +1,91 @@ +namespace Ryujinx.Audio.Adpcm +{ + public static class AdpcmDecoder + { + private const int SamplesPerFrame = 14; + private const int BytesPerFrame = 8; + + public static int[] Decode(byte[] Buffer, AdpcmDecoderContext Context) + { + int Samples = GetSamplesCountFromSize(Buffer.Length); + + int[] Pcm = new int[Samples * 2]; + + short History0 = Context.History0; + short History1 = Context.History1; + + int InputOffset = 0; + int OutputOffset = 0; + + while (InputOffset < Buffer.Length) + { + byte Header = Buffer[InputOffset++]; + + int Scale = 0x800 << (Header & 0xf); + + int CoeffIndex = (Header >> 4) & 7; + + short Coeff0 = Context.Coefficients[CoeffIndex * 2 + 0]; + short Coeff1 = Context.Coefficients[CoeffIndex * 2 + 1]; + + int FrameSamples = SamplesPerFrame; + + if (FrameSamples > Samples) + { + FrameSamples = Samples; + } + + int Value = 0; + + for (int SampleIndex = 0; SampleIndex < FrameSamples; SampleIndex++) + { + int Sample; + + if ((SampleIndex & 1) == 0) + { + Value = Buffer[InputOffset++]; + + Sample = (Value << 24) >> 28; + } + else + { + Sample = (Value << 28) >> 28; + } + + int Prediction = Coeff0 * History0 + Coeff1 * History1; + + Sample = (Sample * Scale + Prediction + 0x400) >> 11; + + short SaturatedSample = DspUtils.Saturate(Sample); + + History1 = History0; + History0 = SaturatedSample; + + Pcm[OutputOffset++] = SaturatedSample; + Pcm[OutputOffset++] = SaturatedSample; + } + + Samples -= FrameSamples; + } + + Context.History0 = History0; + Context.History1 = History1; + + return Pcm; + } + + public static long GetSizeFromSamplesCount(int SamplesCount) + { + int Frames = SamplesCount / SamplesPerFrame; + + return Frames * BytesPerFrame; + } + + public static int GetSamplesCountFromSize(long Size) + { + int Frames = (int)(Size / BytesPerFrame); + + return Frames * SamplesPerFrame; + } + } +} \ No newline at end of file diff --git a/Ryujinx.Audio/Adpcm/AdpcmDecoderContext.cs b/Ryujinx.Audio/Adpcm/AdpcmDecoderContext.cs new file mode 100644 index 000000000..91730333c --- /dev/null +++ b/Ryujinx.Audio/Adpcm/AdpcmDecoderContext.cs @@ -0,0 +1,10 @@ +namespace Ryujinx.Audio.Adpcm +{ + public class AdpcmDecoderContext + { + public short[] Coefficients; + + public short History0; + public short History1; + } +} \ No newline at end of file diff --git a/Ryujinx.Audio/DspUtils.cs b/Ryujinx.Audio/DspUtils.cs new file mode 100644 index 000000000..c048161da --- /dev/null +++ b/Ryujinx.Audio/DspUtils.cs @@ -0,0 +1,16 @@ +namespace Ryujinx.Audio.Adpcm +{ + public static class DspUtils + { + public static short Saturate(int Value) + { + if (Value > short.MaxValue) + Value = short.MaxValue; + + if (Value < short.MinValue) + Value = short.MinValue; + + return (short)Value; + } + } +} \ No newline at end of file diff --git a/Ryujinx.Audio/IAalOutput.cs b/Ryujinx.Audio/IAalOutput.cs index f9978ee4d..e903c5c5c 100644 --- a/Ryujinx.Audio/IAalOutput.cs +++ b/Ryujinx.Audio/IAalOutput.cs @@ -2,11 +2,7 @@ namespace Ryujinx.Audio { public interface IAalOutput { - int OpenTrack( - int SampleRate, - int Channels, - ReleaseCallback Callback, - out AudioFormat Format); + int OpenTrack(int SampleRate, int Channels, ReleaseCallback Callback); void CloseTrack(int Track); @@ -14,7 +10,7 @@ namespace Ryujinx.Audio long[] GetReleasedBuffers(int Track, int MaxCount); - void AppendBuffer(int Track, long Tag, byte[] Buffer); + void AppendBuffer(int Track, long Tag, T[] Buffer) where T : struct; void Start(int Track); void Stop(int Track); diff --git a/Ryujinx.Audio/OpenAL/OpenALAudioOut.cs b/Ryujinx.Audio/OpenAL/OpenALAudioOut.cs index 2860dc2e2..1dd63202b 100644 --- a/Ryujinx.Audio/OpenAL/OpenALAudioOut.cs +++ b/Ryujinx.Audio/OpenAL/OpenALAudioOut.cs @@ -3,6 +3,7 @@ using OpenTK.Audio.OpenAL; using System; using System.Collections.Concurrent; using System.Collections.Generic; +using System.Runtime.InteropServices; using System.Threading; namespace Ryujinx.Audio.OpenAL @@ -226,15 +227,9 @@ namespace Ryujinx.Audio.OpenAL while (KeepPolling); } - public int OpenTrack( - int SampleRate, - int Channels, - ReleaseCallback Callback, - out AudioFormat Format) + public int OpenTrack(int SampleRate, int Channels, ReleaseCallback Callback) { - Format = AudioFormat.PcmInt16; - - Track Td = new Track(SampleRate, GetALFormat(Channels, Format), Callback); + Track Td = new Track(SampleRate, GetALFormat(Channels), Callback); for (int Id = 0; Id < MaxTracks; Id++) { @@ -247,38 +242,16 @@ namespace Ryujinx.Audio.OpenAL return -1; } - private ALFormat GetALFormat(int Channels, AudioFormat Format) + private ALFormat GetALFormat(int Channels) { - if (Channels == 1) + switch (Channels) { - switch (Format) - { - case AudioFormat.PcmInt8: return ALFormat.Mono8; - case AudioFormat.PcmInt16: return ALFormat.Mono16; - } - } - else if (Channels == 2) - { - switch (Format) - { - case AudioFormat.PcmInt8: return ALFormat.Stereo8; - case AudioFormat.PcmInt16: return ALFormat.Stereo16; - } - } - else if (Channels == 6) - { - switch (Format) - { - case AudioFormat.PcmInt8: return ALFormat.Multi51Chn8Ext; - case AudioFormat.PcmInt16: return ALFormat.Multi51Chn16Ext; - } - } - else - { - throw new ArgumentOutOfRangeException(nameof(Channels)); + case 1: return ALFormat.Mono16; + case 2: return ALFormat.Stereo16; + case 6: return ALFormat.Multi51Chn16Ext; } - throw new ArgumentException(nameof(Format)); + throw new ArgumentOutOfRangeException(nameof(Channels)); } public void CloseTrack(int Track) @@ -309,13 +282,15 @@ namespace Ryujinx.Audio.OpenAL return null; } - public void AppendBuffer(int Track, long Tag, byte[] Buffer) + public void AppendBuffer(int Track, long Tag, T[] Buffer) where T : struct { if (Tracks.TryGetValue(Track, out Track Td)) { int BufferId = Td.AppendBuffer(Tag); - AL.BufferData(BufferId, Td.Format, Buffer, Buffer.Length, Td.SampleRate); + int Size = Buffer.Length * Marshal.SizeOf(); + + AL.BufferData(BufferId, Td.Format, Buffer, Size, Td.SampleRate); AL.SourceQueueBuffer(Td.SourceId, BufferId); @@ -366,7 +341,5 @@ namespace Ryujinx.Audio.OpenAL return PlaybackState.Stopped; } - - } } \ No newline at end of file diff --git a/Ryujinx.HLE/OsHle/Services/Aud/AudErr.cs b/Ryujinx.HLE/OsHle/Services/Aud/AudErr.cs index fa201d8cd..72c3e65eb 100644 --- a/Ryujinx.HLE/OsHle/Services/Aud/AudErr.cs +++ b/Ryujinx.HLE/OsHle/Services/Aud/AudErr.cs @@ -3,6 +3,7 @@ namespace Ryujinx.HLE.OsHle.Services.Aud static class AudErr { public const int DeviceNotFound = 1; + public const int UnsupportedRevision = 2; public const int UnsupportedSampleRate = 3; } } \ No newline at end of file diff --git a/Ryujinx.HLE/OsHle/Services/Aud/AudioOutData.cs b/Ryujinx.HLE/OsHle/Services/Aud/AudioOut/AudioOutData.cs similarity index 86% rename from Ryujinx.HLE/OsHle/Services/Aud/AudioOutData.cs rename to Ryujinx.HLE/OsHle/Services/Aud/AudioOut/AudioOutData.cs index 9d68c24ab..6887a38be 100644 --- a/Ryujinx.HLE/OsHle/Services/Aud/AudioOutData.cs +++ b/Ryujinx.HLE/OsHle/Services/Aud/AudioOut/AudioOutData.cs @@ -1,6 +1,6 @@ using System.Runtime.InteropServices; -namespace Ryujinx.HLE.OsHle.Services.Aud +namespace Ryujinx.HLE.OsHle.Services.Aud.AudioOut { [StructLayout(LayoutKind.Sequential)] struct AudioOutData diff --git a/Ryujinx.HLE/OsHle/Services/Aud/IAudioOut.cs b/Ryujinx.HLE/OsHle/Services/Aud/AudioOut/IAudioOut.cs similarity index 98% rename from Ryujinx.HLE/OsHle/Services/Aud/IAudioOut.cs rename to Ryujinx.HLE/OsHle/Services/Aud/AudioOut/IAudioOut.cs index 49c87a561..d89fc293a 100644 --- a/Ryujinx.HLE/OsHle/Services/Aud/IAudioOut.cs +++ b/Ryujinx.HLE/OsHle/Services/Aud/AudioOut/IAudioOut.cs @@ -1,12 +1,11 @@ using ChocolArm64.Memory; using Ryujinx.Audio; -using Ryujinx.HLE.Logging; using Ryujinx.HLE.OsHle.Handles; using Ryujinx.HLE.OsHle.Ipc; using System; using System.Collections.Generic; -namespace Ryujinx.HLE.OsHle.Services.Aud +namespace Ryujinx.HLE.OsHle.Services.Aud.AudioOut { class IAudioOut : IpcService, IDisposable { diff --git a/Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/AudioConsts.cs b/Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/AudioConsts.cs new file mode 100644 index 000000000..fed41959d --- /dev/null +++ b/Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/AudioConsts.cs @@ -0,0 +1,8 @@ +namespace Ryujinx.HLE.OsHle.Services.Aud.AudioRenderer +{ + static class AudioConsts + { + public const int HostSampleRate = 48000; + public const int HostChannelsCount = 2; + } +} diff --git a/Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/BehaviorIn.cs b/Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/BehaviorIn.cs new file mode 100644 index 000000000..4e33de621 --- /dev/null +++ b/Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/BehaviorIn.cs @@ -0,0 +1,11 @@ +using System.Runtime.InteropServices; + +namespace Ryujinx.HLE.OsHle.Services.Aud.AudioRenderer +{ + [StructLayout(LayoutKind.Sequential, Size = 0x10, Pack = 4)] + struct BehaviorIn + { + public long Unknown0; + public long Unknown8; + } +} \ No newline at end of file diff --git a/Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/BiquadFilter.cs b/Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/BiquadFilter.cs new file mode 100644 index 000000000..9fa4cd330 --- /dev/null +++ b/Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/BiquadFilter.cs @@ -0,0 +1,16 @@ +using System.Runtime.InteropServices; + +namespace Ryujinx.HLE.OsHle.Services.Aud.AudioRenderer +{ + [StructLayout(LayoutKind.Sequential, Size = 0xc, Pack = 1)] + struct BiquadFilter + { + public byte Enable; + public byte Padding; + public short B0; + public short B1; + public short B2; + public short A1; + public short A2; + } +} diff --git a/Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/IAudioRenderer.cs b/Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/IAudioRenderer.cs new file mode 100644 index 000000000..f91a8da37 --- /dev/null +++ b/Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/IAudioRenderer.cs @@ -0,0 +1,316 @@ +using ChocolArm64.Memory; +using Ryujinx.Audio; +using Ryujinx.Audio.Adpcm; +using Ryujinx.HLE.Logging; +using Ryujinx.HLE.OsHle.Handles; +using Ryujinx.HLE.OsHle.Ipc; +using Ryujinx.HLE.OsHle.Utilities; +using System; +using System.Collections.Generic; +using System.Runtime.InteropServices; + +namespace Ryujinx.HLE.OsHle.Services.Aud.AudioRenderer +{ + class IAudioRenderer : IpcService, IDisposable + { + //This is the amount of samples that are going to be appended + //each time that RequestUpdateAudioRenderer is called. Ideally, + //this value shouldn't be neither too small (to avoid the player + //starving due to running out of samples) or too large (to avoid + //high latency). + private const int MixBufferSamplesCount = 960; + + private Dictionary m_Commands; + + public override IReadOnlyDictionary Commands => m_Commands; + + private KEvent UpdateEvent; + + private AMemory Memory; + + private IAalOutput AudioOut; + + private AudioRendererParameter Params; + + private MemoryPoolContext[] MemoryPools; + + private VoiceContext[] Voices; + + private int Track; + + public IAudioRenderer(AMemory Memory, IAalOutput AudioOut, AudioRendererParameter Params) + { + m_Commands = new Dictionary() + { + { 4, RequestUpdateAudioRenderer }, + { 5, StartAudioRenderer }, + { 6, StopAudioRenderer }, + { 7, QuerySystemEvent } + }; + + UpdateEvent = new KEvent(); + + this.Memory = Memory; + this.AudioOut = AudioOut; + this.Params = Params; + + Track = AudioOut.OpenTrack( + AudioConsts.HostSampleRate, + AudioConsts.HostChannelsCount, + AudioCallback); + + MemoryPools = CreateArray(Params.EffectCount + Params.VoiceCount * 4); + + Voices = CreateArray(Params.VoiceCount); + + InitializeAudioOut(); + } + + private void AudioCallback() + { + UpdateEvent.WaitEvent.Set(); + } + + private static T[] CreateArray(int Size) where T : new() + { + T[] Output = new T[Size]; + + for (int Index = 0; Index < Size; Index++) + { + Output[Index] = new T(); + } + + return Output; + } + + private void InitializeAudioOut() + { + AppendMixedBuffer(0); + AppendMixedBuffer(1); + AppendMixedBuffer(2); + + AudioOut.Start(Track); + } + + public long RequestUpdateAudioRenderer(ServiceCtx Context) + { + long OutputPosition = Context.Request.ReceiveBuff[0].Position; + long OutputSize = Context.Request.ReceiveBuff[0].Size; + + AMemoryHelper.FillWithZeros(Context.Memory, OutputPosition, (int)OutputSize); + + long InputPosition = Context.Request.SendBuff[0].Position; + + StructReader Reader = new StructReader(Context.Memory, InputPosition); + StructWriter Writer = new StructWriter(Context.Memory, OutputPosition); + + UpdateDataHeader InputHeader = Reader.Read(); + + Reader.Read(InputHeader.BehaviorSize); + + MemoryPoolIn[] MemoryPoolsIn = Reader.Read(InputHeader.MemoryPoolSize); + + for (int Index = 0; Index < MemoryPoolsIn.Length; Index++) + { + MemoryPoolIn MemoryPool = MemoryPoolsIn[Index]; + + if (MemoryPool.State == MemoryPoolState.RequestAttach) + { + MemoryPools[Index].OutStatus.State = MemoryPoolState.Attached; + } + else if (MemoryPool.State == MemoryPoolState.RequestDetach) + { + MemoryPools[Index].OutStatus.State = MemoryPoolState.Detached; + } + } + + Reader.Read(InputHeader.VoiceResourceSize); + + VoiceIn[] VoicesIn = Reader.Read(InputHeader.VoiceSize); + + for (int Index = 0; Index < VoicesIn.Length; Index++) + { + VoiceIn Voice = VoicesIn[Index]; + + VoiceContext VoiceCtx = Voices[Index]; + + VoiceCtx.SetAcquireState(Voice.Acquired != 0); + + if (Voice.Acquired == 0) + { + continue; + } + + if (Voice.FirstUpdate != 0) + { + VoiceCtx.AdpcmCtx = GetAdpcmDecoderContext( + Voice.AdpcmCoeffsPosition, + Voice.AdpcmCoeffsSize); + + VoiceCtx.SampleFormat = Voice.SampleFormat; + VoiceCtx.SampleRate = Voice.SampleRate; + VoiceCtx.ChannelsCount = Voice.ChannelsCount; + + VoiceCtx.SetBufferIndex(Voice.BaseWaveBufferIndex); + } + + VoiceCtx.WaveBuffers[0] = Voice.WaveBuffer0; + VoiceCtx.WaveBuffers[1] = Voice.WaveBuffer1; + VoiceCtx.WaveBuffers[2] = Voice.WaveBuffer2; + VoiceCtx.WaveBuffers[3] = Voice.WaveBuffer3; + VoiceCtx.Volume = Voice.Volume; + VoiceCtx.PlayState = Voice.PlayState; + } + + UpdateAudio(); + + UpdateDataHeader OutputHeader = new UpdateDataHeader(); + + int UpdateHeaderSize = Marshal.SizeOf(); + + OutputHeader.Revision = IAudioRendererManager.RevMagic; + OutputHeader.BehaviorSize = 0xb0; + OutputHeader.MemoryPoolSize = (Params.EffectCount + Params.VoiceCount * 4) * 0x10; + OutputHeader.VoiceSize = Params.VoiceCount * 0x10; + OutputHeader.EffectSize = Params.EffectCount * 0x10; + OutputHeader.SinkSize = Params.SinkCount * 0x20; + OutputHeader.PerformanceManagerSize = 0x10; + OutputHeader.TotalSize = UpdateHeaderSize + + OutputHeader.BehaviorSize + + OutputHeader.MemoryPoolSize + + OutputHeader.VoiceSize + + OutputHeader.EffectSize + + OutputHeader.SinkSize + + OutputHeader.PerformanceManagerSize; + + Writer.Write(OutputHeader); + + foreach (MemoryPoolContext MemoryPool in MemoryPools) + { + Writer.Write(MemoryPool.OutStatus); + } + + foreach (VoiceContext Voice in Voices) + { + Writer.Write(Voice.OutStatus); + } + + return 0; + } + + public long StartAudioRenderer(ServiceCtx Context) + { + Context.Ns.Log.PrintStub(LogClass.ServiceAudio, "Stubbed."); + + return 0; + } + + public long StopAudioRenderer(ServiceCtx Context) + { + Context.Ns.Log.PrintStub(LogClass.ServiceAudio, "Stubbed."); + + return 0; + } + + public long QuerySystemEvent(ServiceCtx Context) + { + int Handle = Context.Process.HandleTable.OpenHandle(UpdateEvent); + + Context.Response.HandleDesc = IpcHandleDesc.MakeCopy(Handle); + + return 0; + } + + private AdpcmDecoderContext GetAdpcmDecoderContext(long Position, long Size) + { + if (Size == 0) + { + return null; + } + + AdpcmDecoderContext Context = new AdpcmDecoderContext(); + + Context.Coefficients = new short[Size >> 1]; + + for (int Offset = 0; Offset < Size; Offset += 2) + { + Context.Coefficients[Offset >> 1] = Memory.ReadInt16(Position + Offset); + } + + return Context; + } + + private void UpdateAudio() + { + long[] Released = AudioOut.GetReleasedBuffers(Track, 2); + + for (int Index = 0; Index < Released.Length; Index++) + { + AppendMixedBuffer(Released[Index]); + } + } + + private void AppendMixedBuffer(long Tag) + { + int[] MixBuffer = new int[MixBufferSamplesCount * AudioConsts.HostChannelsCount]; + + foreach (VoiceContext Voice in Voices) + { + if (!Voice.Playing) + { + continue; + } + + int OutOffset = 0; + + int PendingSamples = MixBufferSamplesCount; + + while (PendingSamples > 0) + { + int[] Samples = Voice.GetBufferData(Memory, PendingSamples, out int ReturnedSamples); + + if (ReturnedSamples == 0) + { + break; + } + + PendingSamples -= ReturnedSamples; + + for (int Offset = 0; Offset < Samples.Length; Offset++) + { + int Sample = (int)(Samples[Offset] * Voice.Volume); + + MixBuffer[OutOffset++] += Sample; + } + } + } + + AudioOut.AppendBuffer(Track, Tag, GetFinalBuffer(MixBuffer)); + } + + private static short[] GetFinalBuffer(int[] Buffer) + { + short[] Output = new short[Buffer.Length]; + + for (int Offset = 0; Offset < Buffer.Length; Offset++) + { + Output[Offset] = DspUtils.Saturate(Buffer[Offset]); + } + + return Output; + } + + public void Dispose() + { + Dispose(true); + } + + protected virtual void Dispose(bool Disposing) + { + if (Disposing) + { + UpdateEvent.Dispose(); + } + } + } +} diff --git a/Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/MemoryPoolContext.cs b/Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/MemoryPoolContext.cs new file mode 100644 index 000000000..b7af1d3f8 --- /dev/null +++ b/Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/MemoryPoolContext.cs @@ -0,0 +1,12 @@ +namespace Ryujinx.HLE.OsHle.Services.Aud.AudioRenderer +{ + class MemoryPoolContext + { + public MemoryPoolOut OutStatus; + + public MemoryPoolContext() + { + OutStatus.State = MemoryPoolState.Detached; + } + } +} diff --git a/Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/MemoryPoolIn.cs b/Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/MemoryPoolIn.cs new file mode 100644 index 000000000..c852b5198 --- /dev/null +++ b/Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/MemoryPoolIn.cs @@ -0,0 +1,14 @@ +using System.Runtime.InteropServices; + +namespace Ryujinx.HLE.OsHle.Services.Aud.AudioRenderer +{ + [StructLayout(LayoutKind.Sequential, Size = 0x20, Pack = 4)] + struct MemoryPoolIn + { + public long Address; + public long Size; + public MemoryPoolState State; + public int Unknown14; + public long Unknown18; + } +} diff --git a/Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/MemoryPoolOut.cs b/Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/MemoryPoolOut.cs new file mode 100644 index 000000000..dd65df864 --- /dev/null +++ b/Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/MemoryPoolOut.cs @@ -0,0 +1,12 @@ +using System.Runtime.InteropServices; + +namespace Ryujinx.HLE.OsHle.Services.Aud.AudioRenderer +{ + [StructLayout(LayoutKind.Sequential, Size = 0x10, Pack = 4)] + struct MemoryPoolOut + { + public MemoryPoolState State; + public int Unknown14; + public long Unknown18; + } +} diff --git a/Ryujinx.HLE/OsHle/Services/Aud/MemoryPoolState.cs b/Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/MemoryPoolState.cs similarity index 80% rename from Ryujinx.HLE/OsHle/Services/Aud/MemoryPoolState.cs rename to Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/MemoryPoolState.cs index 892cde49e..f96a0c09d 100644 --- a/Ryujinx.HLE/OsHle/Services/Aud/MemoryPoolState.cs +++ b/Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/MemoryPoolState.cs @@ -1,4 +1,4 @@ -namespace Ryujinx.HLE.OsHle.Services.Aud +namespace Ryujinx.HLE.OsHle.Services.Aud.AudioRenderer { enum MemoryPoolState : int { diff --git a/Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/PlayState.cs b/Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/PlayState.cs new file mode 100644 index 000000000..e8bcf64f3 --- /dev/null +++ b/Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/PlayState.cs @@ -0,0 +1,9 @@ +namespace Ryujinx.HLE.OsHle.Services.Aud.AudioRenderer +{ + enum PlayState : byte + { + Playing = 0, + Stopped = 1, + Paused = 2 + } +} diff --git a/Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/Resampler.cs b/Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/Resampler.cs new file mode 100644 index 000000000..31e0ebecd --- /dev/null +++ b/Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/Resampler.cs @@ -0,0 +1,191 @@ +using System; + +namespace Ryujinx.HLE.OsHle.Services.Aud.AudioRenderer +{ + static class Resampler + { +#region "LookUp Tables" + private static short[] CurveLut0 = new short[] + { + 6600, 19426, 6722, 3, 6479, 19424, 6845, 9, 6359, 19419, 6968, 15, 6239, 19412, 7093, 22, + 6121, 19403, 7219, 28, 6004, 19391, 7345, 34, 5888, 19377, 7472, 41, 5773, 19361, 7600, 48, + 5659, 19342, 7728, 55, 5546, 19321, 7857, 62, 5434, 19298, 7987, 69, 5323, 19273, 8118, 77, + 5213, 19245, 8249, 84, 5104, 19215, 8381, 92, 4997, 19183, 8513, 101, 4890, 19148, 8646, 109, + 4785, 19112, 8780, 118, 4681, 19073, 8914, 127, 4579, 19031, 9048, 137, 4477, 18988, 9183, 147, + 4377, 18942, 9318, 157, 4277, 18895, 9454, 168, 4179, 18845, 9590, 179, 4083, 18793, 9726, 190, + 3987, 18738, 9863, 202, 3893, 18682, 10000, 215, 3800, 18624, 10137, 228, 3709, 18563, 10274, 241, + 3618, 18500, 10411, 255, 3529, 18436, 10549, 270, 3441, 18369, 10687, 285, 3355, 18300, 10824, 300, + 3269, 18230, 10962, 317, 3186, 18157, 11100, 334, 3103, 18082, 11238, 351, 3022, 18006, 11375, 369, + 2942, 17927, 11513, 388, 2863, 17847, 11650, 408, 2785, 17765, 11788, 428, 2709, 17681, 11925, 449, + 2635, 17595, 12062, 471, 2561, 17507, 12198, 494, 2489, 17418, 12334, 517, 2418, 17327, 12470, 541, + 2348, 17234, 12606, 566, 2280, 17140, 12741, 592, 2213, 17044, 12876, 619, 2147, 16946, 13010, 647, + 2083, 16846, 13144, 675, 2020, 16745, 13277, 704, 1958, 16643, 13409, 735, 1897, 16539, 13541, 766, + 1838, 16434, 13673, 798, 1780, 16327, 13803, 832, 1723, 16218, 13933, 866, 1667, 16109, 14062, 901, + 1613, 15998, 14191, 937, 1560, 15885, 14318, 975, 1508, 15772, 14445, 1013, 1457, 15657, 14571, 1052, + 1407, 15540, 14695, 1093, 1359, 15423, 14819, 1134, 1312, 15304, 14942, 1177, 1266, 15185, 15064, 1221, + 1221, 15064, 15185, 1266, 1177, 14942, 15304, 1312, 1134, 14819, 15423, 1359, 1093, 14695, 15540, 1407, + 1052, 14571, 15657, 1457, 1013, 14445, 15772, 1508, 975, 14318, 15885, 1560, 937, 14191, 15998, 1613, + 901, 14062, 16109, 1667, 866, 13933, 16218, 1723, 832, 13803, 16327, 1780, 798, 13673, 16434, 1838, + 766, 13541, 16539, 1897, 735, 13409, 16643, 1958, 704, 13277, 16745, 2020, 675, 13144, 16846, 2083, + 647, 13010, 16946, 2147, 619, 12876, 17044, 2213, 592, 12741, 17140, 2280, 566, 12606, 17234, 2348, + 541, 12470, 17327, 2418, 517, 12334, 17418, 2489, 494, 12198, 17507, 2561, 471, 12062, 17595, 2635, + 449, 11925, 17681, 2709, 428, 11788, 17765, 2785, 408, 11650, 17847, 2863, 388, 11513, 17927, 2942, + 369, 11375, 18006, 3022, 351, 11238, 18082, 3103, 334, 11100, 18157, 3186, 317, 10962, 18230, 3269, + 300, 10824, 18300, 3355, 285, 10687, 18369, 3441, 270, 10549, 18436, 3529, 255, 10411, 18500, 3618, + 241, 10274, 18563, 3709, 228, 10137, 18624, 3800, 215, 10000, 18682, 3893, 202, 9863, 18738, 3987, + 190, 9726, 18793, 4083, 179, 9590, 18845, 4179, 168, 9454, 18895, 4277, 157, 9318, 18942, 4377, + 147, 9183, 18988, 4477, 137, 9048, 19031, 4579, 127, 8914, 19073, 4681, 118, 8780, 19112, 4785, + 109, 8646, 19148, 4890, 101, 8513, 19183, 4997, 92, 8381, 19215, 5104, 84, 8249, 19245, 5213, + 77, 8118, 19273, 5323, 69, 7987, 19298, 5434, 62, 7857, 19321, 5546, 55, 7728, 19342, 5659, + 48, 7600, 19361, 5773, 41, 7472, 19377, 5888, 34, 7345, 19391, 6004, 28, 7219, 19403, 6121, + 22, 7093, 19412, 6239, 15, 6968, 19419, 6359, 9, 6845, 19424, 6479, 3, 6722, 19426, 6600 + }; + + private static short[] CurveLut1 = new short[] + { + -68, 32639, 69, -5, -200, 32630, 212, -15, -328, 32613, 359, -26, -450, 32586, 512, -36, + -568, 32551, 669, -47, -680, 32507, 832, -58, -788, 32454, 1000, -69, -891, 32393, 1174, -80, + -990, 32323, 1352, -92, -1084, 32244, 1536, -103, -1173, 32157, 1724, -115, -1258, 32061, 1919, -128, + -1338, 31956, 2118, -140, -1414, 31844, 2322, -153, -1486, 31723, 2532, -167, -1554, 31593, 2747, -180, + -1617, 31456, 2967, -194, -1676, 31310, 3192, -209, -1732, 31157, 3422, -224, -1783, 30995, 3657, -240, + -1830, 30826, 3897, -256, -1874, 30649, 4143, -272, -1914, 30464, 4393, -289, -1951, 30272, 4648, -307, + -1984, 30072, 4908, -325, -2014, 29866, 5172, -343, -2040, 29652, 5442, -362, -2063, 29431, 5716, -382, + -2083, 29203, 5994, -403, -2100, 28968, 6277, -424, -2114, 28727, 6565, -445, -2125, 28480, 6857, -468, + -2133, 28226, 7153, -490, -2139, 27966, 7453, -514, -2142, 27700, 7758, -538, -2142, 27428, 8066, -563, + -2141, 27151, 8378, -588, -2136, 26867, 8694, -614, -2130, 26579, 9013, -641, -2121, 26285, 9336, -668, + -2111, 25987, 9663, -696, -2098, 25683, 9993, -724, -2084, 25375, 10326, -753, -2067, 25063, 10662, -783, + -2049, 24746, 11000, -813, -2030, 24425, 11342, -844, -2009, 24100, 11686, -875, -1986, 23771, 12033, -907, + -1962, 23438, 12382, -939, -1937, 23103, 12733, -972, -1911, 22764, 13086, -1005, -1883, 22422, 13441, -1039, + -1855, 22077, 13798, -1072, -1825, 21729, 14156, -1107, -1795, 21380, 14516, -1141, -1764, 21027, 14877, -1176, + -1732, 20673, 15239, -1211, -1700, 20317, 15602, -1246, -1667, 19959, 15965, -1282, -1633, 19600, 16329, -1317, + -1599, 19239, 16694, -1353, -1564, 18878, 17058, -1388, -1530, 18515, 17423, -1424, -1495, 18151, 17787, -1459, + -1459, 17787, 18151, -1495, -1424, 17423, 18515, -1530, -1388, 17058, 18878, -1564, -1353, 16694, 19239, -1599, + -1317, 16329, 19600, -1633, -1282, 15965, 19959, -1667, -1246, 15602, 20317, -1700, -1211, 15239, 20673, -1732, + -1176, 14877, 21027, -1764, -1141, 14516, 21380, -1795, -1107, 14156, 21729, -1825, -1072, 13798, 22077, -1855, + -1039, 13441, 22422, -1883, -1005, 13086, 22764, -1911, -972, 12733, 23103, -1937, -939, 12382, 23438, -1962, + -907, 12033, 23771, -1986, -875, 11686, 24100, -2009, -844, 11342, 24425, -2030, -813, 11000, 24746, -2049, + -783, 10662, 25063, -2067, -753, 10326, 25375, -2084, -724, 9993, 25683, -2098, -696, 9663, 25987, -2111, + -668, 9336, 26285, -2121, -641, 9013, 26579, -2130, -614, 8694, 26867, -2136, -588, 8378, 27151, -2141, + -563, 8066, 27428, -2142, -538, 7758, 27700, -2142, -514, 7453, 27966, -2139, -490, 7153, 28226, -2133, + -468, 6857, 28480, -2125, -445, 6565, 28727, -2114, -424, 6277, 28968, -2100, -403, 5994, 29203, -2083, + -382, 5716, 29431, -2063, -362, 5442, 29652, -2040, -343, 5172, 29866, -2014, -325, 4908, 30072, -1984, + -307, 4648, 30272, -1951, -289, 4393, 30464, -1914, -272, 4143, 30649, -1874, -256, 3897, 30826, -1830, + -240, 3657, 30995, -1783, -224, 3422, 31157, -1732, -209, 3192, 31310, -1676, -194, 2967, 31456, -1617, + -180, 2747, 31593, -1554, -167, 2532, 31723, -1486, -153, 2322, 31844, -1414, -140, 2118, 31956, -1338, + -128, 1919, 32061, -1258, -115, 1724, 32157, -1173, -103, 1536, 32244, -1084, -92, 1352, 32323, -990, + -80, 1174, 32393, -891, -69, 1000, 32454, -788, -58, 832, 32507, -680, -47, 669, 32551, -568, + -36, 512, 32586, -450, -26, 359, 32613, -328, -15, 212, 32630, -200, -5, 69, 32639, -68 + }; + + private static short[] CurveLut2 = new short[] + { + 3195, 26287, 3329, -32, 3064, 26281, 3467, -34, 2936, 26270, 3608, -38, 2811, 26253, 3751, -42, + 2688, 26230, 3897, -46, 2568, 26202, 4046, -50, 2451, 26169, 4199, -54, 2338, 26130, 4354, -58, + 2227, 26085, 4512, -63, 2120, 26035, 4673, -67, 2015, 25980, 4837, -72, 1912, 25919, 5004, -76, + 1813, 25852, 5174, -81, 1716, 25780, 5347, -87, 1622, 25704, 5522, -92, 1531, 25621, 5701, -98, + 1442, 25533, 5882, -103, 1357, 25440, 6066, -109, 1274, 25342, 6253, -115, 1193, 25239, 6442, -121, + 1115, 25131, 6635, -127, 1040, 25018, 6830, -133, 967, 24899, 7027, -140, 897, 24776, 7227, -146, + 829, 24648, 7430, -153, 764, 24516, 7635, -159, 701, 24379, 7842, -166, 641, 24237, 8052, -174, + 583, 24091, 8264, -181, 526, 23940, 8478, -187, 472, 23785, 8695, -194, 420, 23626, 8914, -202, + 371, 23462, 9135, -209, 324, 23295, 9358, -215, 279, 23123, 9583, -222, 236, 22948, 9809, -230, + 194, 22769, 10038, -237, 154, 22586, 10269, -243, 117, 22399, 10501, -250, 81, 22208, 10735, -258, + 47, 22015, 10970, -265, 15, 21818, 11206, -271, -16, 21618, 11444, -277, -44, 21415, 11684, -283, + -71, 21208, 11924, -290, -97, 20999, 12166, -296, -121, 20786, 12409, -302, -143, 20571, 12653, -306, + -163, 20354, 12898, -311, -183, 20134, 13143, -316, -201, 19911, 13389, -321, -218, 19686, 13635, -325, + -234, 19459, 13882, -328, -248, 19230, 14130, -332, -261, 18998, 14377, -335, -273, 18765, 14625, -337, + -284, 18531, 14873, -339, -294, 18295, 15121, -341, -302, 18057, 15369, -341, -310, 17817, 15617, -341, + -317, 17577, 15864, -340, -323, 17335, 16111, -340, -328, 17092, 16357, -338, -332, 16848, 16603, -336, + -336, 16603, 16848, -332, -338, 16357, 17092, -328, -340, 16111, 17335, -323, -340, 15864, 17577, -317, + -341, 15617, 17817, -310, -341, 15369, 18057, -302, -341, 15121, 18295, -294, -339, 14873, 18531, -284, + -337, 14625, 18765, -273, -335, 14377, 18998, -261, -332, 14130, 19230, -248, -328, 13882, 19459, -234, + -325, 13635, 19686, -218, -321, 13389, 19911, -201, -316, 13143, 20134, -183, -311, 12898, 20354, -163, + -306, 12653, 20571, -143, -302, 12409, 20786, -121, -296, 12166, 20999, -97, -290, 11924, 21208, -71, + -283, 11684, 21415, -44, -277, 11444, 21618, -16, -271, 11206, 21818, 15, -265, 10970, 22015, 47, + -258, 10735, 22208, 81, -250, 10501, 22399, 117, -243, 10269, 22586, 154, -237, 10038, 22769, 194, + -230, 9809, 22948, 236, -222, 9583, 23123, 279, -215, 9358, 23295, 324, -209, 9135, 23462, 371, + -202, 8914, 23626, 420, -194, 8695, 23785, 472, -187, 8478, 23940, 526, -181, 8264, 24091, 583, + -174, 8052, 24237, 641, -166, 7842, 24379, 701, -159, 7635, 24516, 764, -153, 7430, 24648, 829, + -146, 7227, 24776, 897, -140, 7027, 24899, 967, -133, 6830, 25018, 1040, -127, 6635, 25131, 1115, + -121, 6442, 25239, 1193, -115, 6253, 25342, 1274, -109, 6066, 25440, 1357, -103, 5882, 25533, 1442, + -98, 5701, 25621, 1531, -92, 5522, 25704, 1622, -87, 5347, 25780, 1716, -81, 5174, 25852, 1813, + -76, 5004, 25919, 1912, -72, 4837, 25980, 2015, -67, 4673, 26035, 2120, -63, 4512, 26085, 2227, + -58, 4354, 26130, 2338, -54, 4199, 26169, 2451, -50, 4046, 26202, 2568, -46, 3897, 26230, 2688, + -42, 3751, 26253, 2811, -38, 3608, 26270, 2936, -34, 3467, 26281, 3064, -32, 3329, 26287, 3195 + }; +#endregion + + public static int[] Resample2Ch( + int[] Buffer, + int SrcSampleRate, + int DstSampleRate, + int SamplesCount, + ref int FracPart) + { + if (Buffer == null) + { + throw new ArgumentNullException(nameof(Buffer)); + } + + if (SrcSampleRate <= 0) + { + throw new ArgumentOutOfRangeException(nameof(SrcSampleRate)); + } + + if (DstSampleRate <= 0) + { + throw new ArgumentOutOfRangeException(nameof(DstSampleRate)); + } + + double Ratio = (double)SrcSampleRate / DstSampleRate; + + int NewSamplesCount = (int)(SamplesCount / Ratio); + + int Step = (int)(Ratio * 0x8000); + + int[] Output = new int[NewSamplesCount * 2]; + + short[] Lut; + + if (Step > 0xaaaa) + { + Lut = CurveLut0; + } + else if (Step <= 0x8000) + { + Lut = CurveLut1; + } + else + { + Lut = CurveLut2; + } + + int InOffs = 0; + + for (int OutOffs = 0; OutOffs < Output.Length; OutOffs += 2) + { + int LutIndex = (FracPart >> 8) * 4; + + int Sample0 = Buffer[(InOffs + 0) * 2 + 0] * Lut[LutIndex + 0] + + Buffer[(InOffs + 1) * 2 + 0] * Lut[LutIndex + 1] + + Buffer[(InOffs + 2) * 2 + 0] * Lut[LutIndex + 2] + + Buffer[(InOffs + 3) * 2 + 0] * Lut[LutIndex + 3]; + + int Sample1 = Buffer[(InOffs + 0) * 2 + 1] * Lut[LutIndex + 0] + + Buffer[(InOffs + 1) * 2 + 1] * Lut[LutIndex + 1] + + Buffer[(InOffs + 2) * 2 + 1] * Lut[LutIndex + 2] + + Buffer[(InOffs + 3) * 2 + 1] * Lut[LutIndex + 3]; + + int NewOffset = FracPart + Step; + + InOffs += NewOffset >> 15; + + FracPart = NewOffset & 0x7fff; + + Output[OutOffs + 0] = Sample0 >> 15; + Output[OutOffs + 1] = Sample1 >> 15; + } + + return Output; + } + } +} \ No newline at end of file diff --git a/Ryujinx.HLE/OsHle/Services/Aud/UpdateDataHeader.cs b/Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/UpdateDataHeader.cs similarity index 65% rename from Ryujinx.HLE/OsHle/Services/Aud/UpdateDataHeader.cs rename to Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/UpdateDataHeader.cs index f944b302e..a6dfbc0bc 100644 --- a/Ryujinx.HLE/OsHle/Services/Aud/UpdateDataHeader.cs +++ b/Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/UpdateDataHeader.cs @@ -1,15 +1,15 @@ -namespace Ryujinx.HLE.OsHle.Services.Aud +namespace Ryujinx.HLE.OsHle.Services.Aud.AudioRenderer { struct UpdateDataHeader { public int Revision; public int BehaviorSize; - public int MemoryPoolsSize; - public int VoicesSize; + public int MemoryPoolSize; + public int VoiceSize; public int VoiceResourceSize; - public int EffectsSize; - public int MixesSize; - public int SinksSize; + public int EffectSize; + public int MixeSize; + public int SinkSize; public int PerformanceManagerSize; public int Unknown24; public int Unknown28; diff --git a/Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/VoiceChannelResourceIn.cs b/Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/VoiceChannelResourceIn.cs new file mode 100644 index 000000000..0916b03e2 --- /dev/null +++ b/Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/VoiceChannelResourceIn.cs @@ -0,0 +1,10 @@ +using System.Runtime.InteropServices; + +namespace Ryujinx.HLE.OsHle.Services.Aud.AudioRenderer +{ + [StructLayout(LayoutKind.Sequential, Size = 0x70, Pack = 1)] + struct VoiceChannelResourceIn + { + //??? + } +} diff --git a/Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/VoiceContext.cs b/Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/VoiceContext.cs new file mode 100644 index 000000000..1bf9ed73e --- /dev/null +++ b/Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/VoiceContext.cs @@ -0,0 +1,188 @@ +using ChocolArm64.Memory; +using Ryujinx.Audio.Adpcm; +using System; + +namespace Ryujinx.HLE.OsHle.Services.Aud.AudioRenderer +{ + class VoiceContext + { + private bool Acquired; + private bool BufferReload; + + private int ResamplerFracPart; + + private int BufferIndex; + private int Offset; + + public int SampleRate; + public int ChannelsCount; + + public float Volume; + + public PlayState PlayState; + + public SampleFormat SampleFormat; + + public AdpcmDecoderContext AdpcmCtx; + + public WaveBuffer[] WaveBuffers; + + public VoiceOut OutStatus; + + private int[] Samples; + + public bool Playing => Acquired && PlayState == PlayState.Playing; + + public VoiceContext() + { + WaveBuffers = new WaveBuffer[4]; + } + + public void SetAcquireState(bool NewState) + { + if (Acquired && !NewState) + { + //Release. + Reset(); + } + + Acquired = NewState; + } + + private void Reset() + { + BufferReload = true; + + BufferIndex = 0; + Offset = 0; + + OutStatus.PlayedSamplesCount = 0; + OutStatus.PlayedWaveBuffersCount = 0; + OutStatus.VoiceDropsCount = 0; + } + + public int[] GetBufferData(AMemory Memory, int MaxSamples, out int SamplesCount) + { + if (!Playing) + { + SamplesCount = 0; + + return null; + } + + if (BufferReload) + { + BufferReload = false; + + UpdateBuffer(Memory); + } + + WaveBuffer Wb = WaveBuffers[BufferIndex]; + + int MaxSize = Samples.Length - Offset; + + int Size = MaxSamples * AudioConsts.HostChannelsCount; + + if (Size > MaxSize) + { + Size = MaxSize; + } + + int[] Output = new int[Size]; + + Array.Copy(Samples, Offset, Output, 0, Size); + + SamplesCount = Size / AudioConsts.HostChannelsCount; + + OutStatus.PlayedSamplesCount += SamplesCount; + + Offset += Size; + + if (Offset == Samples.Length) + { + Offset = 0; + + if (Wb.Looping == 0) + { + SetBufferIndex((BufferIndex + 1) & 3); + } + + OutStatus.PlayedWaveBuffersCount++; + + if (Wb.LastBuffer != 0) + { + PlayState = PlayState.Paused; + } + } + + return Output; + } + + private void UpdateBuffer(AMemory Memory) + { + //TODO: Implement conversion for formats other + //than interleaved stereo (2 channels). + //As of now, it assumes that HostChannelsCount == 2. + WaveBuffer Wb = WaveBuffers[BufferIndex]; + + if (SampleFormat == SampleFormat.PcmInt16) + { + int SamplesCount = (int)(Wb.Size / (sizeof(short) * ChannelsCount)); + + Samples = new int[SamplesCount * AudioConsts.HostChannelsCount]; + + if (ChannelsCount == 1) + { + for (int Index = 0; Index < SamplesCount; Index++) + { + short Sample = Memory.ReadInt16(Wb.Position + Index * 2); + + Samples[Index * 2 + 0] = Sample; + Samples[Index * 2 + 1] = Sample; + } + } + else + { + for (int Index = 0; Index < SamplesCount * 2; Index++) + { + Samples[Index] = Memory.ReadInt16(Wb.Position + Index * 2); + } + } + } + else if (SampleFormat == SampleFormat.Adpcm) + { + byte[] Buffer = Memory.ReadBytes(Wb.Position, Wb.Size); + + Samples = AdpcmDecoder.Decode(Buffer, AdpcmCtx); + } + else + { + throw new InvalidOperationException(); + } + + if (SampleRate != AudioConsts.HostSampleRate) + { + //TODO: We should keep the frames being discarded (see the 4 below) + //on a buffer and include it on the next samples buffer, to allow + //the resampler to do seamless interpolation between wave buffers. + int SamplesCount = Samples.Length / AudioConsts.HostChannelsCount; + + SamplesCount = Math.Max(SamplesCount - 4, 0); + + Samples = Resampler.Resample2Ch( + Samples, + SampleRate, + AudioConsts.HostSampleRate, + SamplesCount, + ref ResamplerFracPart); + } + } + + public void SetBufferIndex(int Index) + { + BufferIndex = Index & 3; + + BufferReload = true; + } + } +} diff --git a/Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/VoiceIn.cs b/Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/VoiceIn.cs new file mode 100644 index 000000000..790affb20 --- /dev/null +++ b/Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/VoiceIn.cs @@ -0,0 +1,49 @@ +using System.Runtime.InteropServices; + +namespace Ryujinx.HLE.OsHle.Services.Aud.AudioRenderer +{ + [StructLayout(LayoutKind.Sequential, Size = 0x170, Pack = 1)] + struct VoiceIn + { + public int VoiceSlot; + public int NodeId; + + public byte FirstUpdate; + public byte Acquired; + + public PlayState PlayState; + + public SampleFormat SampleFormat; + + public int SampleRate; + + public int Priority; + + public int Unknown14; + + public int ChannelsCount; + + public float Pitch; + public float Volume; + + public BiquadFilter BiquadFilter0; + public BiquadFilter BiquadFilter1; + + public int AppendedWaveBuffersCount; + + public int BaseWaveBufferIndex; + + public int Unknown44; + + public long AdpcmCoeffsPosition; + public long AdpcmCoeffsSize; + + public int VoiceDestination; + public int Padding; + + public WaveBuffer WaveBuffer0; + public WaveBuffer WaveBuffer1; + public WaveBuffer WaveBuffer2; + public WaveBuffer WaveBuffer3; + } +} diff --git a/Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/VoiceOut.cs b/Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/VoiceOut.cs new file mode 100644 index 000000000..1fcf929f2 --- /dev/null +++ b/Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/VoiceOut.cs @@ -0,0 +1,12 @@ +using System.Runtime.InteropServices; + +namespace Ryujinx.HLE.OsHle.Services.Aud.AudioRenderer +{ + [StructLayout(LayoutKind.Sequential, Size = 0x10, Pack = 4)] + struct VoiceOut + { + public long PlayedSamplesCount; + public int PlayedWaveBuffersCount; + public int VoiceDropsCount; //? + } +} diff --git a/Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/WaveBuffer.cs b/Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/WaveBuffer.cs new file mode 100644 index 000000000..6b56b9081 --- /dev/null +++ b/Ryujinx.HLE/OsHle/Services/Aud/AudioRenderer/WaveBuffer.cs @@ -0,0 +1,20 @@ +using System.Runtime.InteropServices; + +namespace Ryujinx.HLE.OsHle.Services.Aud.AudioRenderer +{ + [StructLayout(LayoutKind.Sequential, Size = 0x38, Pack = 1)] + struct WaveBuffer + { + public long Position; + public long Size; + public int FirstSampleOffset; + public int LastSampleOffset; + public byte Looping; + public byte LastBuffer; + public short Unknown1A; + public int Unknown1C; + public long AdpcmLoopContextPosition; + public long AdpcmLoopContextSize; + public long Unknown30; + } +} diff --git a/Ryujinx.HLE/OsHle/Services/Aud/AudioRendererParameter.cs b/Ryujinx.HLE/OsHle/Services/Aud/AudioRendererParameter.cs index 0a0792ec5..d7e1df01a 100644 --- a/Ryujinx.HLE/OsHle/Services/Aud/AudioRendererParameter.cs +++ b/Ryujinx.HLE/OsHle/Services/Aud/AudioRendererParameter.cs @@ -8,14 +8,14 @@ namespace Ryujinx.HLE.OsHle.Services.Aud public int SampleRate; public int SampleCount; public int Unknown8; - public int UnknownC; + public int MixCount; public int VoiceCount; public int SinkCount; public int EffectCount; - public int Unknown1C; - public int Unknown20; + public int PerformanceManagerCount; + public int VoiceDropEnable; public int SplitterCount; - public int Unknown28; + public int SplitterDestinationDataCount; public int Unknown2C; public int Revision; } diff --git a/Ryujinx.HLE/OsHle/Services/Aud/IAudioOutManager.cs b/Ryujinx.HLE/OsHle/Services/Aud/IAudioOutManager.cs index 8c78d1d49..7a3bc4d4f 100644 --- a/Ryujinx.HLE/OsHle/Services/Aud/IAudioOutManager.cs +++ b/Ryujinx.HLE/OsHle/Services/Aud/IAudioOutManager.cs @@ -3,6 +3,7 @@ using Ryujinx.Audio; using Ryujinx.HLE.Logging; using Ryujinx.HLE.OsHle.Handles; using Ryujinx.HLE.OsHle.Ipc; +using Ryujinx.HLE.OsHle.Services.Aud.AudioOut; using System.Collections.Generic; using System.Text; @@ -154,13 +155,13 @@ namespace Ryujinx.HLE.OsHle.Services.Aud IAalOutput AudioOut = Context.Ns.AudioOut; - int Track = AudioOut.OpenTrack(SampleRate, Channels, Callback, out AudioFormat Format); + int Track = AudioOut.OpenTrack(SampleRate, Channels, Callback); MakeObject(Context, new IAudioOut(AudioOut, ReleaseEvent, Track)); Context.ResponseData.Write(SampleRate); Context.ResponseData.Write(Channels); - Context.ResponseData.Write((int)Format); + Context.ResponseData.Write((int)SampleFormat.PcmInt16); Context.ResponseData.Write((int)PlaybackState.Stopped); return 0; diff --git a/Ryujinx.HLE/OsHle/Services/Aud/IAudioRenderer.cs b/Ryujinx.HLE/OsHle/Services/Aud/IAudioRenderer.cs deleted file mode 100644 index bd9188c34..000000000 --- a/Ryujinx.HLE/OsHle/Services/Aud/IAudioRenderer.cs +++ /dev/null @@ -1,136 +0,0 @@ -using ChocolArm64.Memory; -using Ryujinx.HLE.Logging; -using Ryujinx.HLE.OsHle.Handles; -using Ryujinx.HLE.OsHle.Ipc; -using System; -using System.Collections.Generic; -using System.Runtime.InteropServices; - -namespace Ryujinx.HLE.OsHle.Services.Aud -{ - class IAudioRenderer : IpcService, IDisposable - { - private Dictionary m_Commands; - - public override IReadOnlyDictionary Commands => m_Commands; - - private KEvent UpdateEvent; - - private AudioRendererParameter Params; - - public IAudioRenderer(AudioRendererParameter Params) - { - m_Commands = new Dictionary() - { - { 4, RequestUpdateAudioRenderer }, - { 5, StartAudioRenderer }, - { 6, StopAudioRenderer }, - { 7, QuerySystemEvent } - }; - - UpdateEvent = new KEvent(); - - this.Params = Params; - } - - public long RequestUpdateAudioRenderer(ServiceCtx Context) - { - long OutputPosition = Context.Request.ReceiveBuff[0].Position; - long OutputSize = Context.Request.ReceiveBuff[0].Size; - - AMemoryHelper.FillWithZeros(Context.Memory, OutputPosition, (int)OutputSize); - - long InputPosition = Context.Request.SendBuff[0].Position; - - UpdateDataHeader InputDataHeader = AMemoryHelper.Read(Context.Memory, InputPosition); - - UpdateDataHeader OutputDataHeader = new UpdateDataHeader(); - - int UpdateHeaderSize = Marshal.SizeOf(); - - OutputDataHeader.Revision = Params.Revision; - OutputDataHeader.BehaviorSize = 0xb0; - OutputDataHeader.MemoryPoolsSize = (Params.EffectCount + Params.VoiceCount * 4) * 0x10; - OutputDataHeader.VoicesSize = Params.VoiceCount * 0x10; - OutputDataHeader.EffectsSize = Params.EffectCount * 0x10; - OutputDataHeader.SinksSize = Params.SinkCount * 0x20; - OutputDataHeader.PerformanceManagerSize = 0x10; - OutputDataHeader.TotalSize = UpdateHeaderSize + - OutputDataHeader.BehaviorSize + - OutputDataHeader.MemoryPoolsSize + - OutputDataHeader.VoicesSize + - OutputDataHeader.EffectsSize + - OutputDataHeader.SinksSize + - OutputDataHeader.PerformanceManagerSize; - - AMemoryHelper.Write(Context.Memory, OutputPosition, OutputDataHeader); - - int InMemoryPoolOffset = UpdateHeaderSize + InputDataHeader.BehaviorSize; - - int OutMemoryPoolOffset = UpdateHeaderSize; - - for (int Offset = 0; Offset < OutputDataHeader.MemoryPoolsSize; Offset += 0x10, InMemoryPoolOffset += 0x20) - { - MemoryPoolState PoolState = (MemoryPoolState)Context.Memory.ReadInt32(InputPosition + InMemoryPoolOffset + 0x10); - - //TODO: Figure out what the other values does. - if (PoolState == MemoryPoolState.RequestAttach) - { - Context.Memory.WriteInt32(OutputPosition + OutMemoryPoolOffset + Offset, (int)MemoryPoolState.Attached); - } - else if (PoolState == MemoryPoolState.RequestDetach) - { - Context.Memory.WriteInt32(OutputPosition + OutMemoryPoolOffset + Offset, (int)MemoryPoolState.Detached); - } - } - - int OutVoicesOffset = OutMemoryPoolOffset + OutputDataHeader.MemoryPoolsSize; - - for (int Offset = 0; Offset < OutputDataHeader.VoicesSize; Offset += 0x10) - { - Context.Memory.WriteInt32(OutputPosition + OutVoicesOffset + Offset + 8, (int)VoicePlaybackState.Finished); - } - - //TODO: We shouldn't be signaling this here. - UpdateEvent.WaitEvent.Set(); - - return 0; - } - - public long StartAudioRenderer(ServiceCtx Context) - { - Context.Ns.Log.PrintStub(LogClass.ServiceAudio, "Stubbed."); - - return 0; - } - - public long StopAudioRenderer(ServiceCtx Context) - { - Context.Ns.Log.PrintStub(LogClass.ServiceAudio, "Stubbed."); - - return 0; - } - - public long QuerySystemEvent(ServiceCtx Context) - { - int Handle = Context.Process.HandleTable.OpenHandle(UpdateEvent); - - Context.Response.HandleDesc = IpcHandleDesc.MakeCopy(Handle); - - return 0; - } - - public void Dispose() - { - Dispose(true); - } - - protected virtual void Dispose(bool Disposing) - { - if (Disposing) - { - UpdateEvent.Dispose(); - } - } - } -} diff --git a/Ryujinx.HLE/OsHle/Services/Aud/IAudioRendererManager.cs b/Ryujinx.HLE/OsHle/Services/Aud/IAudioRendererManager.cs index a7daeedd5..021c36357 100644 --- a/Ryujinx.HLE/OsHle/Services/Aud/IAudioRendererManager.cs +++ b/Ryujinx.HLE/OsHle/Services/Aud/IAudioRendererManager.cs @@ -1,7 +1,11 @@ +using Ryujinx.Audio; using Ryujinx.HLE.Logging; using Ryujinx.HLE.OsHle.Ipc; +using Ryujinx.HLE.OsHle.Services.Aud.AudioRenderer; +using Ryujinx.HLE.OsHle.Utilities; using System.Collections.Generic; -using System.Runtime.InteropServices; + +using static Ryujinx.HLE.OsHle.ErrorCode; namespace Ryujinx.HLE.OsHle.Services.Aud { @@ -12,6 +16,10 @@ namespace Ryujinx.HLE.OsHle.Services.Aud ('V' << 16) | ('0' << 24); + private const int Rev = 4; + + public const int RevMagic = Rev0Magic + Rev; + private Dictionary m_Commands; public override IReadOnlyDictionary Commands => m_Commands; @@ -28,76 +36,69 @@ namespace Ryujinx.HLE.OsHle.Services.Aud public long OpenAudioRenderer(ServiceCtx Context) { - //Same buffer as GetAudioRendererWorkBufferSize is receive here. + IAalOutput AudioOut = Context.Ns.AudioOut; - AudioRendererParameter Params = new AudioRendererParameter(); + AudioRendererParameter Params = GetAudioRendererParameter(Context); - Params.SampleRate = Context.RequestData.ReadInt32(); - Params.SampleCount = Context.RequestData.ReadInt32(); - Params.Unknown8 = Context.RequestData.ReadInt32(); - Params.UnknownC = Context.RequestData.ReadInt32(); - Params.VoiceCount = Context.RequestData.ReadInt32(); - Params.SinkCount = Context.RequestData.ReadInt32(); - Params.EffectCount = Context.RequestData.ReadInt32(); - Params.Unknown1C = Context.RequestData.ReadInt32(); - Params.Unknown20 = Context.RequestData.ReadInt32(); - Params.SplitterCount = Context.RequestData.ReadInt32(); - Params.Unknown28 = Context.RequestData.ReadInt32(); - Params.Unknown2C = Context.RequestData.ReadInt32(); - Params.Revision = Context.RequestData.ReadInt32(); - - MakeObject(Context, new IAudioRenderer(Params)); + MakeObject(Context, new IAudioRenderer(Context.Memory, AudioOut, Params)); return 0; } public long GetAudioRendererWorkBufferSize(ServiceCtx Context) { - long SampleRate = Context.RequestData.ReadUInt32(); - long Unknown4 = Context.RequestData.ReadUInt32(); - long Unknown8 = Context.RequestData.ReadUInt32(); - long UnknownC = Context.RequestData.ReadUInt32(); - long Unknown10 = Context.RequestData.ReadUInt32(); //VoiceCount - long Unknown14 = Context.RequestData.ReadUInt32(); //SinkCount - long Unknown18 = Context.RequestData.ReadUInt32(); //EffectCount - long Unknown1c = Context.RequestData.ReadUInt32(); //Boolean - long Unknown20 = Context.RequestData.ReadUInt32(); //Not used here in FW3.0.1 - Boolean - long Unknown24 = Context.RequestData.ReadUInt32(); - long Unknown28 = Context.RequestData.ReadUInt32(); //SplitterCount - long Unknown2c = Context.RequestData.ReadUInt32(); //Not used here in FW3.0.1 - int RevMagic = Context.RequestData.ReadInt32(); + AudioRendererParameter Params = GetAudioRendererParameter(Context); - int Version = (RevMagic - Rev0Magic) >> 24; + int Revision = (Params.Revision - Rev0Magic) >> 24; - if (Version <= 3) //REV3 Max is supported + if (Revision <= Rev) //REV3 Max is supported { - long Size = RoundUp(Unknown8 * 4, 64); - Size += (UnknownC << 10); - Size += (UnknownC + 1) * 0x940; - Size += Unknown10 * 0x3F0; - Size += RoundUp((UnknownC + 1) * 8, 16); - Size += RoundUp(Unknown10 * 8, 16); - Size += RoundUp((0x3C0 * (Unknown14 + UnknownC) + 4 * Unknown4) * (Unknown8 + 6), 64); - Size += 0x2C0 * (Unknown14 + UnknownC) + 0x30 * (Unknown18 + (4 * Unknown10)) + 0x50; + bool IsSplitterSupported = Revision >= 3; - if (Version >= 3) //IsSplitterSupported + long Size; + + Size = IntUtils.AlignUp(Params.Unknown8 * 4, 64); + Size += Params.MixCount * 0x400; + Size += (Params.MixCount + 1) * 0x940; + Size += Params.VoiceCount * 0x3F0; + Size += IntUtils.AlignUp((Params.MixCount + 1) * 8, 16); + Size += IntUtils.AlignUp(Params.VoiceCount * 8, 16); + Size += IntUtils.AlignUp( + ((Params.SinkCount + Params.MixCount) * 0x3C0 + Params.SampleCount * 4) * + (Params.Unknown8 + 6), 64); + Size += (Params.SinkCount + Params.MixCount) * 0x2C0; + Size += (Params.EffectCount + 4 * Params.VoiceCount) * 0x30 + 0x50; + + if (IsSplitterSupported) { - Size += RoundUp((NodeStatesGetWorkBufferSize((int)UnknownC + 1) + EdgeMatrixGetWorkBufferSize((int)UnknownC + 1)), 16); - Size += 0xE0 * Unknown28 + 0x20 * Unknown24 + RoundUp(Unknown28 * 4, 16); + Size += IntUtils.AlignUp(( + NodeStatesGetWorkBufferSize(Params.MixCount + 1) + + EdgeMatrixGetWorkBufferSize(Params.MixCount + 1)), 16); + + Size += Params.SplitterDestinationDataCount * 0xE0; + Size += Params.SplitterCount * 0x20; + Size += IntUtils.AlignUp(Params.SplitterDestinationDataCount * 4, 16); } - Size = 0x4C0 * Unknown18 + RoundUp(Size, 64) + 0x170 * Unknown14 + ((Unknown10 << 8) | 0x40); + Size = Params.EffectCount * 0x4C0 + + Params.SinkCount * 0x170 + + Params.VoiceCount * 0x100 + + IntUtils.AlignUp(Size, 64) + 0x40; - if (Unknown1c >= 1) + if (Params.PerformanceManagerCount >= 1) { - Size += ((((Unknown18 + Unknown14 + Unknown10 + UnknownC + 1) * 16) + 0x658) * (Unknown1c + 1) + 0x13F) & ~0x3FL; + Size += (((Params.EffectCount + + Params.SinkCount + + Params.VoiceCount + + Params.MixCount + 1) * 16 + 0x658) * + (Params.PerformanceManagerCount + 1) + 0x13F) & ~0x3FL; } - long WorkBufferSize = (Size + 0x1907D) & ~0xFFFL; + Size = (Size + 0x1907D) & ~0xFFFL; - Context.ResponseData.Write(WorkBufferSize); + Context.ResponseData.Write(Size); - Context.Ns.Log.PrintDebug(LogClass.ServiceAudio, $"WorkBufferSize is 0x{WorkBufferSize:x16}."); + Context.Ns.Log.PrintDebug(LogClass.ServiceAudio, $"WorkBufferSize is 0x{Size:x16}."); return 0; } @@ -105,20 +106,36 @@ namespace Ryujinx.HLE.OsHle.Services.Aud { Context.ResponseData.Write(0L); - Context.Ns.Log.PrintError(LogClass.ServiceAudio, $"Library Revision 0x{RevMagic:x8} is not supported!"); + Context.Ns.Log.PrintWarning(LogClass.ServiceAudio, $"Library Revision 0x{Params.Revision:x8} is not supported!"); - return 0x499; + return MakeError(ErrorModule.Audio, AudErr.UnsupportedRevision); } } - private static long RoundUp(long Value, int Size) + private AudioRendererParameter GetAudioRendererParameter(ServiceCtx Context) { - return (Value + (Size - 1)) & ~((long)Size - 1); + AudioRendererParameter Params = new AudioRendererParameter(); + + Params.SampleRate = Context.RequestData.ReadInt32(); + Params.SampleCount = Context.RequestData.ReadInt32(); + Params.Unknown8 = Context.RequestData.ReadInt32(); + Params.MixCount = Context.RequestData.ReadInt32(); + Params.VoiceCount = Context.RequestData.ReadInt32(); + Params.SinkCount = Context.RequestData.ReadInt32(); + Params.EffectCount = Context.RequestData.ReadInt32(); + Params.PerformanceManagerCount = Context.RequestData.ReadInt32(); + Params.VoiceDropEnable = Context.RequestData.ReadInt32(); + Params.SplitterCount = Context.RequestData.ReadInt32(); + Params.SplitterDestinationDataCount = Context.RequestData.ReadInt32(); + Params.Unknown2C = Context.RequestData.ReadInt32(); + Params.Revision = Context.RequestData.ReadInt32(); + + return Params; } private static int NodeStatesGetWorkBufferSize(int Value) { - int Result = (int)RoundUp(Value, 64); + int Result = IntUtils.AlignUp(Value, 64); if (Result < 0) { @@ -130,7 +147,7 @@ namespace Ryujinx.HLE.OsHle.Services.Aud private static int EdgeMatrixGetWorkBufferSize(int Value) { - int Result = (int)RoundUp(Value * Value, 64); + int Result = IntUtils.AlignUp(Value * Value, 64); if (Result < 0) { diff --git a/Ryujinx.Audio/AudioFormat.cs b/Ryujinx.HLE/OsHle/Services/Aud/SampleFormat.cs similarity index 51% rename from Ryujinx.Audio/AudioFormat.cs rename to Ryujinx.HLE/OsHle/Services/Aud/SampleFormat.cs index 8250d1368..06ab49299 100644 --- a/Ryujinx.Audio/AudioFormat.cs +++ b/Ryujinx.HLE/OsHle/Services/Aud/SampleFormat.cs @@ -1,12 +1,12 @@ -namespace Ryujinx.Audio +namespace Ryujinx.HLE.OsHle.Services.Aud { - public enum AudioFormat + enum SampleFormat : byte { Invalid = 0, PcmInt8 = 1, PcmInt16 = 2, - PcmImt24 = 3, - PcmImt32 = 4, + PcmInt24 = 3, + PcmInt32 = 4, PcmFloat = 5, Adpcm = 6 } diff --git a/Ryujinx.HLE/OsHle/Services/Aud/VoiceState.cs b/Ryujinx.HLE/OsHle/Services/Aud/VoiceState.cs deleted file mode 100644 index 8b3433239..000000000 --- a/Ryujinx.HLE/OsHle/Services/Aud/VoiceState.cs +++ /dev/null @@ -1,9 +0,0 @@ -namespace Ryujinx.HLE.OsHle.Services.Aud -{ - enum VoicePlaybackState : int - { - Playing = 0, - Finished = 1, - Paused = 2 - } -} diff --git a/Ryujinx.HLE/OsHle/Services/Nv/NvMap/NvMapIoctl.cs b/Ryujinx.HLE/OsHle/Services/Nv/NvMap/NvMapIoctl.cs index ec10a3758..4c6107f8f 100644 --- a/Ryujinx.HLE/OsHle/Services/Nv/NvMap/NvMapIoctl.cs +++ b/Ryujinx.HLE/OsHle/Services/Nv/NvMap/NvMapIoctl.cs @@ -48,7 +48,7 @@ namespace Ryujinx.HLE.OsHle.Services.Nv.NvMap return NvResult.InvalidInput; } - int Size = IntUtils.RoundUp(Args.Size, NvGpuVmm.PageSize); + int Size = IntUtils.AlignUp(Args.Size, NvGpuVmm.PageSize); Args.Handle = AddNvMap(Context, new NvMapHandle(Size)); @@ -121,7 +121,7 @@ namespace Ryujinx.HLE.OsHle.Services.Nv.NvMap Map.Align = Args.Align; Map.Kind = (byte)Args.Kind; - int Size = IntUtils.RoundUp(Map.Size, NvGpuVmm.PageSize); + int Size = IntUtils.AlignUp(Map.Size, NvGpuVmm.PageSize); long Address = Args.Address; diff --git a/Ryujinx.HLE/OsHle/Utilities/IntUtils.cs b/Ryujinx.HLE/OsHle/Utilities/IntUtils.cs index ba0726c31..39cced280 100644 --- a/Ryujinx.HLE/OsHle/Utilities/IntUtils.cs +++ b/Ryujinx.HLE/OsHle/Utilities/IntUtils.cs @@ -2,12 +2,12 @@ namespace Ryujinx.HLE.OsHle.Utilities { static class IntUtils { - public static int RoundUp(int Value, int Size) + public static int AlignUp(int Value, int Size) { return (Value + (Size - 1)) & ~(Size - 1); } - public static long RoundUp(long Value, int Size) + public static long AlignUp(long Value, int Size) { return (Value + (Size - 1)) & ~((long)Size - 1); } diff --git a/Ryujinx.HLE/OsHle/Utilities/StructReader.cs b/Ryujinx.HLE/OsHle/Utilities/StructReader.cs new file mode 100644 index 000000000..e218288b6 --- /dev/null +++ b/Ryujinx.HLE/OsHle/Utilities/StructReader.cs @@ -0,0 +1,45 @@ +using ChocolArm64.Memory; +using System.Runtime.InteropServices; + +namespace Ryujinx.HLE.OsHle.Utilities +{ + class StructReader + { + private AMemory Memory; + + public long Position { get; private set; } + + public StructReader(AMemory Memory, long Position) + { + this.Memory = Memory; + this.Position = Position; + } + + public T Read() where T : struct + { + T Value = AMemoryHelper.Read(Memory, Position); + + Position += Marshal.SizeOf(); + + return Value; + } + + public T[] Read(int Size) where T : struct + { + int StructSize = Marshal.SizeOf(); + + int Count = Size / StructSize; + + T[] Output = new T[Count]; + + for (int Index = 0; Index < Count; Index++) + { + Output[Index] = AMemoryHelper.Read(Memory, Position); + + Position += StructSize; + } + + return Output; + } + } +} diff --git a/Ryujinx.HLE/OsHle/Utilities/StructWriter.cs b/Ryujinx.HLE/OsHle/Utilities/StructWriter.cs new file mode 100644 index 000000000..7daa95fb6 --- /dev/null +++ b/Ryujinx.HLE/OsHle/Utilities/StructWriter.cs @@ -0,0 +1,25 @@ +using ChocolArm64.Memory; +using System.Runtime.InteropServices; + +namespace Ryujinx.HLE.OsHle.Utilities +{ + class StructWriter + { + private AMemory Memory; + + public long Position { get; private set; } + + public StructWriter(AMemory Memory, long Position) + { + this.Memory = Memory; + this.Position = Position; + } + + public void Write(T Value) where T : struct + { + AMemoryHelper.Write(Memory, Position, Value); + + Position += Marshal.SizeOf(); + } + } +} From 21e590c3abe64a0cbe4e51ebcc9f4bb0f945fc00 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Sun, 15 Jul 2018 00:04:46 -0300 Subject: [PATCH 18/23] Add support for ioctl2, SetTimeout and KickoffPbWithAttr (#261) * Add support for ioctl2, SetTimeout and KickoffPbWithAttr * Call UnloadProcess on NvHostChannelIoctl aswell --- .../OsHle/Services/Nv/INvDrvServices.cs | 17 +-- .../Services/Nv/NvHostChannel/NvChannel.cs | 7 ++ .../Nv/NvHostChannel/NvChannelName.cs | 7 ++ .../Nv/NvHostChannel/NvHostChannelIoctl.cs | 116 +++++++++++++++--- .../NvHostChannelSubmitGpfifo.cs | 2 +- 5 files changed, 123 insertions(+), 26 deletions(-) create mode 100644 Ryujinx.HLE/OsHle/Services/Nv/NvHostChannel/NvChannel.cs create mode 100644 Ryujinx.HLE/OsHle/Services/Nv/NvHostChannel/NvChannelName.cs diff --git a/Ryujinx.HLE/OsHle/Services/Nv/INvDrvServices.cs b/Ryujinx.HLE/OsHle/Services/Nv/INvDrvServices.cs index 5c1748bdb..4654d15f6 100644 --- a/Ryujinx.HLE/OsHle/Services/Nv/INvDrvServices.cs +++ b/Ryujinx.HLE/OsHle/Services/Nv/INvDrvServices.cs @@ -23,11 +23,11 @@ namespace Ryujinx.HLE.OsHle.Services.Nv private static Dictionary IoctlProcessors = new Dictionary() { - { "/dev/nvhost-as-gpu", ProcessIoctlNvGpuAS }, - { "/dev/nvhost-ctrl", ProcessIoctlNvHostCtrl }, - { "/dev/nvhost-ctrl-gpu", ProcessIoctlNvGpuGpu }, - { "/dev/nvhost-gpu", ProcessIoctlNvHostChannel }, - { "/dev/nvmap", ProcessIoctlNvMap } + { "/dev/nvhost-as-gpu", ProcessIoctlNvGpuAS }, + { "/dev/nvhost-ctrl", ProcessIoctlNvHostCtrl }, + { "/dev/nvhost-ctrl-gpu", ProcessIoctlNvGpuGpu }, + { "/dev/nvhost-gpu", ProcessIoctlNvHostGpu }, + { "/dev/nvmap", ProcessIoctlNvMap } }; public static GlobalStateTable Fds { get; private set; } @@ -44,6 +44,7 @@ namespace Ryujinx.HLE.OsHle.Services.Nv { 3, Initialize }, { 4, QueryEvent }, { 8, SetClientPid }, + { 11, Ioctl }, { 13, FinishInitialize } }; @@ -162,9 +163,9 @@ namespace Ryujinx.HLE.OsHle.Services.Nv return ProcessIoctl(Context, Cmd, NvGpuGpuIoctl.ProcessIoctl); } - private static int ProcessIoctlNvHostChannel(ServiceCtx Context, int Cmd) + private static int ProcessIoctlNvHostGpu(ServiceCtx Context, int Cmd) { - return ProcessIoctl(Context, Cmd, NvHostChannelIoctl.ProcessIoctl); + return ProcessIoctl(Context, Cmd, NvHostChannelIoctl.ProcessIoctlGpu); } private static int ProcessIoctlNvMap(ServiceCtx Context, int Cmd) @@ -207,6 +208,8 @@ namespace Ryujinx.HLE.OsHle.Services.Nv NvGpuASIoctl.UnloadProcess(Process); + NvHostChannelIoctl.UnloadProcess(Process); + NvHostCtrlIoctl.UnloadProcess(Process); NvMapIoctl.UnloadProcess(Process); diff --git a/Ryujinx.HLE/OsHle/Services/Nv/NvHostChannel/NvChannel.cs b/Ryujinx.HLE/OsHle/Services/Nv/NvHostChannel/NvChannel.cs new file mode 100644 index 000000000..486c38069 --- /dev/null +++ b/Ryujinx.HLE/OsHle/Services/Nv/NvHostChannel/NvChannel.cs @@ -0,0 +1,7 @@ +namespace Ryujinx.HLE.OsHle.Services.Nv.NvHostChannel +{ + class NvChannel + { + public int Timeout; + } +} \ No newline at end of file diff --git a/Ryujinx.HLE/OsHle/Services/Nv/NvHostChannel/NvChannelName.cs b/Ryujinx.HLE/OsHle/Services/Nv/NvHostChannel/NvChannelName.cs new file mode 100644 index 000000000..a46a6d987 --- /dev/null +++ b/Ryujinx.HLE/OsHle/Services/Nv/NvHostChannel/NvChannelName.cs @@ -0,0 +1,7 @@ +namespace Ryujinx.HLE.OsHle.Services.Nv.NvHostChannel +{ + enum NvChannelName + { + Gpu + } +} \ No newline at end of file diff --git a/Ryujinx.HLE/OsHle/Services/Nv/NvHostChannel/NvHostChannelIoctl.cs b/Ryujinx.HLE/OsHle/Services/Nv/NvHostChannel/NvHostChannelIoctl.cs index 8f3d3cd7d..411a579a2 100644 --- a/Ryujinx.HLE/OsHle/Services/Nv/NvHostChannel/NvHostChannelIoctl.cs +++ b/Ryujinx.HLE/OsHle/Services/Nv/NvHostChannel/NvHostChannelIoctl.cs @@ -3,23 +3,50 @@ using Ryujinx.HLE.Gpu.Memory; using Ryujinx.HLE.Logging; using Ryujinx.HLE.OsHle.Services.Nv.NvGpuAS; using System; +using System.Collections.Concurrent; namespace Ryujinx.HLE.OsHle.Services.Nv.NvHostChannel { class NvHostChannelIoctl { - public static int ProcessIoctl(ServiceCtx Context, int Cmd) + private class ChannelsPerProcess + { + public ConcurrentDictionary Channels { get; private set; } + + public ChannelsPerProcess() + { + Channels = new ConcurrentDictionary(); + + Channels.TryAdd(NvChannelName.Gpu, new NvChannel()); + } + } + + private static ConcurrentDictionary Channels; + + static NvHostChannelIoctl() + { + Channels = new ConcurrentDictionary(); + } + + public static int ProcessIoctlGpu(ServiceCtx Context, int Cmd) + { + return ProcessIoctl(Context, NvChannelName.Gpu, Cmd); + } + + public static int ProcessIoctl(ServiceCtx Context, NvChannelName Channel, int Cmd) { switch (Cmd & 0xffff) { - case 0x4714: return SetUserData (Context); - case 0x4801: return SetNvMap (Context); - case 0x4808: return SubmitGpfifo (Context); - case 0x4809: return AllocObjCtx (Context); - case 0x480b: return ZcullBind (Context); - case 0x480c: return SetErrorNotifier(Context); - case 0x480d: return SetPriority (Context); - case 0x481a: return AllocGpfifoEx2 (Context); + case 0x4714: return SetUserData (Context); + case 0x4801: return SetNvMap (Context); + case 0x4803: return SetTimeout (Context, Channel); + case 0x4808: return SubmitGpfifo (Context); + case 0x4809: return AllocObjCtx (Context); + case 0x480b: return ZcullBind (Context); + case 0x480c: return SetErrorNotifier (Context); + case 0x480d: return SetPriority (Context); + case 0x481a: return AllocGpfifoEx2 (Context); + case 0x481b: return KickoffPbWithAttr(Context); } throw new NotImplementedException(Cmd.ToString("x8")); @@ -45,6 +72,15 @@ namespace Ryujinx.HLE.OsHle.Services.Nv.NvHostChannel return NvResult.Success; } + private static int SetTimeout(ServiceCtx Context, NvChannelName Channel) + { + long InputPosition = Context.Request.GetBufferType0x21().Position; + + GetChannel(Context, Channel).Timeout = Context.Memory.ReadInt32(InputPosition); + + return NvResult.Success; + } + private static int SubmitGpfifo(ServiceCtx Context) { long InputPosition = Context.Request.GetBufferType0x21().Position; @@ -58,15 +94,7 @@ namespace Ryujinx.HLE.OsHle.Services.Nv.NvHostChannel { long Gpfifo = Context.Memory.ReadInt64(InputPosition + 0x18 + Index * 8); - long VA = Gpfifo & 0xff_ffff_ffff; - - int Size = (int)(Gpfifo >> 40) & 0x7ffffc; - - byte[] Data = Vmm.ReadBytes(VA, Size); - - NvGpuPBEntry[] PushBuffer = NvGpuPushBuffer.Decode(Data); - - Context.Ns.Gpu.Fifo.PushBuffer(Vmm, PushBuffer); + PushGpfifo(Context, Vmm, Gpfifo); } Args.SyncptId = 0; @@ -126,5 +154,57 @@ namespace Ryujinx.HLE.OsHle.Services.Nv.NvHostChannel return NvResult.Success; } + + private static int KickoffPbWithAttr(ServiceCtx Context) + { + long InputPosition = Context.Request.GetBufferType0x21().Position; + long OutputPosition = Context.Request.GetBufferType0x22().Position; + + NvHostChannelSubmitGpfifo Args = AMemoryHelper.Read(Context.Memory, InputPosition); + + NvGpuVmm Vmm = NvGpuASIoctl.GetVmm(Context); + + for (int Index = 0; Index < Args.NumEntries; Index++) + { + long Gpfifo = Context.Memory.ReadInt64(Args.Address + Index * 8); + + PushGpfifo(Context, Vmm, Gpfifo); + } + + Args.SyncptId = 0; + Args.SyncptValue = 0; + + AMemoryHelper.Write(Context.Memory, OutputPosition, Args); + + return NvResult.Success; + } + + private static void PushGpfifo(ServiceCtx Context, NvGpuVmm Vmm, long Gpfifo) + { + long VA = Gpfifo & 0xff_ffff_ffff; + + int Size = (int)(Gpfifo >> 40) & 0x7ffffc; + + byte[] Data = Vmm.ReadBytes(VA, Size); + + NvGpuPBEntry[] PushBuffer = NvGpuPushBuffer.Decode(Data); + + Context.Ns.Gpu.Fifo.PushBuffer(Vmm, PushBuffer); + } + + public static NvChannel GetChannel(ServiceCtx Context, NvChannelName Channel) + { + ChannelsPerProcess Cpp = Channels.GetOrAdd(Context.Process, (Key) => + { + return new ChannelsPerProcess(); + }); + + return Cpp.Channels[Channel]; + } + + public static void UnloadProcess(Process Process) + { + Channels.TryRemove(Process, out _); + } } } \ No newline at end of file diff --git a/Ryujinx.HLE/OsHle/Services/Nv/NvHostChannel/NvHostChannelSubmitGpfifo.cs b/Ryujinx.HLE/OsHle/Services/Nv/NvHostChannel/NvHostChannelSubmitGpfifo.cs index 9541f7018..ee945839e 100644 --- a/Ryujinx.HLE/OsHle/Services/Nv/NvHostChannel/NvHostChannelSubmitGpfifo.cs +++ b/Ryujinx.HLE/OsHle/Services/Nv/NvHostChannel/NvHostChannelSubmitGpfifo.cs @@ -2,7 +2,7 @@ namespace Ryujinx.HLE.OsHle.Services.Nv.NvHostChannel { struct NvHostChannelSubmitGpfifo { - public long Gpfifo; + public long Address; public int NumEntries; public int Flags; public int SyncptId; From 8652dbb57ca3a111c73b779a6f38ec24cc32c2f5 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Sun, 15 Jul 2018 00:34:12 -0300 Subject: [PATCH 19/23] Small nit on GetAudioRendererWorkBufferSize --- Ryujinx.HLE/OsHle/Services/Aud/IAudioRendererManager.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Ryujinx.HLE/OsHle/Services/Aud/IAudioRendererManager.cs b/Ryujinx.HLE/OsHle/Services/Aud/IAudioRendererManager.cs index 021c36357..a71abebe3 100644 --- a/Ryujinx.HLE/OsHle/Services/Aud/IAudioRendererManager.cs +++ b/Ryujinx.HLE/OsHle/Services/Aud/IAudioRendererManager.cs @@ -67,7 +67,7 @@ namespace Ryujinx.HLE.OsHle.Services.Aud ((Params.SinkCount + Params.MixCount) * 0x3C0 + Params.SampleCount * 4) * (Params.Unknown8 + 6), 64); Size += (Params.SinkCount + Params.MixCount) * 0x2C0; - Size += (Params.EffectCount + 4 * Params.VoiceCount) * 0x30 + 0x50; + Size += (Params.EffectCount + Params.VoiceCount * 4) * 0x30 + 0x50; if (IsSplitterSupported) { From a48ed788bf0a0fa678ea0259271058cabec8e1b9 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Sun, 15 Jul 2018 00:37:30 -0300 Subject: [PATCH 20/23] Remove outdated comment --- Ryujinx.HLE/OsHle/Services/Aud/IAudioRendererManager.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Ryujinx.HLE/OsHle/Services/Aud/IAudioRendererManager.cs b/Ryujinx.HLE/OsHle/Services/Aud/IAudioRendererManager.cs index a71abebe3..700b36556 100644 --- a/Ryujinx.HLE/OsHle/Services/Aud/IAudioRendererManager.cs +++ b/Ryujinx.HLE/OsHle/Services/Aud/IAudioRendererManager.cs @@ -51,7 +51,7 @@ namespace Ryujinx.HLE.OsHle.Services.Aud int Revision = (Params.Revision - Rev0Magic) >> 24; - if (Revision <= Rev) //REV3 Max is supported + if (Revision <= Rev) { bool IsSplitterSupported = Revision >= 3; From 50b706e2baef0a7a80af94de51fd9e3bd31ae1ff Mon Sep 17 00:00:00 2001 From: gdkchan Date: Sun, 15 Jul 2018 00:42:59 -0300 Subject: [PATCH 21/23] Fix RevMagic on audren --- Ryujinx.HLE/OsHle/Services/Aud/IAudioRendererManager.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Ryujinx.HLE/OsHle/Services/Aud/IAudioRendererManager.cs b/Ryujinx.HLE/OsHle/Services/Aud/IAudioRendererManager.cs index 700b36556..5e2dec9f2 100644 --- a/Ryujinx.HLE/OsHle/Services/Aud/IAudioRendererManager.cs +++ b/Ryujinx.HLE/OsHle/Services/Aud/IAudioRendererManager.cs @@ -18,7 +18,7 @@ namespace Ryujinx.HLE.OsHle.Services.Aud private const int Rev = 4; - public const int RevMagic = Rev0Magic + Rev; + public const int RevMagic = Rev0Magic + (Rev << 24); private Dictionary m_Commands; From 063fae50fe25388d10e9ec1915c561dc0f4d519d Mon Sep 17 00:00:00 2001 From: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> Date: Sun, 15 Jul 2018 05:53:26 +0200 Subject: [PATCH 22/23] Fix EmitHighNarrow(), EmitSaturatingNarrowOp() when Rd == Rn || Rd == Rm (& Part != 0). Optimization of EmitVectorTranspose(), EmitVectorUnzip(), EmitVectorZip() algorithms (reduction of the number of operations and their complexity). Add 12 Tests about Trn1/2, Uzp1/2, Zip1/2 (V) instructions. (#268) * Update CpuTestSimdArithmetic.cs * Update CpuTestSimd.cs * Update CpuTestSimdReg.cs * Update Instructions.cs * Update AInstEmitSimdArithmetic.cs * Update AInstEmitSimdHelper.cs * Update AInstEmitSimdMove.cs * Delete CpuTestSimdMove.cs --- .../Instruction/AInstEmitSimdArithmetic.cs | 12 +- .../Instruction/AInstEmitSimdHelper.cs | 12 +- ChocolArm64/Instruction/AInstEmitSimdMove.cs | 47 +- Ryujinx.Tests/Cpu/CpuTestSimd.cs | 913 +++--- Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs | 41 +- Ryujinx.Tests/Cpu/CpuTestSimdMove.cs | 136 - Ryujinx.Tests/Cpu/CpuTestSimdReg.cs | 2532 ++++++++++------- Ryujinx.Tests/Cpu/Tester/Instructions.cs | 208 ++ 8 files changed, 2385 insertions(+), 1516 deletions(-) delete mode 100644 Ryujinx.Tests/Cpu/CpuTestSimdMove.cs diff --git a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs index a39ffc093..36bb1cbf1 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs @@ -163,12 +163,19 @@ namespace ChocolArm64.Instruction AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; int Elems = 8 >> Op.Size; + int ESize = 8 << Op.Size; int Part = Op.RegisterSize == ARegisterSize.SIMD128 ? Elems : 0; long RoundConst = 1L << (ESize - 1); + if (Part != 0) + { + Context.EmitLdvec(Op.Rd); + Context.EmitStvectmp(); + } + for (int Index = 0; Index < Elems; Index++) { EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size + 1); @@ -185,9 +192,12 @@ namespace ChocolArm64.Instruction Context.EmitLsr(ESize); - EmitVectorInsert(Context, Op.Rd, Part + Index, Op.Size); + EmitVectorInsertTmp(Context, Part + Index, Op.Size); } + Context.EmitLdvectmp(); + Context.EmitStvec(Op.Rd); + if (Part == 0) { EmitVectorZeroUpper(Context, Op.Rd); diff --git a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs index 1f7a2dad1..7716e2987 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs @@ -813,6 +813,7 @@ namespace ChocolArm64.Instruction AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; int Elems = !Scalar ? 8 >> Op.Size : 1; + int ESize = 8 << Op.Size; int Part = !Scalar && (Op.RegisterSize == ARegisterSize.SIMD128) ? Elems : 0; @@ -823,6 +824,12 @@ namespace ChocolArm64.Instruction Context.EmitLdc_I8(0L); Context.EmitSttmp(); + if (Part != 0) + { + Context.EmitLdvec(Op.Rd); + Context.EmitStvectmp(); + } + for (int Index = 0; Index < Elems; Index++) { AILLabel LblLe = new AILLabel(); @@ -867,9 +874,12 @@ namespace ChocolArm64.Instruction EmitVectorZeroLower(Context, Op.Rd); } - EmitVectorInsert(Context, Op.Rd, Part + Index, Op.Size); + EmitVectorInsertTmp(Context, Part + Index, Op.Size); } + Context.EmitLdvectmp(); + Context.EmitStvec(Op.Rd); + if (Part == 0) { EmitVectorZeroUpper(Context, Op.Rd); diff --git a/ChocolArm64/Instruction/AInstEmitSimdMove.cs b/ChocolArm64/Instruction/AInstEmitSimdMove.cs index 739f01c62..592cab733 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdMove.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdMove.cs @@ -331,17 +331,18 @@ namespace ChocolArm64.Instruction { AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - int Bytes = Op.GetBitsCount() >> 3; + int Words = Op.GetBitsCount() >> 4; + int Pairs = Words >> Op.Size; - int Elems = Bytes >> Op.Size; - - for (int Index = 0; Index < Elems; Index++) + for (int Index = 0; Index < Pairs; Index++) { - int Elem = (Index & ~1) + Part; + int Idx = Index << 1; - EmitVectorExtractZx(Context, (Index & 1) == 0 ? Op.Rn : Op.Rm, Elem, Op.Size); + EmitVectorExtractZx(Context, Op.Rn, Idx + Part, Op.Size); + EmitVectorExtractZx(Context, Op.Rm, Idx + Part, Op.Size); - EmitVectorInsertTmp(Context, Index, Op.Size); + EmitVectorInsertTmp(Context, Idx + 1, Op.Size); + EmitVectorInsertTmp(Context, Idx , Op.Size); } Context.EmitLdvectmp(); @@ -357,18 +358,18 @@ namespace ChocolArm64.Instruction { AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - int Bytes = Op.GetBitsCount() >> 3; + int Words = Op.GetBitsCount() >> 4; + int Pairs = Words >> Op.Size; - int Elems = Bytes >> Op.Size; - int Half = Elems >> 1; - - for (int Index = 0; Index < Elems; Index++) + for (int Index = 0; Index < Pairs; Index++) { - int Elem = Part + ((Index & (Half - 1)) << 1); + int Idx = Index << 1; - EmitVectorExtractZx(Context, Index < Half ? Op.Rn : Op.Rm, Elem, Op.Size); + EmitVectorExtractZx(Context, Op.Rn, Idx + Part, Op.Size); + EmitVectorExtractZx(Context, Op.Rm, Idx + Part, Op.Size); - EmitVectorInsertTmp(Context, Index, Op.Size); + EmitVectorInsertTmp(Context, Pairs + Index, Op.Size); + EmitVectorInsertTmp(Context, Index, Op.Size); } Context.EmitLdvectmp(); @@ -384,18 +385,20 @@ namespace ChocolArm64.Instruction { AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - int Bytes = Op.GetBitsCount() >> 3; + int Words = Op.GetBitsCount() >> 4; + int Pairs = Words >> Op.Size; - int Elems = Bytes >> Op.Size; - int Half = Elems >> 1; + int Base = Part != 0 ? Pairs : 0; - for (int Index = 0; Index < Elems; Index++) + for (int Index = 0; Index < Pairs; Index++) { - int Elem = Part * Half + (Index >> 1); + int Idx = Index << 1; - EmitVectorExtractZx(Context, (Index & 1) == 0 ? Op.Rn : Op.Rm, Elem, Op.Size); + EmitVectorExtractZx(Context, Op.Rn, Base + Index, Op.Size); + EmitVectorExtractZx(Context, Op.Rm, Base + Index, Op.Size); - EmitVectorInsertTmp(Context, Index, Op.Size); + EmitVectorInsertTmp(Context, Idx + 1, Op.Size); + EmitVectorInsertTmp(Context, Idx , Op.Size); } Context.EmitLdvectmp(); diff --git a/Ryujinx.Tests/Cpu/CpuTestSimd.cs b/Ryujinx.Tests/Cpu/CpuTestSimd.cs index 02c5b25b2..b84d29575 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimd.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimd.cs @@ -76,64 +76,53 @@ namespace Ryujinx.Tests.Cpu } #endregion + private const int RndCnt = 1; + [Test, Description("ABS , ")] - public void Abs_S_D([ValueSource("_1D_")] [Random(1)] ulong A) + public void Abs_S_D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_1D_")] [Random(RndCnt)] ulong A) { - uint Opcode = 0x5EE0B820; // ABS D0, D1 + uint Opcode = 0x5EE0B800; // ABS D0, D0 + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); + Vector128 V0 = MakeVectorE0E1(Z, Z); Vector128 V1 = MakeVectorE0(A); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); AArch64.V(1, new Bits(A)); SimdFp.Abs_S(Op[23, 22], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } [Test, Description("ABS ., .")] - public void Abs_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong A, + public void Abs_V_8B_4H_2S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> { - uint Opcode = 0x0E20B820; // ABS V0.8B, V1.8B + uint Opcode = 0x0E20B800; // ABS V0.8B, V0.8B + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); + Vector128 V0 = MakeVectorE0E1(Z, Z); Vector128 V1 = MakeVectorE0(A); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); AArch64.V(1, new Bits(A)); SimdFp.Abs_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); - Assert.Multiple(() => - { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); - }); - } - - [Test, Pairwise, Description("ABS ., .")] - public void Abs_V_16B_8H_4S_2D([ValueSource("_8B4H2S1D_")] [Random(1)] ulong A0, - [ValueSource("_8B4H2S1D_")] [Random(1)] ulong A1, - [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D> - { - uint Opcode = 0x4E20B820; // ABS V0.16B, V1.16B - Opcode |= ((size & 3) << 22); - Bits Op = new Bits(Opcode); - - Vector128 V1 = MakeVectorE0E1(A0, A1); - AThreadState ThreadState = SingleOpcode(Opcode, V1: V1); - - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); - SimdFp.Abs_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); - Assert.Multiple(() => { Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); @@ -141,114 +130,157 @@ namespace Ryujinx.Tests.Cpu }); } - [Test, Pairwise, Description("ADDP , .")] - public void Addp_S_2DD([ValueSource("_1D_")] [Random(1)] ulong A0, - [ValueSource("_1D_")] [Random(1)] ulong A1) + [Test, Description("ABS ., .")] + public void Abs_V_16B_8H_4S_2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A, + [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D> { - uint Opcode = 0x5EF1B820; // ADDP D0, V1.2D + uint Opcode = 0x4E20B800; // ABS V0.16B, V0.16B + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); - Vector128 V1 = MakeVectorE0E1(A0, A1); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + SimdFp.Abs_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Description("ADDP , .")] + public void Addp_S_2DD([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_1D_")] [Random(RndCnt)] ulong A) + { + uint Opcode = 0x5EF1B800; // ADDP D0, V0.2D + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); SimdFp.Addp_S(Op[23, 22], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } [Test, Description("ADDV , .")] - public void Addv_V_8BB_4HH([ValueSource("_8B4H_")] [Random(1)] ulong A, - [Values(0b00u, 0b01u)] uint size) // <8B, 4H> + public void Addv_V_8BB_4HH([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_8B4H_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H_")] [Random(RndCnt)] ulong A, + [Values(0b00u, 0b01u)] uint size) // <8BB, 4HH> { - uint Opcode = 0x0E31B820; // ADDV B0, V1.8B + uint Opcode = 0x0E31B800; // ADDV B0, V0.8B + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE0E1(TestContext.CurrentContext.Random.NextULong(), - TestContext.CurrentContext.Random.NextULong()); + Vector128 V0 = MakeVectorE0E1(Z, Z); Vector128 V1 = MakeVectorE0(A); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); - AArch64.Vpart(0, 0, new Bits(TestContext.CurrentContext.Random.NextULong())); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); AArch64.V(1, new Bits(A)); SimdFp.Addv_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } - [Test, Pairwise, Description("ADDV , .")] - public void Addv_V_16BB_8HH_4SS([ValueSource("_8B4H2S_")] [Random(1)] ulong A0, - [ValueSource("_8B4H2S_")] [Random(1)] ulong A1, - [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B, 8H, 4S> + [Test, Description("ADDV , .")] + public void Addv_V_16BB_8HH_4SS([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <16BB, 8HH, 4SS> { - uint Opcode = 0x4E31B820; // ADDV B0, V1.16B + uint Opcode = 0x4E31B800; // ADDV B0, V0.16B + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE0E1(TestContext.CurrentContext.Random.NextULong(), - TestContext.CurrentContext.Random.NextULong()); - Vector128 V1 = MakeVectorE0E1(A0, A1); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); - AArch64.Vpart(0, 0, new Bits(TestContext.CurrentContext.Random.NextULong())); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); SimdFp.Addv_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } [Test, Description("CLS ., .")] - public void Cls_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong A, + public void Cls_V_8B_4H_2S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> { - uint Opcode = 0x0E204820; // CLS V0.8B, V1.8B + uint Opcode = 0x0E204800; // CLS V0.8B, V0.8B + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); + Vector128 V0 = MakeVectorE0E1(Z, Z); Vector128 V1 = MakeVectorE0(A); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); AArch64.V(1, new Bits(A)); SimdFp.Cls_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } - [Test, Pairwise, Description("CLS ., .")] - public void Cls_V_16B_8H_4S([ValueSource("_8B4H2S_")] [Random(1)] ulong A0, - [ValueSource("_8B4H2S_")] [Random(1)] ulong A1, + [Test, Description("CLS ., .")] + public void Cls_V_16B_8H_4S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B, 8H, 4S> { - uint Opcode = 0x4E204820; // CLS V0.16B, V1.16B + uint Opcode = 0x4E204800; // CLS V0.16B, V0.16B + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V1 = MakeVectorE0E1(A0, A1); - AThreadState ThreadState = SingleOpcode(Opcode, V1: V1); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); SimdFp.Cls_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); Assert.Multiple(() => @@ -259,41 +291,50 @@ namespace Ryujinx.Tests.Cpu } [Test, Description("CLZ ., .")] - public void Clz_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong A, + public void Clz_V_8B_4H_2S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> { - uint Opcode = 0x2E204820; // CLZ V0.8B, V1.8B + uint Opcode = 0x2E204800; // CLZ V0.8B, V0.8B + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); + Vector128 V0 = MakeVectorE0E1(Z, Z); Vector128 V1 = MakeVectorE0(A); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); AArch64.V(1, new Bits(A)); SimdFp.Clz_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } - [Test, Pairwise, Description("CLZ ., .")] - public void Clz_V_16B_8H_4S([ValueSource("_8B4H2S_")] [Random(1)] ulong A0, - [ValueSource("_8B4H2S_")] [Random(1)] ulong A1, + [Test, Description("CLZ ., .")] + public void Clz_V_16B_8H_4S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B, 8H, 4S> { - uint Opcode = 0x6E204820; // CLZ V0.16B, V1.16B + uint Opcode = 0x6E204800; // CLZ V0.16B, V0.16B + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V1 = MakeVectorE0E1(A0, A1); - AThreadState ThreadState = SingleOpcode(Opcode, V1: V1); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); SimdFp.Clz_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); Assert.Multiple(() => @@ -304,61 +345,75 @@ namespace Ryujinx.Tests.Cpu } [Test, Description("CMEQ , , #0")] - public void Cmeq_S_D([ValueSource("_1D_")] [Random(1)] ulong A) + public void Cmeq_S_D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_1D_")] [Random(RndCnt)] ulong A) { - uint Opcode = 0x5EE09820; // CMEQ D0, D1, #0 + uint Opcode = 0x5EE09800; // CMEQ D0, D0, #0 + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); + Vector128 V0 = MakeVectorE0E1(Z, Z); Vector128 V1 = MakeVectorE0(A); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); AArch64.V(1, new Bits(A)); SimdFp.Cmeq_Zero_S(Op[23, 22], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } [Test, Description("CMEQ ., ., #0")] - public void Cmeq_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong A, + public void Cmeq_V_8B_4H_2S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> { - uint Opcode = 0x0E209820; // CMEQ V0.8B, V1.8B, #0 + uint Opcode = 0x0E209800; // CMEQ V0.8B, V0.8B, #0 + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); + Vector128 V0 = MakeVectorE0E1(Z, Z); Vector128 V1 = MakeVectorE0(A); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); AArch64.V(1, new Bits(A)); SimdFp.Cmeq_Zero_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } - [Test, Pairwise, Description("CMEQ ., ., #0")] - public void Cmeq_V_16B_8H_4S_2D([ValueSource("_8B4H2S1D_")] [Random(1)] ulong A0, - [ValueSource("_8B4H2S1D_")] [Random(1)] ulong A1, + [Test, Description("CMEQ ., ., #0")] + public void Cmeq_V_16B_8H_4S_2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A, [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D> { - uint Opcode = 0x4E209820; // CMEQ V0.16B, V1.16B, #0 + uint Opcode = 0x4E209800; // CMEQ V0.16B, V0.16B, #0 + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V1 = MakeVectorE0E1(A0, A1); - AThreadState ThreadState = SingleOpcode(Opcode, V1: V1); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); SimdFp.Cmeq_Zero_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); Assert.Multiple(() => @@ -369,61 +424,75 @@ namespace Ryujinx.Tests.Cpu } [Test, Description("CMGE , , #0")] - public void Cmge_S_D([ValueSource("_1D_")] [Random(1)] ulong A) + public void Cmge_S_D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_1D_")] [Random(RndCnt)] ulong A) { - uint Opcode = 0x7EE08820; // CMGE D0, D1, #0 + uint Opcode = 0x7EE08800; // CMGE D0, D0, #0 + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); + Vector128 V0 = MakeVectorE0E1(Z, Z); Vector128 V1 = MakeVectorE0(A); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); AArch64.V(1, new Bits(A)); SimdFp.Cmge_Zero_S(Op[23, 22], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } [Test, Description("CMGE ., ., #0")] - public void Cmge_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong A, + public void Cmge_V_8B_4H_2S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> { - uint Opcode = 0x2E208820; // CMGE V0.8B, V1.8B, #0 + uint Opcode = 0x2E208800; // CMGE V0.8B, V0.8B, #0 + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); + Vector128 V0 = MakeVectorE0E1(Z, Z); Vector128 V1 = MakeVectorE0(A); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); AArch64.V(1, new Bits(A)); SimdFp.Cmge_Zero_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } - [Test, Pairwise, Description("CMGE ., ., #0")] - public void Cmge_V_16B_8H_4S_2D([ValueSource("_8B4H2S1D_")] [Random(1)] ulong A0, - [ValueSource("_8B4H2S1D_")] [Random(1)] ulong A1, + [Test, Description("CMGE ., ., #0")] + public void Cmge_V_16B_8H_4S_2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A, [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D> { - uint Opcode = 0x6E208820; // CMGE V0.16B, V1.16B, #0 + uint Opcode = 0x6E208800; // CMGE V0.16B, V0.16B, #0 + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V1 = MakeVectorE0E1(A0, A1); - AThreadState ThreadState = SingleOpcode(Opcode, V1: V1); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); SimdFp.Cmge_Zero_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); Assert.Multiple(() => @@ -434,61 +503,75 @@ namespace Ryujinx.Tests.Cpu } [Test, Description("CMGT , , #0")] - public void Cmgt_S_D([ValueSource("_1D_")] [Random(1)] ulong A) + public void Cmgt_S_D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_1D_")] [Random(RndCnt)] ulong A) { - uint Opcode = 0x5EE08820; // CMGT D0, D1, #0 + uint Opcode = 0x5EE08800; // CMGT D0, D0, #0 + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); + Vector128 V0 = MakeVectorE0E1(Z, Z); Vector128 V1 = MakeVectorE0(A); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); AArch64.V(1, new Bits(A)); SimdFp.Cmgt_Zero_S(Op[23, 22], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } [Test, Description("CMGT ., ., #0")] - public void Cmgt_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong A, + public void Cmgt_V_8B_4H_2S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> { - uint Opcode = 0x0E208820; // CMGT V0.8B, V1.8B, #0 + uint Opcode = 0x0E208800; // CMGT V0.8B, V0.8B, #0 + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); + Vector128 V0 = MakeVectorE0E1(Z, Z); Vector128 V1 = MakeVectorE0(A); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); AArch64.V(1, new Bits(A)); SimdFp.Cmgt_Zero_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } - [Test, Pairwise, Description("CMGT ., ., #0")] - public void Cmgt_V_16B_8H_4S_2D([ValueSource("_8B4H2S1D_")] [Random(1)] ulong A0, - [ValueSource("_8B4H2S1D_")] [Random(1)] ulong A1, + [Test, Description("CMGT ., ., #0")] + public void Cmgt_V_16B_8H_4S_2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A, [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D> { - uint Opcode = 0x4E208820; // CMGT V0.16B, V1.16B, #0 + uint Opcode = 0x4E208800; // CMGT V0.16B, V0.16B, #0 + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V1 = MakeVectorE0E1(A0, A1); - AThreadState ThreadState = SingleOpcode(Opcode, V1: V1); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); SimdFp.Cmgt_Zero_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); Assert.Multiple(() => @@ -499,61 +582,75 @@ namespace Ryujinx.Tests.Cpu } [Test, Description("CMLE , , #0")] - public void Cmle_S_D([ValueSource("_1D_")] [Random(1)] ulong A) + public void Cmle_S_D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_1D_")] [Random(RndCnt)] ulong A) { - uint Opcode = 0x7EE09820; // CMLE D0, D1, #0 + uint Opcode = 0x7EE09800; // CMLE D0, D0, #0 + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); + Vector128 V0 = MakeVectorE0E1(Z, Z); Vector128 V1 = MakeVectorE0(A); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); AArch64.V(1, new Bits(A)); SimdFp.Cmle_S(Op[23, 22], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } [Test, Description("CMLE ., ., #0")] - public void Cmle_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong A, + public void Cmle_V_8B_4H_2S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> { - uint Opcode = 0x2E209820; // CMLE V0.8B, V1.8B, #0 + uint Opcode = 0x2E209800; // CMLE V0.8B, V0.8B, #0 + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); + Vector128 V0 = MakeVectorE0E1(Z, Z); Vector128 V1 = MakeVectorE0(A); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); AArch64.V(1, new Bits(A)); SimdFp.Cmle_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } - [Test, Pairwise, Description("CMLE ., ., #0")] - public void Cmle_V_16B_8H_4S_2D([ValueSource("_8B4H2S1D_")] [Random(1)] ulong A0, - [ValueSource("_8B4H2S1D_")] [Random(1)] ulong A1, + [Test, Description("CMLE ., ., #0")] + public void Cmle_V_16B_8H_4S_2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A, [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D> { - uint Opcode = 0x6E209820; // CMLE V0.16B, V1.16B, #0 + uint Opcode = 0x6E209800; // CMLE V0.16B, V0.16B, #0 + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V1 = MakeVectorE0E1(A0, A1); - AThreadState ThreadState = SingleOpcode(Opcode, V1: V1); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); SimdFp.Cmle_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); Assert.Multiple(() => @@ -564,61 +661,75 @@ namespace Ryujinx.Tests.Cpu } [Test, Description("CMLT , , #0")] - public void Cmlt_S_D([ValueSource("_1D_")] [Random(1)] ulong A) + public void Cmlt_S_D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_1D_")] [Random(RndCnt)] ulong A) { - uint Opcode = 0x5EE0A820; // CMLT D0, D1, #0 + uint Opcode = 0x5EE0A800; // CMLT D0, D0, #0 + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); + Vector128 V0 = MakeVectorE0E1(Z, Z); Vector128 V1 = MakeVectorE0(A); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); AArch64.V(1, new Bits(A)); SimdFp.Cmlt_S(Op[23, 22], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } [Test, Description("CMLT ., ., #0")] - public void Cmlt_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong A, + public void Cmlt_V_8B_4H_2S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> { - uint Opcode = 0x0E20A820; // CMLT V0.8B, V1.8B, #0 + uint Opcode = 0x0E20A800; // CMLT V0.8B, V0.8B, #0 + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); + Vector128 V0 = MakeVectorE0E1(Z, Z); Vector128 V1 = MakeVectorE0(A); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); AArch64.V(1, new Bits(A)); SimdFp.Cmlt_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } - [Test, Pairwise, Description("CMLT ., ., #0")] - public void Cmlt_V_16B_8H_4S_2D([ValueSource("_8B4H2S1D_")] [Random(1)] ulong A0, - [ValueSource("_8B4H2S1D_")] [Random(1)] ulong A1, + [Test, Description("CMLT ., ., #0")] + public void Cmlt_V_16B_8H_4S_2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A, [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D> { - uint Opcode = 0x4E20A820; // CMLT V0.16B, V1.16B, #0 + uint Opcode = 0x4E20A800; // CMLT V0.16B, V0.16B, #0 + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V1 = MakeVectorE0E1(A0, A1); - AThreadState ThreadState = SingleOpcode(Opcode, V1: V1); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); SimdFp.Cmlt_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); Assert.Multiple(() => @@ -629,37 +740,46 @@ namespace Ryujinx.Tests.Cpu } [Test, Description("CNT ., .")] - public void Cnt_V_8B([ValueSource("_8B_")] [Random(1)] ulong A) + public void Cnt_V_8B([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_8B_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B_")] [Random(RndCnt)] ulong A) { - uint Opcode = 0x0E205820; // CNT V0.8B, V1.8B + uint Opcode = 0x0E205800; // CNT V0.8B, V0.8B + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); + Vector128 V0 = MakeVectorE0E1(Z, Z); Vector128 V1 = MakeVectorE0(A); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); AArch64.V(1, new Bits(A)); SimdFp.Cnt_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } - [Test, Pairwise, Description("CNT ., .")] - public void Cnt_V_16B([ValueSource("_8B_")] [Random(1)] ulong A0, - [ValueSource("_8B_")] [Random(1)] ulong A1) + [Test, Description("CNT ., .")] + public void Cnt_V_16B([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_8B_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B_")] [Random(RndCnt)] ulong A) { - uint Opcode = 0x4E205820; // CNT V0.16B, V1.16B + uint Opcode = 0x4E205800; // CNT V0.16B, V0.16B + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Bits Op = new Bits(Opcode); - Vector128 V1 = MakeVectorE0E1(A0, A1); - AThreadState ThreadState = SingleOpcode(Opcode, V1: V1); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); SimdFp.Cnt_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); Assert.Multiple(() => @@ -670,61 +790,75 @@ namespace Ryujinx.Tests.Cpu } [Test, Description("NEG , ")] - public void Neg_S_D([ValueSource("_1D_")] [Random(1)] ulong A) + public void Neg_S_D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_1D_")] [Random(RndCnt)] ulong A) { - uint Opcode = 0x7EE0B820; // NEG D0, D1 + uint Opcode = 0x7EE0B800; // NEG D0, D0 + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); + Vector128 V0 = MakeVectorE0E1(Z, Z); Vector128 V1 = MakeVectorE0(A); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); AArch64.V(1, new Bits(A)); SimdFp.Neg_S(Op[23, 22], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } [Test, Description("NEG ., .")] - public void Neg_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong A, + public void Neg_V_8B_4H_2S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> { - uint Opcode = 0x2E20B820; // NEG V0.8B, V1.8B + uint Opcode = 0x2E20B800; // NEG V0.8B, V0.8B + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); + Vector128 V0 = MakeVectorE0E1(Z, Z); Vector128 V1 = MakeVectorE0(A); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); AArch64.V(1, new Bits(A)); SimdFp.Neg_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } - [Test, Pairwise, Description("NEG ., .")] - public void Neg_V_16B_8H_4S_2D([ValueSource("_8B4H2S1D_")] [Random(1)] ulong A0, - [ValueSource("_8B4H2S1D_")] [Random(1)] ulong A1, + [Test, Description("NEG ., .")] + public void Neg_V_16B_8H_4S_2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A, [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D> { - uint Opcode = 0x6E20B820; // NEG V0.16B, V1.16B + uint Opcode = 0x6E20B800; // NEG V0.16B, V0.16B + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V1 = MakeVectorE0E1(A0, A1); - AThreadState ThreadState = SingleOpcode(Opcode, V1: V1); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); SimdFp.Neg_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); Assert.Multiple(() => @@ -735,37 +869,46 @@ namespace Ryujinx.Tests.Cpu } [Test, Description("NOT ., .")] - public void Not_V_8B([ValueSource("_8B_")] [Random(1)] ulong A) + public void Not_V_8B([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_8B_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B_")] [Random(RndCnt)] ulong A) { - uint Opcode = 0x2E205820; // NOT V0.8B, V1.8B + uint Opcode = 0x2E205800; // NOT V0.8B, V0.8B + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); + Vector128 V0 = MakeVectorE0E1(Z, Z); Vector128 V1 = MakeVectorE0(A); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); AArch64.V(1, new Bits(A)); SimdFp.Not_V(Op[30], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } - [Test, Pairwise, Description("NOT ., .")] - public void Not_V_16B([ValueSource("_8B_")] [Random(1)] ulong A0, - [ValueSource("_8B_")] [Random(1)] ulong A1) + [Test, Description("NOT ., .")] + public void Not_V_16B([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_8B_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B_")] [Random(RndCnt)] ulong A) { - uint Opcode = 0x6E205820; // NOT V0.16B, V1.16B + uint Opcode = 0x6E205800; // NOT V0.16B, V0.16B + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Bits Op = new Bits(Opcode); - Vector128 V1 = MakeVectorE0E1(A0, A1); - AThreadState ThreadState = SingleOpcode(Opcode, V1: V1); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); SimdFp.Not_V(Op[30], Op[9, 5], Op[4, 0]); Assert.Multiple(() => @@ -776,37 +919,46 @@ namespace Ryujinx.Tests.Cpu } [Test, Description("RBIT ., .")] - public void Rbit_V_8B([ValueSource("_8B_")] [Random(1)] ulong A) + public void Rbit_V_8B([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_8B_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B_")] [Random(RndCnt)] ulong A) { - uint Opcode = 0x2E605820; // RBIT V0.8B, V1.8B + uint Opcode = 0x2E605800; // RBIT V0.8B, V0.8B + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); + Vector128 V0 = MakeVectorE0E1(Z, Z); Vector128 V1 = MakeVectorE0(A); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); AArch64.V(1, new Bits(A)); SimdFp.Rbit_V(Op[30], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } - [Test, Pairwise, Description("RBIT ., .")] - public void Rbit_V_16B([ValueSource("_8B_")] [Random(1)] ulong A0, - [ValueSource("_8B_")] [Random(1)] ulong A1) + [Test, Description("RBIT ., .")] + public void Rbit_V_16B([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_8B_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B_")] [Random(RndCnt)] ulong A) { - uint Opcode = 0x6E605820; // RBIT V0.16B, V1.16B + uint Opcode = 0x6E605800; // RBIT V0.16B, V0.16B + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Bits Op = new Bits(Opcode); - Vector128 V1 = MakeVectorE0E1(A0, A1); - AThreadState ThreadState = SingleOpcode(Opcode, V1: V1); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); SimdFp.Rbit_V(Op[30], Op[9, 5], Op[4, 0]); Assert.Multiple(() => @@ -817,37 +969,46 @@ namespace Ryujinx.Tests.Cpu } [Test, Description("REV16 ., .")] - public void Rev16_V_8B([ValueSource("_8B_")] [Random(1)] ulong A) + public void Rev16_V_8B([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_8B_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B_")] [Random(RndCnt)] ulong A) { - uint Opcode = 0x0E201820; // REV16 V0.8B, V1.8B + uint Opcode = 0x0E201800; // REV16 V0.8B, V0.8B + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); + Vector128 V0 = MakeVectorE0E1(Z, Z); Vector128 V1 = MakeVectorE0(A); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); AArch64.V(1, new Bits(A)); SimdFp.Rev16_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } - [Test, Pairwise, Description("REV16 ., .")] - public void Rev16_V_16B([ValueSource("_8B_")] [Random(1)] ulong A0, - [ValueSource("_8B_")] [Random(1)] ulong A1) + [Test, Description("REV16 ., .")] + public void Rev16_V_16B([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_8B_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B_")] [Random(RndCnt)] ulong A) { - uint Opcode = 0x4E201820; // REV16 V0.16B, V1.16B + uint Opcode = 0x4E201800; // REV16 V0.16B, V0.16B + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Bits Op = new Bits(Opcode); - Vector128 V1 = MakeVectorE0E1(A0, A1); - AThreadState ThreadState = SingleOpcode(Opcode, V1: V1); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); SimdFp.Rev16_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); Assert.Multiple(() => @@ -858,41 +1019,50 @@ namespace Ryujinx.Tests.Cpu } [Test, Description("REV32 ., .")] - public void Rev32_V_8B_4H([ValueSource("_8B4H_")] [Random(1)] ulong A, + public void Rev32_V_8B_4H([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_8B4H_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H_")] [Random(RndCnt)] ulong A, [Values(0b00u, 0b01u)] uint size) // <8B, 4H> { - uint Opcode = 0x2E200820; // REV32 V0.8B, V1.8B + uint Opcode = 0x2E200800; // REV32 V0.8B, V0.8B + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); + Vector128 V0 = MakeVectorE0E1(Z, Z); Vector128 V1 = MakeVectorE0(A); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); AArch64.V(1, new Bits(A)); SimdFp.Rev32_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } - [Test, Pairwise, Description("REV32 ., .")] - public void Rev32_V_16B_8H([ValueSource("_8B4H_")] [Random(1)] ulong A0, - [ValueSource("_8B4H_")] [Random(1)] ulong A1, + [Test, Description("REV32 ., .")] + public void Rev32_V_16B_8H([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_8B4H_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H_")] [Random(RndCnt)] ulong A, [Values(0b00u, 0b01u)] uint size) // <16B, 8H> { - uint Opcode = 0x6E200820; // REV32 V0.16B, V1.16B + uint Opcode = 0x6E200800; // REV32 V0.16B, V0.16B + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V1 = MakeVectorE0E1(A0, A1); - AThreadState ThreadState = SingleOpcode(Opcode, V1: V1); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); SimdFp.Rev32_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); Assert.Multiple(() => @@ -903,41 +1073,50 @@ namespace Ryujinx.Tests.Cpu } [Test, Description("REV64 ., .")] - public void Rev64_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong A, + public void Rev64_V_8B_4H_2S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> { - uint Opcode = 0x0E200820; // REV64 V0.8B, V1.8B + uint Opcode = 0x0E200800; // REV64 V0.8B, V0.8B + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); + Vector128 V0 = MakeVectorE0E1(Z, Z); Vector128 V1 = MakeVectorE0(A); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); AArch64.V(1, new Bits(A)); SimdFp.Rev64_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } - [Test, Pairwise, Description("REV64 ., .")] - public void Rev64_V_16B_8H_4S([ValueSource("_8B4H2S_")] [Random(1)] ulong A0, - [ValueSource("_8B4H2S_")] [Random(1)] ulong A1, + [Test, Description("REV64 ., .")] + public void Rev64_V_16B_8H_4S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B, 8H, 4S> { - uint Opcode = 0x4E200820; // REV64 V0.16B, V1.16B + uint Opcode = 0x4E200800; // REV64 V0.16B, V0.16B + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V1 = MakeVectorE0E1(A0, A1); - AThreadState ThreadState = SingleOpcode(Opcode, V1: V1); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); SimdFp.Rev64_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); Assert.Multiple(() => @@ -948,228 +1127,252 @@ namespace Ryujinx.Tests.Cpu } [Test, Description("SQXTN , ")] - public void Sqxtn_S_HB_SH_DS([ValueSource("_1H1S1D_")] [Random(1)] ulong A, + public void Sqxtn_S_HB_SH_DS([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_1H1S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_1H1S1D_")] [Random(RndCnt)] ulong A, [Values(0b00u, 0b01u, 0b10u)] uint size) // { - uint Opcode = 0x5E214820; // SQXTN B0, H1 + uint Opcode = 0x5E214800; // SQXTN B0, H0 + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE0E1(TestContext.CurrentContext.Random.NextULong(), - TestContext.CurrentContext.Random.NextULong()); + Vector128 V0 = MakeVectorE0E1(Z, Z); Vector128 V1 = MakeVectorE0(A); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); - AArch64.Vpart(0, 0, new Bits(TestContext.CurrentContext.Random.NextULong())); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); AArch64.V(1, new Bits(A)); SimdFp.Sqxtn_S(Op[23, 22], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); Assert.That(((ThreadState.Fpsr >> 27) & 1) != 0, Is.EqualTo(Shared.FPSR[27])); } - [Test, Pairwise, Description("SQXTN{2} ., .")] - public void Sqxtn_V_8H8B_4S4H_2D2S([ValueSource("_4H2S1D_")] [Random(1)] ulong A0, - [ValueSource("_4H2S1D_")] [Random(1)] ulong A1, + [Test, Description("SQXTN{2} ., .")] + public void Sqxtn_V_8H8B_4S4H_2D2S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, [Values(0b00u, 0b01u, 0b10u)] uint size) // <8H8B, 4S4H, 2D2S> { - uint Opcode = 0x0E214820; // SQXTN V0.8B, V1.8H + uint Opcode = 0x0E214800; // SQXTN V0.8B, V0.8H + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); - Vector128 V1 = MakeVectorE0E1(A0, A1); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); SimdFp.Sqxtn_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); Assert.That(((ThreadState.Fpsr >> 27) & 1) != 0, Is.EqualTo(Shared.FPSR[27])); } - [Test, Pairwise, Description("SQXTN{2} ., .")] - public void Sqxtn_V_8H16B_4S8H_2D4S([ValueSource("_4H2S1D_")] [Random(1)] ulong A0, - [ValueSource("_4H2S1D_")] [Random(1)] ulong A1, + [Test, Description("SQXTN{2} ., .")] + public void Sqxtn_V_8H16B_4S8H_2D4S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, [Values(0b00u, 0b01u, 0b10u)] uint size) // <8H16B, 4S8H, 2D4S> { - uint Opcode = 0x4E214820; // SQXTN2 V0.16B, V1.8H + uint Opcode = 0x4E214800; // SQXTN2 V0.16B, V0.8H + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - ulong _E0 = TestContext.CurrentContext.Random.NextULong(); - Vector128 V0 = MakeVectorE0(_E0); - Vector128 V1 = MakeVectorE0E1(A0, A1); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); SimdFp.Sqxtn_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(_E0)); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); Assert.That(((ThreadState.Fpsr >> 27) & 1) != 0, Is.EqualTo(Shared.FPSR[27])); } [Test, Description("SQXTUN , ")] - public void Sqxtun_S_HB_SH_DS([ValueSource("_1H1S1D_")] [Random(1)] ulong A, + public void Sqxtun_S_HB_SH_DS([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_1H1S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_1H1S1D_")] [Random(RndCnt)] ulong A, [Values(0b00u, 0b01u, 0b10u)] uint size) // { - uint Opcode = 0x7E212820; // SQXTUN B0, H1 + uint Opcode = 0x7E212800; // SQXTUN B0, H0 + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE0E1(TestContext.CurrentContext.Random.NextULong(), - TestContext.CurrentContext.Random.NextULong()); + Vector128 V0 = MakeVectorE0E1(Z, Z); Vector128 V1 = MakeVectorE0(A); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); - AArch64.Vpart(0, 0, new Bits(TestContext.CurrentContext.Random.NextULong())); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); AArch64.V(1, new Bits(A)); SimdFp.Sqxtun_S(Op[23, 22], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); Assert.That(((ThreadState.Fpsr >> 27) & 1) != 0, Is.EqualTo(Shared.FPSR[27])); } - [Test, Pairwise, Description("SQXTUN{2} ., .")] - public void Sqxtun_V_8H8B_4S4H_2D2S([ValueSource("_4H2S1D_")] [Random(1)] ulong A0, - [ValueSource("_4H2S1D_")] [Random(1)] ulong A1, + [Test, Description("SQXTUN{2} ., .")] + public void Sqxtun_V_8H8B_4S4H_2D2S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, [Values(0b00u, 0b01u, 0b10u)] uint size) // <8H8B, 4S4H, 2D2S> { - uint Opcode = 0x2E212820; // SQXTUN V0.8B, V1.8H + uint Opcode = 0x2E212800; // SQXTUN V0.8B, V0.8H + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); - Vector128 V1 = MakeVectorE0E1(A0, A1); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); SimdFp.Sqxtun_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); Assert.That(((ThreadState.Fpsr >> 27) & 1) != 0, Is.EqualTo(Shared.FPSR[27])); } - [Test, Pairwise, Description("SQXTUN{2} ., .")] - public void Sqxtun_V_8H16B_4S8H_2D4S([ValueSource("_4H2S1D_")] [Random(1)] ulong A0, - [ValueSource("_4H2S1D_")] [Random(1)] ulong A1, + [Test, Description("SQXTUN{2} ., .")] + public void Sqxtun_V_8H16B_4S8H_2D4S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, [Values(0b00u, 0b01u, 0b10u)] uint size) // <8H16B, 4S8H, 2D4S> { - uint Opcode = 0x6E212820; // SQXTUN2 V0.16B, V1.8H + uint Opcode = 0x6E212800; // SQXTUN2 V0.16B, V0.8H + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - ulong _E0 = TestContext.CurrentContext.Random.NextULong(); - Vector128 V0 = MakeVectorE0(_E0); - Vector128 V1 = MakeVectorE0E1(A0, A1); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); SimdFp.Sqxtun_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(_E0)); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); Assert.That(((ThreadState.Fpsr >> 27) & 1) != 0, Is.EqualTo(Shared.FPSR[27])); } [Test, Description("UQXTN , ")] - public void Uqxtn_S_HB_SH_DS([ValueSource("_1H1S1D_")] [Random(1)] ulong A, + public void Uqxtn_S_HB_SH_DS([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_1H1S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_1H1S1D_")] [Random(RndCnt)] ulong A, [Values(0b00u, 0b01u, 0b10u)] uint size) // { - uint Opcode = 0x7E214820; // UQXTN B0, H1 + uint Opcode = 0x7E214800; // UQXTN B0, H0 + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE0E1(TestContext.CurrentContext.Random.NextULong(), - TestContext.CurrentContext.Random.NextULong()); + Vector128 V0 = MakeVectorE0E1(Z, Z); Vector128 V1 = MakeVectorE0(A); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); - AArch64.Vpart(0, 0, new Bits(TestContext.CurrentContext.Random.NextULong())); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); AArch64.V(1, new Bits(A)); SimdFp.Uqxtn_S(Op[23, 22], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); Assert.That(((ThreadState.Fpsr >> 27) & 1) != 0, Is.EqualTo(Shared.FPSR[27])); } - [Test, Pairwise, Description("UQXTN{2} ., .")] - public void Uqxtn_V_8H8B_4S4H_2D2S([ValueSource("_4H2S1D_")] [Random(1)] ulong A0, - [ValueSource("_4H2S1D_")] [Random(1)] ulong A1, + [Test, Description("UQXTN{2} ., .")] + public void Uqxtn_V_8H8B_4S4H_2D2S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, [Values(0b00u, 0b01u, 0b10u)] uint size) // <8H8B, 4S4H, 2D2S> { - uint Opcode = 0x2E214820; // UQXTN V0.8B, V1.8H + uint Opcode = 0x2E214800; // UQXTN V0.8B, V0.8H + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); - Vector128 V1 = MakeVectorE0E1(A0, A1); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); SimdFp.Uqxtn_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); Assert.That(((ThreadState.Fpsr >> 27) & 1) != 0, Is.EqualTo(Shared.FPSR[27])); } - [Test, Pairwise, Description("UQXTN{2} ., .")] - public void Uqxtn_V_8H16B_4S8H_2D4S([ValueSource("_4H2S1D_")] [Random(1)] ulong A0, - [ValueSource("_4H2S1D_")] [Random(1)] ulong A1, + [Test, Description("UQXTN{2} ., .")] + public void Uqxtn_V_8H16B_4S8H_2D4S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, [Values(0b00u, 0b01u, 0b10u)] uint size) // <8H16B, 4S8H, 2D4S> { - uint Opcode = 0x6E214820; // UQXTN2 V0.16B, V1.8H + uint Opcode = 0x6E214800; // UQXTN2 V0.16B, V0.8H + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - ulong _E0 = TestContext.CurrentContext.Random.NextULong(); - Vector128 V0 = MakeVectorE0(_E0); - Vector128 V1 = MakeVectorE0E1(A0, A1); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); SimdFp.Uqxtn_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(_E0)); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); Assert.That(((ThreadState.Fpsr >> 27) & 1) != 0, Is.EqualTo(Shared.FPSR[27])); diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs b/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs index 2a0f5ed91..8e2d9a366 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs @@ -9,46 +9,6 @@ namespace Ryujinx.Tests.Cpu { public class CpuTestSimdArithmetic : CpuTest { - [TestCase(0xE228420u, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul)] - [TestCase(0xE228420u, 0x00000000FFFFFFFFul, 0x00000000FFFFFFFFul, 0x0000000000000001ul, 0x0000000000000001ul, 0x00000000FFFFFF00ul, 0x0000000000000000ul)] - [TestCase(0xE228420u, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFEFEFEFEFEFEFEFEul, 0x0000000000000000ul)] - [TestCase(0xE228420u, 0x0102030405060708ul, 0xAAAAAAAAAAAAAAAAul, 0x0807060504030201ul, 0x2222222222222222ul, 0x0909090909090909ul, 0x0000000000000000ul)] - [TestCase(0x4E228420u, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul)] - [TestCase(0x4E228420u, 0x00000000FFFFFFFFul, 0x00000000FFFFFFFFul, 0x0000000000000001ul, 0x0000000000000001ul, 0x00000000FFFFFF00ul, 0x00000000FFFFFF00ul)] - [TestCase(0x4E228420u, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFEFEFEFEFEFEFEFEul, 0xFEFEFEFEFEFEFEFEul)] - [TestCase(0x4E228420u, 0x0102030405060708ul, 0xAAAAAAAAAAAAAAAAul, 0x0807060504030201ul, 0x2222222222222222ul, 0x0909090909090909ul, 0xCCCCCCCCCCCCCCCCul)] - [TestCase(0xE628420u, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul)] - [TestCase(0xE628420u, 0x00000000FFFFFFFFul, 0x00000000FFFFFFFFul, 0x0000000000000001ul, 0x0000000000000001ul, 0x00000000FFFF0000ul, 0x0000000000000000ul)] - [TestCase(0xE628420u, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFEFFFEFFFEFFFEul, 0x0000000000000000ul)] - [TestCase(0xE628420u, 0x0102030405060708ul, 0xAAAAAAAAAAAAAAAAul, 0x0807060504030201ul, 0x2222222222222222ul, 0x0909090909090909ul, 0x0000000000000000ul)] - [TestCase(0x4E628420u, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul)] - [TestCase(0x4E628420u, 0x00000000FFFFFFFFul, 0x00000000FFFFFFFFul, 0x0000000000000001ul, 0x0000000000000001ul, 0x00000000FFFF0000ul, 0x00000000FFFF0000ul)] - [TestCase(0x4E628420u, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFEFFFEFFFEFFFEul, 0xFFFEFFFEFFFEFFFEul)] - [TestCase(0x4E628420u, 0x0102030405060708ul, 0xAAAAAAAAAAAAAAAAul, 0x0807060504030201ul, 0x2222222222222222ul, 0x0909090909090909ul, 0xCCCCCCCCCCCCCCCCul)] - [TestCase(0xEA28420u, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul)] - [TestCase(0xEA28420u, 0x00000000FFFFFFFFul, 0x00000000FFFFFFFFul, 0x0000000000000001ul, 0x0000000000000001ul, 0x0000000000000000ul, 0x0000000000000000ul)] - [TestCase(0xEA28420u, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFEFFFFFFFEul, 0x0000000000000000ul)] - [TestCase(0xEA28420u, 0x0102030405060708ul, 0xAAAAAAAAAAAAAAAAul, 0x0807060504030201ul, 0x2222222222222222ul, 0x0909090909090909ul, 0x0000000000000000ul)] - [TestCase(0x4EA28420u, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul)] - [TestCase(0x4EA28420u, 0x00000000FFFFFFFFul, 0x00000000FFFFFFFFul, 0x0000000000000001ul, 0x0000000000000001ul, 0x0000000000000000ul, 0x0000000000000000ul)] - [TestCase(0x4EA28420u, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFEFFFFFFFEul, 0xFFFFFFFEFFFFFFFEul)] - [TestCase(0x4EA28420u, 0x0102030405060708ul, 0xAAAAAAAAAAAAAAAAul, 0x0807060504030201ul, 0x2222222222222222ul, 0x0909090909090909ul, 0xCCCCCCCCCCCCCCCCul)] - [TestCase(0x4EE28420u, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul)] - [TestCase(0x4EE28420u, 0x00000000FFFFFFFFul, 0x00000000FFFFFFFFul, 0x0000000000000001ul, 0x0000000000000001ul, 0x0000000100000000ul, 0x0000000100000000ul)] - [TestCase(0x4EE28420u, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFEul, 0xFFFFFFFFFFFFFFFEul)] - [TestCase(0x4EE28420u, 0x0102030405060708ul, 0xAAAAAAAAAAAAAAAAul, 0x0807060504030201ul, 0x2222222222222222ul, 0x0909090909090909ul, 0xCCCCCCCCCCCCCCCCul)] - public void Add_V(uint Opcode, ulong A0, ulong A1, ulong B0, ulong B1, ulong Result0, ulong Result1) - { - Vector128 V1 = MakeVectorE0E1(A0, A1); - Vector128 V2 = MakeVectorE0E1(B0, B1); - AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2); - Assert.Multiple(() => - { - Assert.AreEqual(Result0, GetVectorE0(ThreadState.V0)); - Assert.AreEqual(Result1, GetVectorE1(ThreadState.V0)); - }); - } - [TestCase(0x1E224820u, 0x0000000000000000ul, 0x0000000080000000ul, 0x0000000000000000ul)] [TestCase(0x1E224820u, 0x0000000080000000ul, 0x0000000000000000ul, 0x0000000000000000ul)] [TestCase(0x1E224820u, 0x0000000080000000ul, 0x0000000080000000ul, 0x0000000080000000ul)] @@ -195,6 +155,7 @@ namespace Ryujinx.Tests.Cpu V0: Sse.SetAllVector128(B)); float Result = (float)(2 - ((double)A * (double)B)); + Assert.Multiple(() => { Assert.That(Sse41.Extract(ThreadState.V4, (byte)0), Is.EqualTo(Result)); diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdMove.cs b/Ryujinx.Tests/Cpu/CpuTestSimdMove.cs deleted file mode 100644 index 055e08689..000000000 --- a/Ryujinx.Tests/Cpu/CpuTestSimdMove.cs +++ /dev/null @@ -1,136 +0,0 @@ -using ChocolArm64.State; - -using NUnit.Framework; - -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.X86; - -namespace Ryujinx.Tests.Cpu -{ - public class CpuTestSimdMove : CpuTest - { - [Test, Description("TRN1 V0.4S, V1.4S, V2.4S")] - public void Trn1_V_4S([Random(2)] uint A0, [Random(2)] uint A1, [Random(2)] uint A2, [Random(2)] uint A3, - [Random(2)] uint B0, [Random(2)] uint B1, [Random(2)] uint B2, [Random(2)] uint B3) - { - uint Opcode = 0x4E822820; - Vector128 V1 = Sse.StaticCast(Sse2.SetVector128(A3, A2, A1, A0)); - Vector128 V2 = Sse.StaticCast(Sse2.SetVector128(B3, B2, B1, B0)); - - AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2); - - Assert.Multiple(() => - { - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)0), Is.EqualTo(A0)); - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)1), Is.EqualTo(B0)); - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)2), Is.EqualTo(A2)); - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)3), Is.EqualTo(B2)); - }); - } - - [Test, Description("TRN1 V0.8B, V1.8B, V2.8B")] - public void Trn1_V_8B([Random(2)] byte A0, [Random(1)] byte A1, [Random(2)] byte A2, [Random(1)] byte A3, - [Random(2)] byte A4, [Random(1)] byte A5, [Random(2)] byte A6, [Random(1)] byte A7, - [Random(2)] byte B0, [Random(1)] byte B1, [Random(2)] byte B2, [Random(1)] byte B3, - [Random(2)] byte B4, [Random(1)] byte B5, [Random(2)] byte B6, [Random(1)] byte B7) - { - uint Opcode = 0x0E022820; - Vector128 V1 = Sse.StaticCast(Sse2.SetVector128(0, 0, 0, 0, 0, 0, 0, 0, A7, A6, A5, A4, A3, A2, A1, A0)); - Vector128 V2 = Sse.StaticCast(Sse2.SetVector128(0, 0, 0, 0, 0, 0, 0, 0, B7, B6, B5, B4, B3, B2, B1, B0)); - - AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2); - - Assert.Multiple(() => - { - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)0), Is.EqualTo(A0)); - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)1), Is.EqualTo(B0)); - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)2), Is.EqualTo(A2)); - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)3), Is.EqualTo(B2)); - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)4), Is.EqualTo(A4)); - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)5), Is.EqualTo(B4)); - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)6), Is.EqualTo(A6)); - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)7), Is.EqualTo(B6)); - }); - } - - [Test, Description("TRN2 V0.4S, V1.4S, V2.4S")] - public void Trn2_V_4S([Random(2)] uint A0, [Random(2)] uint A1, [Random(2)] uint A2, [Random(2)] uint A3, - [Random(2)] uint B0, [Random(2)] uint B1, [Random(2)] uint B2, [Random(2)] uint B3) - { - uint Opcode = 0x4E826820; - Vector128 V1 = Sse.StaticCast(Sse2.SetVector128(A3, A2, A1, A0)); - Vector128 V2 = Sse.StaticCast(Sse2.SetVector128(B3, B2, B1, B0)); - - AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2); - - Assert.Multiple(() => - { - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)0), Is.EqualTo(A1)); - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)1), Is.EqualTo(B1)); - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)2), Is.EqualTo(A3)); - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)3), Is.EqualTo(B3)); - }); - } - - [Test, Description("TRN2 V0.8B, V1.8B, V2.8B")] - public void Trn2_V_8B([Random(1)] byte A0, [Random(2)] byte A1, [Random(1)] byte A2, [Random(2)] byte A3, - [Random(1)] byte A4, [Random(2)] byte A5, [Random(1)] byte A6, [Random(2)] byte A7, - [Random(1)] byte B0, [Random(2)] byte B1, [Random(1)] byte B2, [Random(2)] byte B3, - [Random(1)] byte B4, [Random(2)] byte B5, [Random(1)] byte B6, [Random(2)] byte B7) - { - uint Opcode = 0x0E026820; - Vector128 V1 = Sse.StaticCast(Sse2.SetVector128(0, 0, 0, 0, 0, 0, 0, 0, A7, A6, A5, A4, A3, A2, A1, A0)); - Vector128 V2 = Sse.StaticCast(Sse2.SetVector128(0, 0, 0, 0, 0, 0, 0, 0, B7, B6, B5, B4, B3, B2, B1, B0)); - - AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2); - - Assert.Multiple(() => - { - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)0), Is.EqualTo(A1)); - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)1), Is.EqualTo(B1)); - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)2), Is.EqualTo(A3)); - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)3), Is.EqualTo(B3)); - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)4), Is.EqualTo(A5)); - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)5), Is.EqualTo(B5)); - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)6), Is.EqualTo(A7)); - Assert.That(Sse41.Extract(Sse.StaticCast(ThreadState.V0), (byte)7), Is.EqualTo(B7)); - }); - } - - [TestCase(0u, 0u, 0x2313221221112010ul, 0x0000000000000000ul)] - [TestCase(1u, 0u, 0x2313221221112010ul, 0x2717261625152414ul)] - [TestCase(0u, 1u, 0x2322131221201110ul, 0x0000000000000000ul)] - [TestCase(1u, 1u, 0x2322131221201110ul, 0x2726171625241514ul)] - [TestCase(0u, 2u, 0x2322212013121110ul, 0x0000000000000000ul)] - [TestCase(1u, 2u, 0x2322212013121110ul, 0x2726252417161514ul)] - [TestCase(1u, 3u, 0x1716151413121110ul, 0x2726252423222120ul)] - public void Zip1_V(uint Q, uint size, ulong Result_0, ulong Result_1) - { - // ZIP1 V0., V1., V2. - uint Opcode = 0x0E023820 | (Q << 30) | (size << 22); - Vector128 V1 = MakeVectorE0E1(0x1716151413121110, 0x1F1E1D1C1B1A1918); - Vector128 V2 = MakeVectorE0E1(0x2726252423222120, 0x2F2E2D2C2B2A2928); - AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2); - Assert.AreEqual(Result_0, GetVectorE0(ThreadState.V0)); - Assert.AreEqual(Result_1, GetVectorE1(ThreadState.V0)); - } - - [TestCase(0u, 0u, 0x2717261625152414ul, 0x0000000000000000ul)] - [TestCase(1u, 0u, 0x2B1B2A1A29192818ul, 0x2F1F2E1E2D1D2C1Cul)] - [TestCase(0u, 1u, 0x2726171625241514ul, 0x0000000000000000ul)] - [TestCase(1u, 1u, 0x2B2A1B1A29281918ul, 0x2F2E1F1E2D2C1D1Cul)] - [TestCase(0u, 2u, 0x2726252417161514ul, 0x0000000000000000ul)] - [TestCase(1u, 2u, 0x2B2A29281B1A1918ul, 0x2F2E2D2C1F1E1D1Cul)] - [TestCase(1u, 3u, 0x1F1E1D1C1B1A1918ul, 0x2F2E2D2C2B2A2928ul)] - public void Zip2_V(uint Q, uint size, ulong Result_0, ulong Result_1) - { - // ZIP2 V0., V1., V2. - uint Opcode = 0x0E027820 | (Q << 30) | (size << 22); - Vector128 V1 = MakeVectorE0E1(0x1716151413121110, 0x1F1E1D1C1B1A1918); - Vector128 V2 = MakeVectorE0E1(0x2726252423222120, 0x2F2E2D2C2B2A2928); - AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2); - Assert.AreEqual(Result_0, GetVectorE0(ThreadState.V0)); - Assert.AreEqual(Result_1, GetVectorE1(ThreadState.V0)); - } - } -} diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs b/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs index 5e14f55d3..51db857c3 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs @@ -60,73 +60,90 @@ namespace Ryujinx.Tests.Cpu } #endregion - [Test, Description("ADD , , ")] - public void Add_S_D([ValueSource("_1D_")] [Random(1)] ulong A, - [ValueSource("_1D_")] [Random(1)] ulong B) + private const int RndCnt = 4; + + [Test, Pairwise, Description("ADD , , ")] + public void Add_S_D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_1D_")] [Random(RndCnt)] ulong B) { - uint Opcode = 0x5EE28420; // ADD D0, D1, D2 + uint Opcode = 0x5EE08400; // ADD D0, D0, D0 + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); + Vector128 V0 = MakeVectorE0E1(Z, Z); Vector128 V1 = MakeVectorE0(A); Vector128 V2 = MakeVectorE0(B); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); AArch64.V(1, new Bits(A)); AArch64.V(2, new Bits(B)); SimdFp.Add_S(Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); - }); - } - - [Test, Description("ADD ., ., .")] - public void Add_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong A, - [ValueSource("_8B4H2S_")] [Random(1)] ulong B, - [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> - { - uint Opcode = 0x0E228420; // ADD V0.8B, V1.8B, V2.8B - Opcode |= ((size & 3) << 22); - Bits Op = new Bits(Opcode); - - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); - Vector128 V1 = MakeVectorE0(A); - Vector128 V2 = MakeVectorE0(B); - AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - - AArch64.V(1, new Bits(A)); - AArch64.V(2, new Bits(B)); - SimdFp.Add_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); - - Assert.Multiple(() => - { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } [Test, Pairwise, Description("ADD ., ., .")] - public void Add_V_16B_8H_4S_2D([ValueSource("_8B4H2S1D_")] [Random(1)] ulong A0, - [ValueSource("_8B4H2S1D_")] [Random(1)] ulong A1, - [ValueSource("_8B4H2S1D_")] [Random(1)] ulong B0, - [ValueSource("_8B4H2S1D_")] [Random(1)] ulong B1, - [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D> + public void Add_V_8B_4H_2S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> { - uint Opcode = 0x4E228420; // ADD V0.16B, V1.16B, V2.16B + uint Opcode = 0x0E208400; // ADD V0.8B, V0.8B, V0.8B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V1 = MakeVectorE0E1(A0, A1); - Vector128 V2 = MakeVectorE0E1(B0, B1); - AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + Vector128 V2 = MakeVectorE0(B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); - AArch64.Vpart(2, 0, new Bits(B0)); - AArch64.Vpart(2, 1, new Bits(B1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.V(1, new Bits(A)); + AArch64.V(2, new Bits(B)); + SimdFp.Add_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Pairwise, Description("ADD ., ., .")] + public void Add_V_16B_8H_4S_2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D> + { + uint Opcode = 0x4E208400; // ADD V0.16B, V0.16B, V0.16B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0E1(B, B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B)); SimdFp.Add_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => @@ -137,108 +154,89 @@ namespace Ryujinx.Tests.Cpu } [Test, Pairwise, Description("ADDHN{2} ., ., .")] - public void Addhn_V_8H8B_4S4H_2D2S([ValueSource("_4H2S1D_")] [Random(1)] ulong A0, - [ValueSource("_4H2S1D_")] [Random(1)] ulong A1, - [ValueSource("_4H2S1D_")] [Random(1)] ulong B0, - [ValueSource("_4H2S1D_")] [Random(1)] ulong B1, + public void Addhn_V_8H8B_4S4H_2D2S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong B, [Values(0b00u, 0b01u, 0b10u)] uint size) // <8H8B, 4S4H, 2D2S> { - uint Opcode = 0x0E224020; // ADDHN V0.8B, V1.8H, V2.8H + uint Opcode = 0x0E204000; // ADDHN V0.8B, V0.8H, V0.8H + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); - Vector128 V1 = MakeVectorE0E1(A0, A1); - Vector128 V2 = MakeVectorE0E1(B0, B1); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0E1(B, B); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); - AArch64.Vpart(2, 0, new Bits(B0)); - AArch64.Vpart(2, 1, new Bits(B1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B)); SimdFp.Addhn_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } [Test, Pairwise, Description("ADDHN{2} ., ., .")] - public void Addhn_V_8H16B_4S8H_2D4S([ValueSource("_4H2S1D_")] [Random(1)] ulong A0, - [ValueSource("_4H2S1D_")] [Random(1)] ulong A1, - [ValueSource("_4H2S1D_")] [Random(1)] ulong B0, - [ValueSource("_4H2S1D_")] [Random(1)] ulong B1, + public void Addhn_V_8H16B_4S8H_2D4S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong B, [Values(0b00u, 0b01u, 0b10u)] uint size) // <8H16B, 4S8H, 2D4S> { - uint Opcode = 0x4E224020; // ADDHN2 V0.16B, V1.8H, V2.8H + uint Opcode = 0x4E204000; // ADDHN2 V0.16B, V0.8H, V0.8H + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - ulong _E0 = TestContext.CurrentContext.Random.NextULong(); - Vector128 V0 = MakeVectorE0(_E0); - Vector128 V1 = MakeVectorE0E1(A0, A1); - Vector128 V2 = MakeVectorE0E1(B0, B1); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0E1(B, B); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); - AArch64.Vpart(2, 0, new Bits(B0)); - AArch64.Vpart(2, 1, new Bits(B1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B)); SimdFp.Addhn_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(_E0)); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } - [Test, Description("ADDP ., ., .")] - public void Addp_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong A, - [ValueSource("_8B4H2S_")] [Random(1)] ulong B, - [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> - { - uint Opcode = 0x0E22BC20; // ADDP V0.8B, V1.8B, V2.8B - Opcode |= ((size & 3) << 22); - Bits Op = new Bits(Opcode); - - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); - Vector128 V1 = MakeVectorE0(A); - Vector128 V2 = MakeVectorE0(B); - AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - - AArch64.V(1, new Bits(A)); - AArch64.V(2, new Bits(B)); - SimdFp.Addp_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); - - Assert.Multiple(() => - { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); - }); - } - [Test, Pairwise, Description("ADDP ., ., .")] - public void Addp_V_16B_8H_4S_2D([ValueSource("_8B4H2S1D_")] [Random(1)] ulong A0, - [ValueSource("_8B4H2S1D_")] [Random(1)] ulong A1, - [ValueSource("_8B4H2S1D_")] [Random(1)] ulong B0, - [ValueSource("_8B4H2S1D_")] [Random(1)] ulong B1, - [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D> + public void Addp_V_8B_4H_2S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> { - uint Opcode = 0x4E22BC20; // ADDP V0.16B, V1.16B, V2.16B + uint Opcode = 0x0E20BC00; // ADDP V0.8B, V0.8B, V0.8B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V1 = MakeVectorE0E1(A0, A1); - Vector128 V2 = MakeVectorE0E1(B0, B1); - AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + Vector128 V2 = MakeVectorE0(B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); - AArch64.Vpart(2, 0, new Bits(B0)); - AArch64.Vpart(2, 1, new Bits(B1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.V(1, new Bits(A)); + AArch64.V(2, new Bits(B)); SimdFp.Addp_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => @@ -248,46 +246,57 @@ namespace Ryujinx.Tests.Cpu }); } - [Test, Description("AND ., ., .")] - public void And_V_8B([ValueSource("_8B_")] [Random(1)] ulong A, - [ValueSource("_8B_")] [Random(1)] ulong B) + [Test, Pairwise, Description("ADDP ., ., .")] + public void Addp_V_16B_8H_4S_2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D> { - uint Opcode = 0x0E221C20; // AND V0.8B, V1.8B, V2.8B + uint Opcode = 0x4E20BC00; // ADDP V0.16B, V0.16B, V0.16B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); - Vector128 V1 = MakeVectorE0(A); - Vector128 V2 = MakeVectorE0(B); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0E1(B, B); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - AArch64.V(1, new Bits(A)); - AArch64.V(2, new Bits(B)); - SimdFp.And_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B)); + SimdFp.Addp_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } [Test, Pairwise, Description("AND ., ., .")] - public void And_V_16B([ValueSource("_8B_")] [Random(1)] ulong A0, - [ValueSource("_8B_")] [Random(1)] ulong A1, - [ValueSource("_8B_")] [Random(1)] ulong B0, - [ValueSource("_8B_")] [Random(1)] ulong B1) + public void And_V_8B([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B_")] [Random(RndCnt)] ulong B) { - uint Opcode = 0x4E221C20; // AND V0.16B, V1.16B, V2.16B + uint Opcode = 0x0E201C00; // AND V0.8B, V0.8B, V0.8B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Bits Op = new Bits(Opcode); - Vector128 V1 = MakeVectorE0E1(A0, A1); - Vector128 V2 = MakeVectorE0E1(B0, B1); - AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + Vector128 V2 = MakeVectorE0(B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); - AArch64.Vpart(2, 0, new Bits(B0)); - AArch64.Vpart(2, 1, new Bits(B1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.V(1, new Bits(A)); + AArch64.V(2, new Bits(B)); SimdFp.And_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => @@ -297,46 +306,55 @@ namespace Ryujinx.Tests.Cpu }); } - [Test, Description("BIC ., ., .")] - public void Bic_V_8B([ValueSource("_8B_")] [Random(1)] ulong A, - [ValueSource("_8B_")] [Random(1)] ulong B) + [Test, Pairwise, Description("AND ., ., .")] + public void And_V_16B([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B_")] [Random(RndCnt)] ulong B) { - uint Opcode = 0x0E621C20; // BIC V0.8B, V1.8B, V2.8B + uint Opcode = 0x4E201C00; // AND V0.16B, V0.16B, V0.16B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); - Vector128 V1 = MakeVectorE0(A); - Vector128 V2 = MakeVectorE0(B); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0E1(B, B); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - AArch64.V(1, new Bits(A)); - AArch64.V(2, new Bits(B)); - SimdFp.Bic_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B)); + SimdFp.And_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } [Test, Pairwise, Description("BIC ., ., .")] - public void Bic_V_16B([ValueSource("_8B_")] [Random(1)] ulong A0, - [ValueSource("_8B_")] [Random(1)] ulong A1, - [ValueSource("_8B_")] [Random(1)] ulong B0, - [ValueSource("_8B_")] [Random(1)] ulong B1) + public void Bic_V_8B([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B_")] [Random(RndCnt)] ulong B) { - uint Opcode = 0x4E621C20; // BIC V0.16B, V1.16B, V2.16B + uint Opcode = 0x0E601C00; // BIC V0.8B, V0.8B, V0.8B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Bits Op = new Bits(Opcode); - Vector128 V1 = MakeVectorE0E1(A0, A1); - Vector128 V2 = MakeVectorE0E1(B0, B1); - AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + Vector128 V2 = MakeVectorE0(B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); - AArch64.Vpart(2, 0, new Bits(B0)); - AArch64.Vpart(2, 1, new Bits(B1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.V(1, new Bits(A)); + AArch64.V(2, new Bits(B)); SimdFp.Bic_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => @@ -346,53 +364,55 @@ namespace Ryujinx.Tests.Cpu }); } - [Test, Description("BIF ., ., .")] - public void Bif_V_8B([ValueSource("_8B_")] [Random(1)] ulong _Z, - [ValueSource("_8B_")] [Random(1)] ulong A, - [ValueSource("_8B_")] [Random(1)] ulong B) + [Test, Pairwise, Description("BIC ., ., .")] + public void Bic_V_16B([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B_")] [Random(RndCnt)] ulong B) { - uint Opcode = 0x2EE21C20; // BIF V0.8B, V1.8B, V2.8B + uint Opcode = 0x4E601C00; // BIC V0.16B, V0.16B, V0.16B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE0E1(_Z, TestContext.CurrentContext.Random.NextULong()); - Vector128 V1 = MakeVectorE0(A); - Vector128 V2 = MakeVectorE0(B); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0E1(B, B); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - AArch64.Vpart(0, 0, new Bits(_Z)); - AArch64.V(1, new Bits(A)); - AArch64.V(2, new Bits(B)); - SimdFp.Bif_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B)); + SimdFp.Bic_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } [Test, Pairwise, Description("BIF ., ., .")] - public void Bif_V_16B([ValueSource("_8B_")] [Random(1)] ulong _Z0, - [ValueSource("_8B_")] [Random(1)] ulong _Z1, - [ValueSource("_8B_")] [Random(1)] ulong A0, - [ValueSource("_8B_")] [Random(1)] ulong A1, - [ValueSource("_8B_")] [Random(1)] ulong B0, - [ValueSource("_8B_")] [Random(1)] ulong B1) + public void Bif_V_8B([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B_")] [Random(RndCnt)] ulong B) { - uint Opcode = 0x6EE21C20; // BIF V0.16B, V1.16B, V2.16B + uint Opcode = 0x2EE01C00; // BIF V0.8B, V0.8B, V0.8B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE0E1(_Z0, _Z1); - Vector128 V1 = MakeVectorE0E1(A0, A1); - Vector128 V2 = MakeVectorE0E1(B0, B1); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + Vector128 V2 = MakeVectorE0(B); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - AArch64.Vpart(0, 0, new Bits(_Z0)); - AArch64.Vpart(0, 1, new Bits(_Z1)); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); - AArch64.Vpart(2, 0, new Bits(B0)); - AArch64.Vpart(2, 1, new Bits(B1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.V(1, new Bits(A)); + AArch64.V(2, new Bits(B)); SimdFp.Bif_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => @@ -402,53 +422,55 @@ namespace Ryujinx.Tests.Cpu }); } - [Test, Description("BIT ., ., .")] - public void Bit_V_8B([ValueSource("_8B_")] [Random(1)] ulong _Z, - [ValueSource("_8B_")] [Random(1)] ulong A, - [ValueSource("_8B_")] [Random(1)] ulong B) + [Test, Pairwise, Description("BIF ., ., .")] + public void Bif_V_16B([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B_")] [Random(RndCnt)] ulong B) { - uint Opcode = 0x2EA21C20; // BIT V0.8B, V1.8B, V2.8B + uint Opcode = 0x6EE01C00; // BIF V0.16B, V0.16B, V0.16B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE0E1(_Z, TestContext.CurrentContext.Random.NextULong()); - Vector128 V1 = MakeVectorE0(A); - Vector128 V2 = MakeVectorE0(B); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0E1(B, B); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - AArch64.Vpart(0, 0, new Bits(_Z)); - AArch64.V(1, new Bits(A)); - AArch64.V(2, new Bits(B)); - SimdFp.Bit_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B)); + SimdFp.Bif_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } [Test, Pairwise, Description("BIT ., ., .")] - public void Bit_V_16B([ValueSource("_8B_")] [Random(1)] ulong _Z0, - [ValueSource("_8B_")] [Random(1)] ulong _Z1, - [ValueSource("_8B_")] [Random(1)] ulong A0, - [ValueSource("_8B_")] [Random(1)] ulong A1, - [ValueSource("_8B_")] [Random(1)] ulong B0, - [ValueSource("_8B_")] [Random(1)] ulong B1) + public void Bit_V_8B([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B_")] [Random(RndCnt)] ulong B) { - uint Opcode = 0x6EA21C20; // BIT V0.16B, V1.16B, V2.16B + uint Opcode = 0x2EA01C00; // BIT V0.8B, V0.8B, V0.8B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE0E1(_Z0, _Z1); - Vector128 V1 = MakeVectorE0E1(A0, A1); - Vector128 V2 = MakeVectorE0E1(B0, B1); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + Vector128 V2 = MakeVectorE0(B); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - AArch64.Vpart(0, 0, new Bits(_Z0)); - AArch64.Vpart(0, 1, new Bits(_Z1)); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); - AArch64.Vpart(2, 0, new Bits(B0)); - AArch64.Vpart(2, 1, new Bits(B1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.V(1, new Bits(A)); + AArch64.V(2, new Bits(B)); SimdFp.Bit_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => @@ -458,53 +480,55 @@ namespace Ryujinx.Tests.Cpu }); } - [Test, Description("BSL ., ., .")] - public void Bsl_V_8B([ValueSource("_8B_")] [Random(1)] ulong _Z, - [ValueSource("_8B_")] [Random(1)] ulong A, - [ValueSource("_8B_")] [Random(1)] ulong B) + [Test, Pairwise, Description("BIT ., ., .")] + public void Bit_V_16B([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B_")] [Random(RndCnt)] ulong B) { - uint Opcode = 0x2E621C20; // BSL V0.8B, V1.8B, V2.8B + uint Opcode = 0x6EA01C00; // BIT V0.16B, V0.16B, V0.16B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE0E1(_Z, TestContext.CurrentContext.Random.NextULong()); - Vector128 V1 = MakeVectorE0(A); - Vector128 V2 = MakeVectorE0(B); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0E1(B, B); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - AArch64.Vpart(0, 0, new Bits(_Z)); - AArch64.V(1, new Bits(A)); - AArch64.V(2, new Bits(B)); - SimdFp.Bsl_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B)); + SimdFp.Bit_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } [Test, Pairwise, Description("BSL ., ., .")] - public void Bsl_V_16B([ValueSource("_8B_")] [Random(1)] ulong _Z0, - [ValueSource("_8B_")] [Random(1)] ulong _Z1, - [ValueSource("_8B_")] [Random(1)] ulong A0, - [ValueSource("_8B_")] [Random(1)] ulong A1, - [ValueSource("_8B_")] [Random(1)] ulong B0, - [ValueSource("_8B_")] [Random(1)] ulong B1) + public void Bsl_V_8B([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B_")] [Random(RndCnt)] ulong B) { - uint Opcode = 0x6E621C20; // BSL V0.16B, V1.16B, V2.16B + uint Opcode = 0x2E601C00; // BSL V0.8B, V0.8B, V0.8B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE0E1(_Z0, _Z1); - Vector128 V1 = MakeVectorE0E1(A0, A1); - Vector128 V2 = MakeVectorE0E1(B0, B1); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + Vector128 V2 = MakeVectorE0(B); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - AArch64.Vpart(0, 0, new Bits(_Z0)); - AArch64.Vpart(0, 1, new Bits(_Z1)); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); - AArch64.Vpart(2, 0, new Bits(B0)); - AArch64.Vpart(2, 1, new Bits(B1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.V(1, new Bits(A)); + AArch64.V(2, new Bits(B)); SimdFp.Bsl_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => @@ -514,73 +538,86 @@ namespace Ryujinx.Tests.Cpu }); } - [Test, Description("CMEQ , , ")] - public void Cmeq_S_D([ValueSource("_1D_")] [Random(1)] ulong A, - [ValueSource("_1D_")] [Random(1)] ulong B) + [Test, Pairwise, Description("BSL ., ., .")] + public void Bsl_V_16B([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B_")] [Random(RndCnt)] ulong B) { - uint Opcode = 0x7EE28C20; // CMEQ D0, D1, D2 + uint Opcode = 0x6E601C00; // BSL V0.16B, V0.16B, V0.16B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0E1(B, B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B)); + SimdFp.Bsl_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Pairwise, Description("CMEQ , , ")] + public void Cmeq_S_D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_1D_")] [Random(RndCnt)] ulong B) + { + uint Opcode = 0x7EE08C00; // CMEQ D0, D0, D0 + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); Vector128 V1 = MakeVectorE0(A); Vector128 V2 = MakeVectorE0(B); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); AArch64.V(1, new Bits(A)); AArch64.V(2, new Bits(B)); SimdFp.Cmeq_Reg_S(Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); - }); - } - - [Test, Description("CMEQ ., ., .")] - public void Cmeq_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong A, - [ValueSource("_8B4H2S_")] [Random(1)] ulong B, - [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> - { - uint Opcode = 0x2E228C20; // CMEQ V0.8B, V1.8B, V2.8B - Opcode |= ((size & 3) << 22); - Bits Op = new Bits(Opcode); - - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); - Vector128 V1 = MakeVectorE0(A); - Vector128 V2 = MakeVectorE0(B); - AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - - AArch64.V(1, new Bits(A)); - AArch64.V(2, new Bits(B)); - SimdFp.Cmeq_Reg_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); - - Assert.Multiple(() => - { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } [Test, Pairwise, Description("CMEQ ., ., .")] - public void Cmeq_V_16B_8H_4S_2D([ValueSource("_8B4H2S1D_")] [Random(1)] ulong A0, - [ValueSource("_8B4H2S1D_")] [Random(1)] ulong A1, - [ValueSource("_8B4H2S1D_")] [Random(1)] ulong B0, - [ValueSource("_8B4H2S1D_")] [Random(1)] ulong B1, - [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D> + public void Cmeq_V_8B_4H_2S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> { - uint Opcode = 0x6E228C20; // CMEQ V0.16B, V1.16B, V2.16B + uint Opcode = 0x2E208C00; // CMEQ V0.8B, V0.8B, V0.8B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V1 = MakeVectorE0E1(A0, A1); - Vector128 V2 = MakeVectorE0E1(B0, B1); - AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + Vector128 V2 = MakeVectorE0(B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); - AArch64.Vpart(2, 0, new Bits(B0)); - AArch64.Vpart(2, 1, new Bits(B1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.V(1, new Bits(A)); + AArch64.V(2, new Bits(B)); SimdFp.Cmeq_Reg_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => @@ -590,73 +627,88 @@ namespace Ryujinx.Tests.Cpu }); } - [Test, Description("CMGE , , ")] - public void Cmge_S_D([ValueSource("_1D_")] [Random(1)] ulong A, - [ValueSource("_1D_")] [Random(1)] ulong B) + [Test, Pairwise, Description("CMEQ ., ., .")] + public void Cmeq_V_16B_8H_4S_2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D> { - uint Opcode = 0x5EE23C20; // CMGE D0, D1, D2 + uint Opcode = 0x6E208C00; // CMEQ V0.16B, V0.16B, V0.16B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0E1(B, B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B)); + SimdFp.Cmeq_Reg_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Pairwise, Description("CMGE , , ")] + public void Cmge_S_D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_1D_")] [Random(RndCnt)] ulong B) + { + uint Opcode = 0x5EE03C00; // CMGE D0, D0, D0 + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); Vector128 V1 = MakeVectorE0(A); Vector128 V2 = MakeVectorE0(B); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); AArch64.V(1, new Bits(A)); AArch64.V(2, new Bits(B)); SimdFp.Cmge_Reg_S(Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); - }); - } - - [Test, Description("CMGE ., ., .")] - public void Cmge_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong A, - [ValueSource("_8B4H2S_")] [Random(1)] ulong B, - [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> - { - uint Opcode = 0x0E223C20; // CMGE V0.8B, V1.8B, V2.8B - Opcode |= ((size & 3) << 22); - Bits Op = new Bits(Opcode); - - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); - Vector128 V1 = MakeVectorE0(A); - Vector128 V2 = MakeVectorE0(B); - AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - - AArch64.V(1, new Bits(A)); - AArch64.V(2, new Bits(B)); - SimdFp.Cmge_Reg_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); - - Assert.Multiple(() => - { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } [Test, Pairwise, Description("CMGE ., ., .")] - public void Cmge_V_16B_8H_4S_2D([ValueSource("_8B4H2S1D_")] [Random(1)] ulong A0, - [ValueSource("_8B4H2S1D_")] [Random(1)] ulong A1, - [ValueSource("_8B4H2S1D_")] [Random(1)] ulong B0, - [ValueSource("_8B4H2S1D_")] [Random(1)] ulong B1, - [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D> + public void Cmge_V_8B_4H_2S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> { - uint Opcode = 0x4E223C20; // CMGE V0.16B, V1.16B, V2.16B + uint Opcode = 0x0E203C00; // CMGE V0.8B, V0.8B, V0.8B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V1 = MakeVectorE0E1(A0, A1); - Vector128 V2 = MakeVectorE0E1(B0, B1); - AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + Vector128 V2 = MakeVectorE0(B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); - AArch64.Vpart(2, 0, new Bits(B0)); - AArch64.Vpart(2, 1, new Bits(B1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.V(1, new Bits(A)); + AArch64.V(2, new Bits(B)); SimdFp.Cmge_Reg_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => @@ -666,73 +718,88 @@ namespace Ryujinx.Tests.Cpu }); } - [Test, Description("CMGT , , ")] - public void Cmgt_S_D([ValueSource("_1D_")] [Random(1)] ulong A, - [ValueSource("_1D_")] [Random(1)] ulong B) + [Test, Pairwise, Description("CMGE ., ., .")] + public void Cmge_V_16B_8H_4S_2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D> { - uint Opcode = 0x5EE23420; // CMGT D0, D1, D2 + uint Opcode = 0x4E203C00; // CMGE V0.16B, V0.16B, V0.16B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0E1(B, B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B)); + SimdFp.Cmge_Reg_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Pairwise, Description("CMGT , , ")] + public void Cmgt_S_D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_1D_")] [Random(RndCnt)] ulong B) + { + uint Opcode = 0x5EE03400; // CMGT D0, D0, D0 + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); Vector128 V1 = MakeVectorE0(A); Vector128 V2 = MakeVectorE0(B); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); AArch64.V(1, new Bits(A)); AArch64.V(2, new Bits(B)); SimdFp.Cmgt_Reg_S(Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); - }); - } - - [Test, Description("CMGT ., ., .")] - public void Cmgt_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong A, - [ValueSource("_8B4H2S_")] [Random(1)] ulong B, - [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> - { - uint Opcode = 0x0E223420; // CMGT V0.8B, V1.8B, V2.8B - Opcode |= ((size & 3) << 22); - Bits Op = new Bits(Opcode); - - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); - Vector128 V1 = MakeVectorE0(A); - Vector128 V2 = MakeVectorE0(B); - AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - - AArch64.V(1, new Bits(A)); - AArch64.V(2, new Bits(B)); - SimdFp.Cmgt_Reg_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); - - Assert.Multiple(() => - { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } [Test, Pairwise, Description("CMGT ., ., .")] - public void Cmgt_V_16B_8H_4S_2D([ValueSource("_8B4H2S1D_")] [Random(1)] ulong A0, - [ValueSource("_8B4H2S1D_")] [Random(1)] ulong A1, - [ValueSource("_8B4H2S1D_")] [Random(1)] ulong B0, - [ValueSource("_8B4H2S1D_")] [Random(1)] ulong B1, - [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D> + public void Cmgt_V_8B_4H_2S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> { - uint Opcode = 0x4E223420; // CMGT V0.16B, V1.16B, V2.16B + uint Opcode = 0x0E203400; // CMGT V0.8B, V0.8B, V0.8B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V1 = MakeVectorE0E1(A0, A1); - Vector128 V2 = MakeVectorE0E1(B0, B1); - AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + Vector128 V2 = MakeVectorE0(B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); - AArch64.Vpart(2, 0, new Bits(B0)); - AArch64.Vpart(2, 1, new Bits(B1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.V(1, new Bits(A)); + AArch64.V(2, new Bits(B)); SimdFp.Cmgt_Reg_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => @@ -742,73 +809,88 @@ namespace Ryujinx.Tests.Cpu }); } - [Test, Description("CMHI , , ")] - public void Cmhi_S_D([ValueSource("_1D_")] [Random(1)] ulong A, - [ValueSource("_1D_")] [Random(1)] ulong B) + [Test, Pairwise, Description("CMGT ., ., .")] + public void Cmgt_V_16B_8H_4S_2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D> { - uint Opcode = 0x7EE23420; // CMHI D0, D1, D2 + uint Opcode = 0x4E203400; // CMGT V0.16B, V0.16B, V0.16B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0E1(B, B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B)); + SimdFp.Cmgt_Reg_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Pairwise, Description("CMHI , , ")] + public void Cmhi_S_D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_1D_")] [Random(RndCnt)] ulong B) + { + uint Opcode = 0x7EE03400; // CMHI D0, D0, D0 + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); Vector128 V1 = MakeVectorE0(A); Vector128 V2 = MakeVectorE0(B); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); AArch64.V(1, new Bits(A)); AArch64.V(2, new Bits(B)); SimdFp.Cmhi_S(Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); - }); - } - - [Test, Description("CMHI ., ., .")] - public void Cmhi_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong A, - [ValueSource("_8B4H2S_")] [Random(1)] ulong B, - [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> - { - uint Opcode = 0x2E223420; // CMHI V0.8B, V1.8B, V2.8B - Opcode |= ((size & 3) << 22); - Bits Op = new Bits(Opcode); - - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); - Vector128 V1 = MakeVectorE0(A); - Vector128 V2 = MakeVectorE0(B); - AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - - AArch64.V(1, new Bits(A)); - AArch64.V(2, new Bits(B)); - SimdFp.Cmhi_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); - - Assert.Multiple(() => - { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } [Test, Pairwise, Description("CMHI ., ., .")] - public void Cmhi_V_16B_8H_4S_2D([ValueSource("_8B4H2S1D_")] [Random(1)] ulong A0, - [ValueSource("_8B4H2S1D_")] [Random(1)] ulong A1, - [ValueSource("_8B4H2S1D_")] [Random(1)] ulong B0, - [ValueSource("_8B4H2S1D_")] [Random(1)] ulong B1, - [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D> + public void Cmhi_V_8B_4H_2S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> { - uint Opcode = 0x6E223420; // CMHI V0.16B, V1.16B, V2.16B + uint Opcode = 0x2E203400; // CMHI V0.8B, V0.8B, V0.8B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V1 = MakeVectorE0E1(A0, A1); - Vector128 V2 = MakeVectorE0E1(B0, B1); - AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + Vector128 V2 = MakeVectorE0(B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); - AArch64.Vpart(2, 0, new Bits(B0)); - AArch64.Vpart(2, 1, new Bits(B1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.V(1, new Bits(A)); + AArch64.V(2, new Bits(B)); SimdFp.Cmhi_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => @@ -818,73 +900,88 @@ namespace Ryujinx.Tests.Cpu }); } - [Test, Description("CMHS , , ")] - public void Cmhs_S_D([ValueSource("_1D_")] [Random(1)] ulong A, - [ValueSource("_1D_")] [Random(1)] ulong B) + [Test, Pairwise, Description("CMHI ., ., .")] + public void Cmhi_V_16B_8H_4S_2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D> { - uint Opcode = 0x7EE23C20; // CMHS D0, D1, D2 + uint Opcode = 0x6E203400; // CMHI V0.16B, V0.16B, V0.16B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0E1(B, B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B)); + SimdFp.Cmhi_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Pairwise, Description("CMHS , , ")] + public void Cmhs_S_D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_1D_")] [Random(RndCnt)] ulong B) + { + uint Opcode = 0x7EE03C00; // CMHS D0, D0, D0 + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); Vector128 V1 = MakeVectorE0(A); Vector128 V2 = MakeVectorE0(B); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); AArch64.V(1, new Bits(A)); AArch64.V(2, new Bits(B)); SimdFp.Cmhs_S(Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); - }); - } - - [Test, Description("CMHS ., ., .")] - public void Cmhs_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong A, - [ValueSource("_8B4H2S_")] [Random(1)] ulong B, - [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> - { - uint Opcode = 0x2E223C20; // CMHS V0.8B, V1.8B, V2.8B - Opcode |= ((size & 3) << 22); - Bits Op = new Bits(Opcode); - - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); - Vector128 V1 = MakeVectorE0(A); - Vector128 V2 = MakeVectorE0(B); - AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - - AArch64.V(1, new Bits(A)); - AArch64.V(2, new Bits(B)); - SimdFp.Cmhs_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); - - Assert.Multiple(() => - { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } [Test, Pairwise, Description("CMHS ., ., .")] - public void Cmhs_V_16B_8H_4S_2D([ValueSource("_8B4H2S1D_")] [Random(1)] ulong A0, - [ValueSource("_8B4H2S1D_")] [Random(1)] ulong A1, - [ValueSource("_8B4H2S1D_")] [Random(1)] ulong B0, - [ValueSource("_8B4H2S1D_")] [Random(1)] ulong B1, - [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D> + public void Cmhs_V_8B_4H_2S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> { - uint Opcode = 0x6E223C20; // CMHS V0.16B, V1.16B, V2.16B + uint Opcode = 0x2E203C00; // CMHS V0.8B, V0.8B, V0.8B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V1 = MakeVectorE0E1(A0, A1); - Vector128 V2 = MakeVectorE0E1(B0, B1); - AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + Vector128 V2 = MakeVectorE0(B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); - AArch64.Vpart(2, 0, new Bits(B0)); - AArch64.Vpart(2, 1, new Bits(B1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.V(1, new Bits(A)); + AArch64.V(2, new Bits(B)); SimdFp.Cmhs_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => @@ -894,73 +991,88 @@ namespace Ryujinx.Tests.Cpu }); } - [Test, Description("CMTST , , ")] - public void Cmtst_S_D([ValueSource("_1D_")] [Random(1)] ulong A, - [ValueSource("_1D_")] [Random(1)] ulong B) + [Test, Pairwise, Description("CMHS ., ., .")] + public void Cmhs_V_16B_8H_4S_2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D> { - uint Opcode = 0x5EE28C20; // CMTST D0, D1, D2 + uint Opcode = 0x6E203C00; // CMHS V0.16B, V0.16B, V0.16B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0E1(B, B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B)); + SimdFp.Cmhs_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Pairwise, Description("CMTST , , ")] + public void Cmtst_S_D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_1D_")] [Random(RndCnt)] ulong B) + { + uint Opcode = 0x5EE08C00; // CMTST D0, D0, D0 + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); Vector128 V1 = MakeVectorE0(A); Vector128 V2 = MakeVectorE0(B); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); AArch64.V(1, new Bits(A)); AArch64.V(2, new Bits(B)); SimdFp.Cmtst_S(Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); - }); - } - - [Test, Description("CMTST ., ., .")] - public void Cmtst_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong A, - [ValueSource("_8B4H2S_")] [Random(1)] ulong B, - [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> - { - uint Opcode = 0x0E228C20; // CMTST V0.8B, V1.8B, V2.8B - Opcode |= ((size & 3) << 22); - Bits Op = new Bits(Opcode); - - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); - Vector128 V1 = MakeVectorE0(A); - Vector128 V2 = MakeVectorE0(B); - AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - - AArch64.V(1, new Bits(A)); - AArch64.V(2, new Bits(B)); - SimdFp.Cmtst_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); - - Assert.Multiple(() => - { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } [Test, Pairwise, Description("CMTST ., ., .")] - public void Cmtst_V_16B_8H_4S_2D([ValueSource("_8B4H2S1D_")] [Random(1)] ulong A0, - [ValueSource("_8B4H2S1D_")] [Random(1)] ulong A1, - [ValueSource("_8B4H2S1D_")] [Random(1)] ulong B0, - [ValueSource("_8B4H2S1D_")] [Random(1)] ulong B1, - [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D> + public void Cmtst_V_8B_4H_2S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> { - uint Opcode = 0x4E228C20; // CMTST V0.16B, V1.16B, V2.16B + uint Opcode = 0x0E208C00; // CMTST V0.8B, V0.8B, V0.8B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V1 = MakeVectorE0E1(A0, A1); - Vector128 V2 = MakeVectorE0E1(B0, B1); - AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + Vector128 V2 = MakeVectorE0(B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); - AArch64.Vpart(2, 0, new Bits(B0)); - AArch64.Vpart(2, 1, new Bits(B1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.V(1, new Bits(A)); + AArch64.V(2, new Bits(B)); SimdFp.Cmtst_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => @@ -970,46 +1082,57 @@ namespace Ryujinx.Tests.Cpu }); } - [Test, Description("EOR ., ., .")] - public void Eor_V_8B([ValueSource("_8B_")] [Random(1)] ulong A, - [ValueSource("_8B_")] [Random(1)] ulong B) + [Test, Pairwise, Description("CMTST ., ., .")] + public void Cmtst_V_16B_8H_4S_2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D> { - uint Opcode = 0x2E221C20; // EOR V0.8B, V1.8B, V2.8B + uint Opcode = 0x4E208C00; // CMTST V0.16B, V0.16B, V0.16B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); - Vector128 V1 = MakeVectorE0(A); - Vector128 V2 = MakeVectorE0(B); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0E1(B, B); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - AArch64.V(1, new Bits(A)); - AArch64.V(2, new Bits(B)); - SimdFp.Eor_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B)); + SimdFp.Cmtst_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } [Test, Pairwise, Description("EOR ., ., .")] - public void Eor_V_16B([ValueSource("_8B_")] [Random(1)] ulong A0, - [ValueSource("_8B_")] [Random(1)] ulong A1, - [ValueSource("_8B_")] [Random(1)] ulong B0, - [ValueSource("_8B_")] [Random(1)] ulong B1) + public void Eor_V_8B([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B_")] [Random(RndCnt)] ulong B) { - uint Opcode = 0x6E221C20; // EOR V0.16B, V1.16B, V2.16B + uint Opcode = 0x2E201C00; // EOR V0.8B, V0.8B, V0.8B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Bits Op = new Bits(Opcode); - Vector128 V1 = MakeVectorE0E1(A0, A1); - Vector128 V2 = MakeVectorE0E1(B0, B1); - AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + Vector128 V2 = MakeVectorE0(B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); - AArch64.Vpart(2, 0, new Bits(B0)); - AArch64.Vpart(2, 1, new Bits(B1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.V(1, new Bits(A)); + AArch64.V(2, new Bits(B)); SimdFp.Eor_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => @@ -1019,46 +1142,55 @@ namespace Ryujinx.Tests.Cpu }); } - [Test, Description("ORN ., ., .")] - public void Orn_V_8B([ValueSource("_8B_")] [Random(1)] ulong A, - [ValueSource("_8B_")] [Random(1)] ulong B) + [Test, Pairwise, Description("EOR ., ., .")] + public void Eor_V_16B([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B_")] [Random(RndCnt)] ulong B) { - uint Opcode = 0x0EE21C20; // ORN V0.8B, V1.8B, V2.8B + uint Opcode = 0x6E201C00; // EOR V0.16B, V0.16B, V0.16B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); - Vector128 V1 = MakeVectorE0(A); - Vector128 V2 = MakeVectorE0(B); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0E1(B, B); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - AArch64.V(1, new Bits(A)); - AArch64.V(2, new Bits(B)); - SimdFp.Orn_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B)); + SimdFp.Eor_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } [Test, Pairwise, Description("ORN ., ., .")] - public void Orn_V_16B([ValueSource("_8B_")] [Random(1)] ulong A0, - [ValueSource("_8B_")] [Random(1)] ulong A1, - [ValueSource("_8B_")] [Random(1)] ulong B0, - [ValueSource("_8B_")] [Random(1)] ulong B1) + public void Orn_V_8B([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B_")] [Random(RndCnt)] ulong B) { - uint Opcode = 0x4EE21C20; // ORN V0.16B, V1.16B, V2.16B + uint Opcode = 0x0EE01C00; // ORN V0.8B, V0.8B, V0.8B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Bits Op = new Bits(Opcode); - Vector128 V1 = MakeVectorE0E1(A0, A1); - Vector128 V2 = MakeVectorE0E1(B0, B1); - AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + Vector128 V2 = MakeVectorE0(B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); - AArch64.Vpart(2, 0, new Bits(B0)); - AArch64.Vpart(2, 1, new Bits(B1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.V(1, new Bits(A)); + AArch64.V(2, new Bits(B)); SimdFp.Orn_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => @@ -1068,46 +1200,84 @@ namespace Ryujinx.Tests.Cpu }); } - [Test, Description("ORR ., ., .")] - public void Orr_V_8B([ValueSource("_8B_")] [Random(1)] ulong A, - [ValueSource("_8B_")] [Random(1)] ulong B) + [Test, Pairwise, Description("ORN ., ., .")] + public void Orn_V_16B([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B_")] [Random(RndCnt)] ulong B) { - uint Opcode = 0x0EA21C20; // ORR V0.8B, V1.8B, V2.8B + uint Opcode = 0x4EE01C00; // ORN V0.16B, V0.16B, V0.16B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); - Vector128 V1 = MakeVectorE0(A); - Vector128 V2 = MakeVectorE0(B); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0E1(B, B); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - AArch64.V(1, new Bits(A)); - AArch64.V(2, new Bits(B)); - SimdFp.Orr_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B)); + SimdFp.Orn_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } [Test, Pairwise, Description("ORR ., ., .")] - public void Orr_V_16B([ValueSource("_8B_")] [Random(1)] ulong A0, - [ValueSource("_8B_")] [Random(1)] ulong A1, - [ValueSource("_8B_")] [Random(1)] ulong B0, - [ValueSource("_8B_")] [Random(1)] ulong B1) + public void Orr_V_8B([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B_")] [Random(RndCnt)] ulong B) { - uint Opcode = 0x4EA21C20; // ORR V0.16B, V1.16B, V2.16B + uint Opcode = 0x0EA01C00; // ORR V0.8B, V0.8B, V0.8B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Bits Op = new Bits(Opcode); - Vector128 V1 = MakeVectorE0E1(A0, A1); - Vector128 V2 = MakeVectorE0E1(B0, B1); - AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + Vector128 V2 = MakeVectorE0(B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); - AArch64.Vpart(2, 0, new Bits(B0)); - AArch64.Vpart(2, 1, new Bits(B1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.V(1, new Bits(A)); + AArch64.V(2, new Bits(B)); + SimdFp.Orr_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Pairwise, Description("ORR ., ., .")] + public void Orr_V_16B([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B_")] [Random(RndCnt)] ulong B) + { + uint Opcode = 0x4EA01C00; // ORR V0.16B, V0.16B, V0.16B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0E1(B, B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B)); SimdFp.Orr_V(Op[30], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => @@ -1118,174 +1288,182 @@ namespace Ryujinx.Tests.Cpu } [Test, Pairwise, Description("RADDHN{2} ., ., .")] - public void Raddhn_V_8H8B_4S4H_2D2S([ValueSource("_4H2S1D_")] [Random(1)] ulong A0, - [ValueSource("_4H2S1D_")] [Random(1)] ulong A1, - [ValueSource("_4H2S1D_")] [Random(1)] ulong B0, - [ValueSource("_4H2S1D_")] [Random(1)] ulong B1, + public void Raddhn_V_8H8B_4S4H_2D2S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong B, [Values(0b00u, 0b01u, 0b10u)] uint size) // <8H8B, 4S4H, 2D2S> { - uint Opcode = 0x2E224020; // RADDHN V0.8B, V1.8H, V2.8H + uint Opcode = 0x2E204000; // RADDHN V0.8B, V0.8H, V0.8H + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); - Vector128 V1 = MakeVectorE0E1(A0, A1); - Vector128 V2 = MakeVectorE0E1(B0, B1); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0E1(B, B); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); - AArch64.Vpart(2, 0, new Bits(B0)); - AArch64.Vpart(2, 1, new Bits(B1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B)); SimdFp.Raddhn_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } [Test, Pairwise, Description("RADDHN{2} ., ., .")] - public void Raddhn_V_8H16B_4S8H_2D4S([ValueSource("_4H2S1D_")] [Random(1)] ulong A0, - [ValueSource("_4H2S1D_")] [Random(1)] ulong A1, - [ValueSource("_4H2S1D_")] [Random(1)] ulong B0, - [ValueSource("_4H2S1D_")] [Random(1)] ulong B1, + public void Raddhn_V_8H16B_4S8H_2D4S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong B, [Values(0b00u, 0b01u, 0b10u)] uint size) // <8H16B, 4S8H, 2D4S> { - uint Opcode = 0x6E224020; // RADDHN2 V0.16B, V1.8H, V2.8H + uint Opcode = 0x6E204000; // RADDHN2 V0.16B, V0.8H, V0.8H + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - ulong _E0 = TestContext.CurrentContext.Random.NextULong(); - Vector128 V0 = MakeVectorE0(_E0); - Vector128 V1 = MakeVectorE0E1(A0, A1); - Vector128 V2 = MakeVectorE0E1(B0, B1); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0E1(B, B); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); - AArch64.Vpart(2, 0, new Bits(B0)); - AArch64.Vpart(2, 1, new Bits(B1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B)); SimdFp.Raddhn_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(_E0)); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } [Test, Pairwise, Description("RSUBHN{2} ., ., .")] - public void Rsubhn_V_8H8B_4S4H_2D2S([ValueSource("_4H2S1D_")] [Random(1)] ulong A0, - [ValueSource("_4H2S1D_")] [Random(1)] ulong A1, - [ValueSource("_4H2S1D_")] [Random(1)] ulong B0, - [ValueSource("_4H2S1D_")] [Random(1)] ulong B1, + public void Rsubhn_V_8H8B_4S4H_2D2S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong B, [Values(0b00u, 0b01u, 0b10u)] uint size) // <8H8B, 4S4H, 2D2S> { - uint Opcode = 0x2E226020; // RSUBHN V0.8B, V1.8H, V2.8H + uint Opcode = 0x2E206000; // RSUBHN V0.8B, V0.8H, V0.8H + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); - Vector128 V1 = MakeVectorE0E1(A0, A1); - Vector128 V2 = MakeVectorE0E1(B0, B1); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0E1(B, B); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); - AArch64.Vpart(2, 0, new Bits(B0)); - AArch64.Vpart(2, 1, new Bits(B1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B)); SimdFp.Rsubhn_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); - }); - } - - [Test, Pairwise, Description("RSUBHN{2} ., ., .")] - public void Rsubhn_V_8H16B_4S8H_2D4S([ValueSource("_4H2S1D_")] [Random(1)] ulong A0, - [ValueSource("_4H2S1D_")] [Random(1)] ulong A1, - [ValueSource("_4H2S1D_")] [Random(1)] ulong B0, - [ValueSource("_4H2S1D_")] [Random(1)] ulong B1, - [Values(0b00u, 0b01u, 0b10u)] uint size) // <8H16B, 4S8H, 2D4S> - { - uint Opcode = 0x6E226020; // RSUBHN2 V0.16B, V1.8H, V2.8H - Opcode |= ((size & 3) << 22); - Bits Op = new Bits(Opcode); - - ulong _E0 = TestContext.CurrentContext.Random.NextULong(); - Vector128 V0 = MakeVectorE0(_E0); - Vector128 V1 = MakeVectorE0E1(A0, A1); - Vector128 V2 = MakeVectorE0E1(B0, B1); - AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); - AArch64.Vpart(2, 0, new Bits(B0)); - AArch64.Vpart(2, 1, new Bits(B1)); - SimdFp.Rsubhn_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); - - Assert.Multiple(() => - { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(_E0)); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } - [Test, Description("SABA ., ., .")] - public void Saba_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong _Z, - [ValueSource("_8B4H2S_")] [Random(1)] ulong A, - [ValueSource("_8B4H2S_")] [Random(1)] ulong B, - [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> + [Test, Pairwise, Description("RSUBHN{2} ., ., .")] + public void Rsubhn_V_8H16B_4S8H_2D4S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <8H16B, 4S8H, 2D4S> { - uint Opcode = 0x0E227C20; // SABA V0.8B, V1.8B, V2.8B + uint Opcode = 0x6E206000; // RSUBHN2 V0.16B, V0.8H, V0.8H + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE0E1(_Z, TestContext.CurrentContext.Random.NextULong()); - Vector128 V1 = MakeVectorE0(A); - Vector128 V2 = MakeVectorE0(B); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0E1(B, B); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - AArch64.Vpart(0, 0, new Bits(_Z)); - AArch64.V(1, new Bits(A)); - AArch64.V(2, new Bits(B)); - SimdFp.Saba_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B)); + SimdFp.Rsubhn_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } [Test, Pairwise, Description("SABA ., ., .")] - public void Saba_V_16B_8H_4S([ValueSource("_8B4H2S_")] [Random(1)] ulong _Z0, - [ValueSource("_8B4H2S_")] [Random(1)] ulong _Z1, - [ValueSource("_8B4H2S_")] [Random(1)] ulong A0, - [ValueSource("_8B4H2S_")] [Random(1)] ulong A1, - [ValueSource("_8B4H2S_")] [Random(1)] ulong B0, - [ValueSource("_8B4H2S_")] [Random(1)] ulong B1, - [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B, 8H, 4S> + public void Saba_V_8B_4H_2S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> { - uint Opcode = 0x4E227C20; // SABA V0.16B, V1.16B, V2.16B + uint Opcode = 0x0E207C00; // SABA V0.8B, V0.8B, V0.8B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE0E1(_Z0, _Z1); - Vector128 V1 = MakeVectorE0E1(A0, A1); - Vector128 V2 = MakeVectorE0E1(B0, B1); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + Vector128 V2 = MakeVectorE0(B); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - AArch64.Vpart(0, 0, new Bits(_Z0)); - AArch64.Vpart(0, 1, new Bits(_Z1)); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); - AArch64.Vpart(2, 0, new Bits(B0)); - AArch64.Vpart(2, 1, new Bits(B1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.V(1, new Bits(A)); + AArch64.V(2, new Bits(B)); + SimdFp.Saba_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Pairwise, Description("SABA ., ., .")] + public void Saba_V_16B_8H_4S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B, 8H, 4S> + { + uint Opcode = 0x4E207C00; // SABA V0.16B, V0.16B, V0.16B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0E1(B, B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B)); SimdFp.Saba_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => @@ -1296,25 +1474,27 @@ namespace Ryujinx.Tests.Cpu } [Test, Pairwise, Description("SABAL{2} ., ., .")] - public void Sabal_V_8B8H_4H4S_2S2D([ValueSource("_8B4H2S_")] [Random(1)] ulong _Z0, - [ValueSource("_8B4H2S_")] [Random(1)] ulong _Z1, - [ValueSource("_8B4H2S_")] [Random(1)] ulong A0, - [ValueSource("_8B4H2S_")] [Random(1)] ulong B0, - [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> + public void Sabal_V_8B8H_4H4S_2S2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B8H, 4H4S, 2S2D> { - uint Opcode = 0x0E225020; // SABAL V0.8H, V1.8B, V2.8B + uint Opcode = 0x0E205000; // SABAL V0.8H, V0.8B, V0.8B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE0E1(_Z0, _Z1); - Vector128 V1 = MakeVectorE0(A0); - Vector128 V2 = MakeVectorE0(B0); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + Vector128 V2 = MakeVectorE0(B); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - AArch64.Vpart(0, 0, new Bits(_Z0)); - AArch64.Vpart(0, 1, new Bits(_Z1)); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(2, 0, new Bits(B0)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); SimdFp.Sabal_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => @@ -1325,25 +1505,27 @@ namespace Ryujinx.Tests.Cpu } [Test, Pairwise, Description("SABAL{2} ., ., .")] - public void Sabal_V_16B8H_8H4S_4S2D([ValueSource("_8B4H2S_")] [Random(1)] ulong _Z0, - [ValueSource("_8B4H2S_")] [Random(1)] ulong _Z1, - [ValueSource("_8B4H2S_")] [Random(1)] ulong A1, - [ValueSource("_8B4H2S_")] [Random(1)] ulong B1, - [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B, 8H, 4S> + public void Sabal_V_16B8H_8H4S_4S2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B8H, 8H4S, 4S2D> { - uint Opcode = 0x4E225020; // SABAL2 V0.8H, V1.16B, V2.16B + uint Opcode = 0x4E205000; // SABAL2 V0.8H, V0.16B, V0.16B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE0E1(_Z0, _Z1); - Vector128 V1 = MakeVectorE1(A1); - Vector128 V2 = MakeVectorE1(B1); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE1(A); + Vector128 V2 = MakeVectorE1(B); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - AArch64.Vpart(0, 0, new Bits(_Z0)); - AArch64.Vpart(0, 1, new Bits(_Z1)); - AArch64.Vpart(1, 1, new Bits(A1)); - AArch64.Vpart(2, 1, new Bits(B1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 1, new Bits(B)); SimdFp.Sabal_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => @@ -1353,53 +1535,59 @@ namespace Ryujinx.Tests.Cpu }); } - [Test, Description("SABD ., ., .")] - public void Sabd_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong A, - [ValueSource("_8B4H2S_")] [Random(1)] ulong B, + [Test, Pairwise, Description("SABD ., ., .")] + public void Sabd_V_8B_4H_2S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> { - uint Opcode = 0x0E227420; // SABD V0.8B, V1.8B, V2.8B + uint Opcode = 0x0E207400; // SABD V0.8B, V0.8B, V0.8B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE0E1(TestContext.CurrentContext.Random.NextULong(), - TestContext.CurrentContext.Random.NextULong()); + Vector128 V0 = MakeVectorE0E1(Z, Z); Vector128 V1 = MakeVectorE0(A); Vector128 V2 = MakeVectorE0(B); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); AArch64.V(1, new Bits(A)); AArch64.V(2, new Bits(B)); SimdFp.Sabd_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } [Test, Pairwise, Description("SABD ., ., .")] - public void Sabd_V_16B_8H_4S([ValueSource("_8B4H2S_")] [Random(1)] ulong A0, - [ValueSource("_8B4H2S_")] [Random(1)] ulong A1, - [ValueSource("_8B4H2S_")] [Random(1)] ulong B0, - [ValueSource("_8B4H2S_")] [Random(1)] ulong B1, + public void Sabd_V_16B_8H_4S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B, 8H, 4S> { - uint Opcode = 0x4E227420; // SABD V0.16B, V1.16B, V2.16B + uint Opcode = 0x4E207400; // SABD V0.16B, V0.16B, V0.16B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE0E1(TestContext.CurrentContext.Random.NextULong(), - TestContext.CurrentContext.Random.NextULong()); - Vector128 V1 = MakeVectorE0E1(A0, A1); - Vector128 V2 = MakeVectorE0E1(B0, B1); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0E1(B, B); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); - AArch64.Vpart(2, 0, new Bits(B0)); - AArch64.Vpart(2, 1, new Bits(B1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B)); SimdFp.Sabd_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => @@ -1409,125 +1597,150 @@ namespace Ryujinx.Tests.Cpu }); } - [Test, Description("SABDL{2} ., ., .")] - public void Sabdl_V_8B8H_4H4S_2S2D([ValueSource("_8B4H2S_")] [Random(1)] ulong A0, - [ValueSource("_8B4H2S_")] [Random(1)] ulong B0, - [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> + [Test, Pairwise, Description("SABDL{2} ., ., .")] + public void Sabdl_V_8B8H_4H4S_2S2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B8H, 4H4S, 2S2D> { - uint Opcode = 0x0E227020; // SABDL V0.8H, V1.8B, V2.8B + uint Opcode = 0x0E207000; // SABDL V0.8H, V0.8B, V0.8B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE0E1(TestContext.CurrentContext.Random.NextULong(), - TestContext.CurrentContext.Random.NextULong()); - Vector128 V1 = MakeVectorE0(A0); - Vector128 V2 = MakeVectorE0(B0); - AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(2, 0, new Bits(B0)); - SimdFp.Sabdl_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); - - Assert.Multiple(() => - { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); - }); - } - - [Test, Description("SABDL{2} ., ., .")] - public void Sabdl_V_16B8H_8H4S_4S2D([ValueSource("_8B4H2S_")] [Random(1)] ulong A1, - [ValueSource("_8B4H2S_")] [Random(1)] ulong B1, - [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B, 8H, 4S> - { - uint Opcode = 0x4E227020; // SABDL2 V0.8H, V1.16B, V2.16B - Opcode |= ((size & 3) << 22); - Bits Op = new Bits(Opcode); - - Vector128 V0 = MakeVectorE0E1(TestContext.CurrentContext.Random.NextULong(), - TestContext.CurrentContext.Random.NextULong()); - Vector128 V1 = MakeVectorE1(A1); - Vector128 V2 = MakeVectorE1(B1); - AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - - AArch64.Vpart(1, 1, new Bits(A1)); - AArch64.Vpart(2, 1, new Bits(B1)); - SimdFp.Sabdl_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); - - Assert.Multiple(() => - { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); - }); - } - - [Test, Description("SUB , , ")] - public void Sub_S_D([ValueSource("_1D_")] [Random(1)] ulong A, - [ValueSource("_1D_")] [Random(1)] ulong B) - { - uint Opcode = 0x7EE28420; // SUB D0, D1, D2 - Bits Op = new Bits(Opcode); - - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); + Vector128 V0 = MakeVectorE0E1(Z, Z); Vector128 V1 = MakeVectorE0(A); Vector128 V2 = MakeVectorE0(B); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); + SimdFp.Sabdl_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Pairwise, Description("SABDL{2} ., ., .")] + public void Sabdl_V_16B8H_8H4S_4S2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B8H, 8H4S, 4S2D> + { + uint Opcode = 0x4E207000; // SABDL2 V0.8H, V0.16B, V0.16B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE1(A); + Vector128 V2 = MakeVectorE1(B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 1, new Bits(B)); + SimdFp.Sabdl_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Pairwise, Description("SUB , , ")] + public void Sub_S_D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_1D_")] [Random(RndCnt)] ulong B) + { + uint Opcode = 0x7EE08400; // SUB D0, D0, D0 + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + Vector128 V2 = MakeVectorE0(B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); AArch64.V(1, new Bits(A)); AArch64.V(2, new Bits(B)); SimdFp.Sub_S(Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); - }); - } - - [Test, Description("SUB ., ., .")] - public void Sub_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong A, - [ValueSource("_8B4H2S_")] [Random(1)] ulong B, - [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> - { - uint Opcode = 0x2E228420; // SUB V0.8B, V1.8B, V2.8B - Opcode |= ((size & 3) << 22); - Bits Op = new Bits(Opcode); - - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); - Vector128 V1 = MakeVectorE0(A); - Vector128 V2 = MakeVectorE0(B); - AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - - AArch64.V(1, new Bits(A)); - AArch64.V(2, new Bits(B)); - SimdFp.Sub_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); - - Assert.Multiple(() => - { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } [Test, Pairwise, Description("SUB ., ., .")] - public void Sub_V_16B_8H_4S_2D([ValueSource("_8B4H2S1D_")] [Random(1)] ulong A0, - [ValueSource("_8B4H2S1D_")] [Random(1)] ulong A1, - [ValueSource("_8B4H2S1D_")] [Random(1)] ulong B0, - [ValueSource("_8B4H2S1D_")] [Random(1)] ulong B1, - [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D> + public void Sub_V_8B_4H_2S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> { - uint Opcode = 0x6E228420; // SUB V0.16B, V1.16B, V2.16B + uint Opcode = 0x2E208400; // SUB V0.8B, V0.8B, V0.8B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V1 = MakeVectorE0E1(A0, A1); - Vector128 V2 = MakeVectorE0E1(B0, B1); - AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + Vector128 V2 = MakeVectorE0(B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); - AArch64.Vpart(2, 0, new Bits(B0)); - AArch64.Vpart(2, 1, new Bits(B1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.V(1, new Bits(A)); + AArch64.V(2, new Bits(B)); + SimdFp.Sub_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Pairwise, Description("SUB ., ., .")] + public void Sub_V_16B_8H_4S_2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D> + { + uint Opcode = 0x6E208400; // SUB V0.16B, V0.16B, V0.16B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0E1(B, B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B)); SimdFp.Sub_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => @@ -1538,115 +1751,244 @@ namespace Ryujinx.Tests.Cpu } [Test, Pairwise, Description("SUBHN{2} ., ., .")] - public void Subhn_V_8H8B_4S4H_2D2S([ValueSource("_4H2S1D_")] [Random(1)] ulong A0, - [ValueSource("_4H2S1D_")] [Random(1)] ulong A1, - [ValueSource("_4H2S1D_")] [Random(1)] ulong B0, - [ValueSource("_4H2S1D_")] [Random(1)] ulong B1, + public void Subhn_V_8H8B_4S4H_2D2S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong B, [Values(0b00u, 0b01u, 0b10u)] uint size) // <8H8B, 4S4H, 2D2S> { - uint Opcode = 0x0E226020; // SUBHN V0.8B, V1.8H, V2.8H + uint Opcode = 0x0E206000; // SUBHN V0.8B, V0.8H, V0.8H + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong()); - Vector128 V1 = MakeVectorE0E1(A0, A1); - Vector128 V2 = MakeVectorE0E1(B0, B1); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0E1(B, B); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); - AArch64.Vpart(2, 0, new Bits(B0)); - AArch64.Vpart(2, 1, new Bits(B1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B)); SimdFp.Subhn_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } [Test, Pairwise, Description("SUBHN{2} ., ., .")] - public void Subhn_V_8H16B_4S8H_2D4S([ValueSource("_4H2S1D_")] [Random(1)] ulong A0, - [ValueSource("_4H2S1D_")] [Random(1)] ulong A1, - [ValueSource("_4H2S1D_")] [Random(1)] ulong B0, - [ValueSource("_4H2S1D_")] [Random(1)] ulong B1, + public void Subhn_V_8H16B_4S8H_2D4S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong B, [Values(0b00u, 0b01u, 0b10u)] uint size) // <8H16B, 4S8H, 2D4S> { - uint Opcode = 0x4E226020; // SUBHN2 V0.16B, V1.8H, V2.8H + uint Opcode = 0x4E206000; // SUBHN2 V0.16B, V0.8H, V0.8H + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - ulong _E0 = TestContext.CurrentContext.Random.NextULong(); - Vector128 V0 = MakeVectorE0(_E0); - Vector128 V1 = MakeVectorE0E1(A0, A1); - Vector128 V2 = MakeVectorE0E1(B0, B1); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0E1(B, B); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); - AArch64.Vpart(2, 0, new Bits(B0)); - AArch64.Vpart(2, 1, new Bits(B1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B)); SimdFp.Subhn_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(_E0)); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } - [Test, Description("UABA ., ., .")] - public void Uaba_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong _Z, - [ValueSource("_8B4H2S_")] [Random(1)] ulong A, - [ValueSource("_8B4H2S_")] [Random(1)] ulong B, + [Test, Pairwise, Description("TRN1 ., ., .")] + public void Trn1_V_8B_4H_2S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> { - uint Opcode = 0x2E227C20; // UABA V0.8B, V1.8B, V2.8B + uint Opcode = 0x0E002800; // TRN1 V0.8B, V0.8B, V0.8B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE0E1(_Z, TestContext.CurrentContext.Random.NextULong()); + Vector128 V0 = MakeVectorE0E1(Z, Z); Vector128 V1 = MakeVectorE0(A); Vector128 V2 = MakeVectorE0(B); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - AArch64.Vpart(0, 0, new Bits(_Z)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); AArch64.V(1, new Bits(A)); AArch64.V(2, new Bits(B)); - SimdFp.Uaba_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + SimdFp.Trn1_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Pairwise, Description("TRN1 ., ., .")] + public void Trn1_V_16B_8H_4S_2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D> + { + uint Opcode = 0x4E002800; // TRN1 V0.16B, V0.16B, V0.16B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0E1(B, B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B)); + SimdFp.Trn1_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Pairwise, Description("TRN2 ., ., .")] + public void Trn2_V_8B_4H_2S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> + { + uint Opcode = 0x0E006800; // TRN2 V0.8B, V0.8B, V0.8B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + Vector128 V2 = MakeVectorE0(B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.V(1, new Bits(A)); + AArch64.V(2, new Bits(B)); + SimdFp.Trn2_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Pairwise, Description("TRN2 ., ., .")] + public void Trn2_V_16B_8H_4S_2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D> + { + uint Opcode = 0x4E006800; // TRN2 V0.16B, V0.16B, V0.16B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0E1(B, B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B)); + SimdFp.Trn2_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } [Test, Pairwise, Description("UABA ., ., .")] - public void Uaba_V_16B_8H_4S([ValueSource("_8B4H2S_")] [Random(1)] ulong _Z0, - [ValueSource("_8B4H2S_")] [Random(1)] ulong _Z1, - [ValueSource("_8B4H2S_")] [Random(1)] ulong A0, - [ValueSource("_8B4H2S_")] [Random(1)] ulong A1, - [ValueSource("_8B4H2S_")] [Random(1)] ulong B0, - [ValueSource("_8B4H2S_")] [Random(1)] ulong B1, - [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B, 8H, 4S> + public void Uaba_V_8B_4H_2S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> { - uint Opcode = 0x6E227C20; // UABA V0.16B, V1.16B, V2.16B + uint Opcode = 0x2E207C00; // UABA V0.8B, V0.8B, V0.8B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE0E1(_Z0, _Z1); - Vector128 V1 = MakeVectorE0E1(A0, A1); - Vector128 V2 = MakeVectorE0E1(B0, B1); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + Vector128 V2 = MakeVectorE0(B); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - AArch64.Vpart(0, 0, new Bits(_Z0)); - AArch64.Vpart(0, 1, new Bits(_Z1)); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); - AArch64.Vpart(2, 0, new Bits(B0)); - AArch64.Vpart(2, 1, new Bits(B1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.V(1, new Bits(A)); + AArch64.V(2, new Bits(B)); + SimdFp.Uaba_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Pairwise, Description("UABA ., ., .")] + public void Uaba_V_16B_8H_4S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B, 8H, 4S> + { + uint Opcode = 0x6E207C00; // UABA V0.16B, V0.16B, V0.16B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0E1(B, B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B)); SimdFp.Uaba_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => @@ -1657,25 +1999,27 @@ namespace Ryujinx.Tests.Cpu } [Test, Pairwise, Description("UABAL{2} ., ., .")] - public void Uabal_V_8B8H_4H4S_2S2D([ValueSource("_8B4H2S_")] [Random(1)] ulong _Z0, - [ValueSource("_8B4H2S_")] [Random(1)] ulong _Z1, - [ValueSource("_8B4H2S_")] [Random(1)] ulong A0, - [ValueSource("_8B4H2S_")] [Random(1)] ulong B0, - [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> + public void Uabal_V_8B8H_4H4S_2S2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B8H, 4H4S, 2S2D> { - uint Opcode = 0x2E225020; // UABAL V0.8H, V1.8B, V2.8B + uint Opcode = 0x2E205000; // UABAL V0.8H, V0.8B, V0.8B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE0E1(_Z0, _Z1); - Vector128 V1 = MakeVectorE0(A0); - Vector128 V2 = MakeVectorE0(B0); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + Vector128 V2 = MakeVectorE0(B); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - AArch64.Vpart(0, 0, new Bits(_Z0)); - AArch64.Vpart(0, 1, new Bits(_Z1)); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(2, 0, new Bits(B0)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); SimdFp.Uabal_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => @@ -1686,25 +2030,27 @@ namespace Ryujinx.Tests.Cpu } [Test, Pairwise, Description("UABAL{2} ., ., .")] - public void Uabal_V_16B8H_8H4S_4S2D([ValueSource("_8B4H2S_")] [Random(1)] ulong _Z0, - [ValueSource("_8B4H2S_")] [Random(1)] ulong _Z1, - [ValueSource("_8B4H2S_")] [Random(1)] ulong A1, - [ValueSource("_8B4H2S_")] [Random(1)] ulong B1, - [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B, 8H, 4S> + public void Uabal_V_16B8H_8H4S_4S2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B8H, 8H4S, 4S2D> { - uint Opcode = 0x6E225020; // UABAL2 V0.8H, V1.16B, V2.16B + uint Opcode = 0x6E205000; // UABAL2 V0.8H, V0.16B, V0.16B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE0E1(_Z0, _Z1); - Vector128 V1 = MakeVectorE1(A1); - Vector128 V2 = MakeVectorE1(B1); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE1(A); + Vector128 V2 = MakeVectorE1(B); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - AArch64.Vpart(0, 0, new Bits(_Z0)); - AArch64.Vpart(0, 1, new Bits(_Z1)); - AArch64.Vpart(1, 1, new Bits(A1)); - AArch64.Vpart(2, 1, new Bits(B1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 1, new Bits(B)); SimdFp.Uabal_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => @@ -1714,53 +2060,59 @@ namespace Ryujinx.Tests.Cpu }); } - [Test, Description("UABD ., ., .")] - public void Uabd_V_8B_4H_2S([ValueSource("_8B4H2S_")] [Random(1)] ulong A, - [ValueSource("_8B4H2S_")] [Random(1)] ulong B, + [Test, Pairwise, Description("UABD ., ., .")] + public void Uabd_V_8B_4H_2S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> { - uint Opcode = 0x2E227420; // UABD V0.8B, V1.8B, V2.8B + uint Opcode = 0x2E207400; // UABD V0.8B, V0.8B, V0.8B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE0E1(TestContext.CurrentContext.Random.NextULong(), - TestContext.CurrentContext.Random.NextULong()); + Vector128 V0 = MakeVectorE0E1(Z, Z); Vector128 V1 = MakeVectorE0(A); Vector128 V2 = MakeVectorE0(B); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); AArch64.V(1, new Bits(A)); AArch64.V(2, new Bits(B)); SimdFp.Uabd_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64())); - Assert.That(GetVectorE1(ThreadState.V0), Is.Zero); + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } [Test, Pairwise, Description("UABD ., ., .")] - public void Uabd_V_16B_8H_4S([ValueSource("_8B4H2S_")] [Random(1)] ulong A0, - [ValueSource("_8B4H2S_")] [Random(1)] ulong A1, - [ValueSource("_8B4H2S_")] [Random(1)] ulong B0, - [ValueSource("_8B4H2S_")] [Random(1)] ulong B1, + public void Uabd_V_16B_8H_4S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B, 8H, 4S> { - uint Opcode = 0x6E227420; // UABD V0.16B, V1.16B, V2.16B + uint Opcode = 0x6E207400; // UABD V0.16B, V0.16B, V0.16B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE0E1(TestContext.CurrentContext.Random.NextULong(), - TestContext.CurrentContext.Random.NextULong()); - Vector128 V1 = MakeVectorE0E1(A0, A1); - Vector128 V2 = MakeVectorE0E1(B0, B1); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0E1(B, B); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(1, 1, new Bits(A1)); - AArch64.Vpart(2, 0, new Bits(B0)); - AArch64.Vpart(2, 1, new Bits(B1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B)); SimdFp.Uabd_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => @@ -1770,23 +2122,28 @@ namespace Ryujinx.Tests.Cpu }); } - [Test, Description("UABDL{2} ., ., .")] - public void Uabdl_V_8B8H_4H4S_2S2D([ValueSource("_8B4H2S_")] [Random(1)] ulong A0, - [ValueSource("_8B4H2S_")] [Random(1)] ulong B0, - [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> + [Test, Pairwise, Description("UABDL{2} ., ., .")] + public void Uabdl_V_8B8H_4H4S_2S2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B8H, 4H4S, 2S2D> { - uint Opcode = 0x2E227020; // UABDL V0.8H, V1.8B, V2.8B + uint Opcode = 0x2E207000; // UABDL V0.8H, V0.8B, V0.8B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE0E1(TestContext.CurrentContext.Random.NextULong(), - TestContext.CurrentContext.Random.NextULong()); - Vector128 V1 = MakeVectorE0(A0); - Vector128 V2 = MakeVectorE0(B0); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + Vector128 V2 = MakeVectorE0(B); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - AArch64.Vpart(1, 0, new Bits(A0)); - AArch64.Vpart(2, 0, new Bits(B0)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); SimdFp.Uabdl_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => @@ -1796,23 +2153,28 @@ namespace Ryujinx.Tests.Cpu }); } - [Test, Description("UABDL{2} ., ., .")] - public void Uabdl_V_16B8H_8H4S_4S2D([ValueSource("_8B4H2S_")] [Random(1)] ulong A1, - [ValueSource("_8B4H2S_")] [Random(1)] ulong B1, - [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B, 8H, 4S> + [Test, Pairwise, Description("UABDL{2} ., ., .")] + public void Uabdl_V_16B8H_8H4S_4S2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B8H, 8H4S, 4S2D> { - uint Opcode = 0x6E227020; // UABDL2 V0.8H, V1.16B, V2.16B + uint Opcode = 0x6E207000; // UABDL2 V0.8H, V0.16B, V0.16B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcode |= ((size & 3) << 22); Bits Op = new Bits(Opcode); - Vector128 V0 = MakeVectorE0E1(TestContext.CurrentContext.Random.NextULong(), - TestContext.CurrentContext.Random.NextULong()); - Vector128 V1 = MakeVectorE1(A1); - Vector128 V2 = MakeVectorE1(B1); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE1(A); + Vector128 V2 = MakeVectorE1(B); AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - AArch64.Vpart(1, 1, new Bits(A1)); - AArch64.Vpart(2, 1, new Bits(B1)); + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 1, new Bits(B)); SimdFp.Uabdl_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); Assert.Multiple(() => @@ -1821,6 +2183,254 @@ namespace Ryujinx.Tests.Cpu Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); }); } + + [Test, Pairwise, Description("UZP1 ., ., .")] + public void Uzp1_V_8B_4H_2S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> + { + uint Opcode = 0x0E001800; // UZP1 V0.8B, V0.8B, V0.8B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + Vector128 V2 = MakeVectorE0(B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.V(1, new Bits(A)); + AArch64.V(2, new Bits(B)); + SimdFp.Uzp1_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Pairwise, Description("UZP1 ., ., .")] + public void Uzp1_V_16B_8H_4S_2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D> + { + uint Opcode = 0x4E001800; // UZP1 V0.16B, V0.16B, V0.16B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0E1(B, B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B)); + SimdFp.Uzp1_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Pairwise, Description("UZP2 ., ., .")] + public void Uzp2_V_8B_4H_2S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> + { + uint Opcode = 0x0E005800; // UZP2 V0.8B, V0.8B, V0.8B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + Vector128 V2 = MakeVectorE0(B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.V(1, new Bits(A)); + AArch64.V(2, new Bits(B)); + SimdFp.Uzp2_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Pairwise, Description("UZP2 ., ., .")] + public void Uzp2_V_16B_8H_4S_2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D> + { + uint Opcode = 0x4E005800; // UZP2 V0.16B, V0.16B, V0.16B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0E1(B, B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B)); + SimdFp.Uzp2_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Pairwise, Description("ZIP1 ., ., .")] + public void Zip1_V_8B_4H_2S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> + { + uint Opcode = 0x0E003800; // ZIP1 V0.8B, V0.8B, V0.8B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + Vector128 V2 = MakeVectorE0(B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.V(1, new Bits(A)); + AArch64.V(2, new Bits(B)); + SimdFp.Zip1_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Pairwise, Description("ZIP1 ., ., .")] + public void Zip1_V_16B_8H_4S_2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D> + { + uint Opcode = 0x4E003800; // ZIP1 V0.16B, V0.16B, V0.16B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0E1(B, B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B)); + SimdFp.Zip1_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Pairwise, Description("ZIP2 ., ., .")] + public void Zip2_V_8B_4H_2S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B, 4H, 2S> + { + uint Opcode = 0x0E007800; // ZIP2 V0.8B, V0.8B, V0.8B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + Vector128 V2 = MakeVectorE0(B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.V(1, new Bits(A)); + AArch64.V(2, new Bits(B)); + SimdFp.Zip2_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Pairwise, Description("ZIP2 ., ., .")] + public void Zip2_V_16B_8H_4S_2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint size) // <16B, 8H, 4S, 2D> + { + uint Opcode = 0x4E007800; // ZIP2 V0.16B, V0.16B, V0.16B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0E1(B, B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); AArch64.Vpart(2, 1, new Bits(B)); + SimdFp.Zip2_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } #endif } } diff --git a/Ryujinx.Tests/Cpu/Tester/Instructions.cs b/Ryujinx.Tests/Cpu/Tester/Instructions.cs index 1590019a7..68f83423b 100644 --- a/Ryujinx.Tests/Cpu/Tester/Instructions.cs +++ b/Ryujinx.Tests/Cpu/Tester/Instructions.cs @@ -4655,6 +4655,74 @@ namespace Ryujinx.Tests.Cpu.Tester Vpart(d, part, result); } + // trn1_advsimd.html + public static void Trn1_V(bool Q, Bits size, Bits Rm, Bits Rn, Bits Rd) + { + const bool op = false; + + /* Decode */ + int d = (int)UInt(Rd); + int n = (int)UInt(Rn); + int m = (int)UInt(Rm); + + /* if size:Q == '110' then ReservedValue(); */ + + int esize = 8 << (int)UInt(size); + int datasize = (Q ? 128 : 64); + int elements = datasize / esize; + int part = (int)UInt(op); + int pairs = elements / 2; + + /* Operation */ + /* CheckFPAdvSIMDEnabled64(); */ + + Bits result = new Bits(datasize); + Bits operand1 = V(datasize, n); + Bits operand2 = V(datasize, m); + + for (int p = 0; p <= pairs - 1; p++) + { + Elem(result, 2 * p + 0, esize, Elem(operand1, 2 * p + part, esize)); + Elem(result, 2 * p + 1, esize, Elem(operand2, 2 * p + part, esize)); + } + + V(d, result); + } + + // trn2_advsimd.html + public static void Trn2_V(bool Q, Bits size, Bits Rm, Bits Rn, Bits Rd) + { + const bool op = true; + + /* Decode */ + int d = (int)UInt(Rd); + int n = (int)UInt(Rn); + int m = (int)UInt(Rm); + + /* if size:Q == '110' then ReservedValue(); */ + + int esize = 8 << (int)UInt(size); + int datasize = (Q ? 128 : 64); + int elements = datasize / esize; + int part = (int)UInt(op); + int pairs = elements / 2; + + /* Operation */ + /* CheckFPAdvSIMDEnabled64(); */ + + Bits result = new Bits(datasize); + Bits operand1 = V(datasize, n); + Bits operand2 = V(datasize, m); + + for (int p = 0; p <= pairs - 1; p++) + { + Elem(result, 2 * p + 0, esize, Elem(operand1, 2 * p + part, esize)); + Elem(result, 2 * p + 1, esize, Elem(operand2, 2 * p + part, esize)); + } + + V(d, result); + } + // uaba_advsimd.html public static void Uaba_V(bool Q, Bits size, Bits Rm, Bits Rn, Bits Rd) { @@ -4832,6 +4900,146 @@ namespace Ryujinx.Tests.Cpu.Tester V(d, result); } + + // uzp1_advsimd.html + public static void Uzp1_V(bool Q, Bits size, Bits Rm, Bits Rn, Bits Rd) + { + const bool op = false; + + /* Decode */ + int d = (int)UInt(Rd); + int n = (int)UInt(Rn); + int m = (int)UInt(Rm); + + /* if size:Q == '110' then ReservedValue(); */ + + int esize = 8 << (int)UInt(size); + int datasize = (Q ? 128 : 64); + int elements = datasize / esize; + int part = (int)UInt(op); + + /* Operation */ + /* CheckFPAdvSIMDEnabled64(); */ + + Bits result = new Bits(datasize); + Bits operandl = V(datasize, n); + Bits operandh = V(datasize, m); + + Bits zipped = Bits.Concat(operandh, operandl); + + for (int e = 0; e <= elements - 1; e++) + { + Elem(result, e, esize, Elem(zipped, 2 * e + part, esize)); + } + + V(d, result); + } + + // uzp2_advsimd.html + public static void Uzp2_V(bool Q, Bits size, Bits Rm, Bits Rn, Bits Rd) + { + const bool op = true; + + /* Decode */ + int d = (int)UInt(Rd); + int n = (int)UInt(Rn); + int m = (int)UInt(Rm); + + /* if size:Q == '110' then ReservedValue(); */ + + int esize = 8 << (int)UInt(size); + int datasize = (Q ? 128 : 64); + int elements = datasize / esize; + int part = (int)UInt(op); + + /* Operation */ + /* CheckFPAdvSIMDEnabled64(); */ + + Bits result = new Bits(datasize); + Bits operandl = V(datasize, n); + Bits operandh = V(datasize, m); + + Bits zipped = Bits.Concat(operandh, operandl); + + for (int e = 0; e <= elements - 1; e++) + { + Elem(result, e, esize, Elem(zipped, 2 * e + part, esize)); + } + + V(d, result); + } + + // zip1_advsimd.html + public static void Zip1_V(bool Q, Bits size, Bits Rm, Bits Rn, Bits Rd) + { + const bool op = false; + + /* Decode */ + int d = (int)UInt(Rd); + int n = (int)UInt(Rn); + int m = (int)UInt(Rm); + + /* if size:Q == '110' then ReservedValue(); */ + + int esize = 8 << (int)UInt(size); + int datasize = (Q ? 128 : 64); + int elements = datasize / esize; + int part = (int)UInt(op); + int pairs = elements / 2; + + /* Operation */ + /* CheckFPAdvSIMDEnabled64(); */ + + Bits result = new Bits(datasize); + Bits operand1 = V(datasize, n); + Bits operand2 = V(datasize, m); + + int @base = part * pairs; + + for (int p = 0; p <= pairs - 1; p++) + { + Elem(result, 2 * p + 0, esize, Elem(operand1, @base + p, esize)); + Elem(result, 2 * p + 1, esize, Elem(operand2, @base + p, esize)); + } + + V(d, result); + } + + // zip2_advsimd.html + public static void Zip2_V(bool Q, Bits size, Bits Rm, Bits Rn, Bits Rd) + { + const bool op = true; + + /* Decode */ + int d = (int)UInt(Rd); + int n = (int)UInt(Rn); + int m = (int)UInt(Rm); + + /* if size:Q == '110' then ReservedValue(); */ + + int esize = 8 << (int)UInt(size); + int datasize = (Q ? 128 : 64); + int elements = datasize / esize; + int part = (int)UInt(op); + int pairs = elements / 2; + + /* Operation */ + /* CheckFPAdvSIMDEnabled64(); */ + + Bits result = new Bits(datasize); + Bits operand1 = V(datasize, n); + Bits operand2 = V(datasize, m); + + int @base = part * pairs; + + for (int p = 0; p <= pairs - 1; p++) + { + Elem(result, 2 * p + 0, esize, Elem(operand1, @base + p, esize)); + Elem(result, 2 * p + 1, esize, Elem(operand2, @base + p, esize)); + } + + V(d, result); + } #endregion } } From 3e13b40b353a61fe57d1bc1440e1db9bc133df08 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 15 Jul 2018 19:37:27 -0300 Subject: [PATCH 23/23] Add config key to dump shaders in local directory (#265) * Add config key to dump shaders in local directory * Address feedback --- Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs | 5 ++ Ryujinx.Graphics/Gal/ShaderDumper.cs | 96 ++++++++++++++++++++++++ Ryujinx.Graphics/GraphicsConfig.cs | 4 + Ryujinx/Config.cs | 2 + Ryujinx/Ryujinx.conf | 3 + 5 files changed, 110 insertions(+) create mode 100644 Ryujinx.Graphics/Gal/ShaderDumper.cs create mode 100644 Ryujinx.Graphics/GraphicsConfig.cs diff --git a/Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs b/Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs index c55a758b4..ad7177550 100644 --- a/Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs +++ b/Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs @@ -118,6 +118,9 @@ namespace Ryujinx.Graphics.Gal.OpenGL if (IsDualVp) { + ShaderDumper.Dump(Memory, Position + 0x50, Type, "a"); + ShaderDumper.Dump(Memory, PositionB + 0x50, Type, "b"); + Program = Decompiler.Decompile( Memory, Position + 0x50, @@ -126,6 +129,8 @@ namespace Ryujinx.Graphics.Gal.OpenGL } else { + ShaderDumper.Dump(Memory, Position + 0x50, Type); + Program = Decompiler.Decompile(Memory, Position + 0x50, Type); } diff --git a/Ryujinx.Graphics/Gal/ShaderDumper.cs b/Ryujinx.Graphics/Gal/ShaderDumper.cs new file mode 100644 index 000000000..7cd56b21e --- /dev/null +++ b/Ryujinx.Graphics/Gal/ShaderDumper.cs @@ -0,0 +1,96 @@ +using System; +using System.IO; + +namespace Ryujinx.Graphics.Gal +{ + static class ShaderDumper + { + private static string RuntimeDir; + + private static int DumpIndex = 1; + + public static void Dump(IGalMemory Memory, long Position, GalShaderType Type, string ExtSuffix = "") + { + if (string.IsNullOrWhiteSpace(GraphicsConfig.ShadersDumpPath)) + { + return; + } + + string FileName = "Shader" + DumpIndex.ToString("d4") + "." + ShaderExtension(Type) + ExtSuffix + ".bin"; + + string FilePath = Path.Combine(DumpDir(), FileName); + + DumpIndex++; + + using (FileStream Output = File.Create(FilePath)) + using (BinaryWriter Writer = new BinaryWriter(Output)) + { + long Offset = 0; + + ulong Instruction = 0; + + //Dump until a NOP instruction is found + while ((Instruction >> 52 & 0xfff8) != 0x50b0) + { + uint Word0 = (uint)Memory.ReadInt32(Position + Offset + 0); + uint Word1 = (uint)Memory.ReadInt32(Position + Offset + 4); + + Instruction = Word0 | (ulong)Word1 << 32; + + //Zero instructions (other kind of NOP) stop immediatly, + //this is to avoid two rows of zeroes + if (Instruction == 0) + { + break; + } + + Writer.Write(Instruction); + + Offset += 8; + } + + //Align to meet nvdisasm requeriments + while (Offset % 0x20 != 0) + { + Writer.Write(0); + + Offset += 4; + } + } + } + + private static string DumpDir() + { + if (string.IsNullOrEmpty(RuntimeDir)) + { + int Index = 1; + + do + { + RuntimeDir = Path.Combine(GraphicsConfig.ShadersDumpPath, "Dumps" + Index.ToString("d2")); + + Index++; + } + while (Directory.Exists(RuntimeDir)); + + Directory.CreateDirectory(RuntimeDir); + } + + return RuntimeDir; + } + + private static string ShaderExtension(GalShaderType Type) + { + switch (Type) + { + case GalShaderType.Vertex: return "vert"; + case GalShaderType.TessControl: return "tesc"; + case GalShaderType.TessEvaluation: return "tese"; + case GalShaderType.Geometry: return "geom"; + case GalShaderType.Fragment: return "frag"; + + default: throw new ArgumentException(nameof(Type)); + } + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics/GraphicsConfig.cs b/Ryujinx.Graphics/GraphicsConfig.cs new file mode 100644 index 000000000..3e3ef4ffa --- /dev/null +++ b/Ryujinx.Graphics/GraphicsConfig.cs @@ -0,0 +1,4 @@ +public static class GraphicsConfig +{ + public static string ShadersDumpPath; +} diff --git a/Ryujinx/Config.cs b/Ryujinx/Config.cs index 940753ba5..0f346122a 100644 --- a/Ryujinx/Config.cs +++ b/Ryujinx/Config.cs @@ -29,6 +29,8 @@ namespace Ryujinx AOptimizations.DisableMemoryChecks = !Convert.ToBoolean(Parser.Value("Enable_Memory_Checks")); + GraphicsConfig.ShadersDumpPath = Parser.Value("Graphics_Shaders_Dump_Path"); + Log.SetEnable(LogLevel.Debug, Convert.ToBoolean(Parser.Value("Logging_Enable_Debug"))); Log.SetEnable(LogLevel.Stub, Convert.ToBoolean(Parser.Value("Logging_Enable_Stub"))); Log.SetEnable(LogLevel.Info, Convert.ToBoolean(Parser.Value("Logging_Enable_Info"))); diff --git a/Ryujinx/Ryujinx.conf b/Ryujinx/Ryujinx.conf index 59f7f859e..063bb2de4 100644 --- a/Ryujinx/Ryujinx.conf +++ b/Ryujinx/Ryujinx.conf @@ -1,6 +1,9 @@ #Enable cpu memory checks (slow) Enable_Memory_Checks = false +#Dump shaders in local directory (e.g. `C:\ShaderDumps`) +Graphics_Shaders_Dump_Path = + #Enable print debug logs Logging_Enable_Debug = false