From e603b7afbcdff0fc732304872f5a65d410c601f9 Mon Sep 17 00:00:00 2001 From: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> Date: Sun, 18 Nov 2018 03:41:16 +0100 Subject: [PATCH] Add Sse Opt. for S/Umax_V, S/Umin_V, S/Uaddw_V, S/Usubw_V, Fabs_S/V, Fneg_S/V Inst.; for Fcvtl_V, Fcvtn_V Inst.; and for Fcmp_S Inst.. Add/Improve other Sse Opt.. Add Tests. (#496) * Update CpuTest.cs * Update CpuTestSimd.cs * Update CpuTestSimdReg.cs * Update InstEmitSimdCmp.cs * Update SoftFloat.cs * Update InstEmitAluHelper.cs * Update InstEmitSimdArithmetic.cs * Update InstEmitSimdHelper.cs * Update VectorHelper.cs * Update InstEmitSimdCvt.cs * Update InstEmitSimdArithmetic.cs * Update CpuTestSimd.cs * Update InstEmitSimdArithmetic.cs * Update OpCodeTable.cs * Update InstEmitSimdArithmetic.cs * Update InstEmitSimdCmp.cs * Update InstEmitSimdCvt.cs * Update CpuTestSimd.cs * Update CpuTestSimdReg.cs * Create CpuTestSimdFcond.cs * Update OpCodeTable.cs * Update InstEmitSimdMove.cs * Update CpuTestSimdIns.cs * Create CpuTestSimdExt.cs * Nit. * Update PackageReference. --- ChocolArm64/Instructions/InstEmitAluHelper.cs | 29 +- .../Instructions/InstEmitSimdArithmetic.cs | 748 ++++++++++++++++-- ChocolArm64/Instructions/InstEmitSimdCmp.cs | 261 ++++-- ChocolArm64/Instructions/InstEmitSimdCvt.cs | 172 ++-- .../Instructions/InstEmitSimdHelper.cs | 84 +- ChocolArm64/Instructions/InstEmitSimdMove.cs | 121 ++- ChocolArm64/Instructions/SoftFloat.cs | 74 ++ ChocolArm64/Instructions/VectorHelper.cs | 25 +- ChocolArm64/OpCodeTable.cs | 12 +- Ryujinx.Tests/Cpu/CpuTest.cs | 1 - Ryujinx.Tests/Cpu/CpuTestSimd.cs | 214 ++++- Ryujinx.Tests/Cpu/CpuTestSimdExt.cs | 73 ++ Ryujinx.Tests/Cpu/CpuTestSimdFcond.cs | 178 +++++ Ryujinx.Tests/Cpu/CpuTestSimdIns.cs | 198 +++++ Ryujinx.Tests/Cpu/CpuTestSimdReg.cs | 194 +++++ Ryujinx.Tests/Ryujinx.Tests.csproj | 2 +- 16 files changed, 2049 insertions(+), 337 deletions(-) create mode 100644 Ryujinx.Tests/Cpu/CpuTestSimdExt.cs create mode 100644 Ryujinx.Tests/Cpu/CpuTestSimdFcond.cs diff --git a/ChocolArm64/Instructions/InstEmitAluHelper.cs b/ChocolArm64/Instructions/InstEmitAluHelper.cs index 613dd2340..97c505640 100644 --- a/ChocolArm64/Instructions/InstEmitAluHelper.cs +++ b/ChocolArm64/Instructions/InstEmitAluHelper.cs @@ -190,23 +190,32 @@ namespace ChocolArm64.Instructions } } - public static void EmitSetNzcv(ILEmitterCtx context, int nzcv) + public static void EmitSetNzcv(ILEmitterCtx context) { - context.EmitLdc_I4((nzcv >> 0) & 1); - + context.Emit(OpCodes.Dup); + context.Emit(OpCodes.Ldc_I4_1); + context.Emit(OpCodes.And); context.EmitStflg((int)PState.VBit); - context.EmitLdc_I4((nzcv >> 1) & 1); - + context.Emit(OpCodes.Ldc_I4_1); + context.Emit(OpCodes.Shr); + context.Emit(OpCodes.Dup); + context.Emit(OpCodes.Ldc_I4_1); + context.Emit(OpCodes.And); context.EmitStflg((int)PState.CBit); - context.EmitLdc_I4((nzcv >> 2) & 1); - + context.Emit(OpCodes.Ldc_I4_1); + context.Emit(OpCodes.Shr); + context.Emit(OpCodes.Dup); + context.Emit(OpCodes.Ldc_I4_1); + context.Emit(OpCodes.And); context.EmitStflg((int)PState.ZBit); - context.EmitLdc_I4((nzcv >> 3) & 1); - + context.Emit(OpCodes.Ldc_I4_1); + context.Emit(OpCodes.Shr); + context.Emit(OpCodes.Ldc_I4_1); + context.Emit(OpCodes.And); context.EmitStflg((int)PState.NBit); } } -} \ No newline at end of file +} diff --git a/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs b/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs index 5668bb644..c05e9f946 100644 --- a/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs +++ b/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs @@ -186,18 +186,101 @@ namespace ChocolArm64.Instructions public static void Fabs_S(ILEmitterCtx context) { - EmitScalarUnaryOpF(context, () => + if (Optimizations.UseSse2) { - EmitUnaryMathCall(context, nameof(Math.Abs)); - }); + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + if (op.Size == 0) + { + Type[] typesSsv = new Type[] { typeof(float) }; + Type[] typesAndNot = new Type[] { typeof(Vector128), typeof(Vector128) }; + + context.EmitLdc_R4(-0f); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetScalarVector128), typesSsv)); + + context.EmitLdvec(op.Rn); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesAndNot)); + + context.EmitStvec(op.Rd); + + EmitVectorZero32_128(context, op.Rd); + } + else /* if (op.Size == 1) */ + { + Type[] typesSsv = new Type[] { typeof(double) }; + Type[] typesAndNot = new Type[] { typeof(Vector128), typeof(Vector128) }; + + context.EmitLdc_R8(-0d); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), typesSsv)); + + EmitLdvecWithCastToDouble(context, op.Rn); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNot)); + + EmitStvecWithCastFromDouble(context, op.Rd); + + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitScalarUnaryOpF(context, () => + { + EmitUnaryMathCall(context, nameof(Math.Abs)); + }); + } } public static void Fabs_V(ILEmitterCtx context) { - EmitVectorUnaryOpF(context, () => + if (Optimizations.UseSse2) { - EmitUnaryMathCall(context, nameof(Math.Abs)); - }); + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Type[] typesSav = new Type[] { typeof(float) }; + Type[] typesAndNot = new Type[] { typeof(Vector128), typeof(Vector128) }; + + context.EmitLdc_R4(-0f); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetAllVector128), typesSav)); + + context.EmitLdvec(op.Rn); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesAndNot)); + + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else /* if (sizeF == 1) */ + { + Type[] typesSav = new Type[] { typeof(double) }; + Type[] typesAndNot = new Type[] { typeof(Vector128), typeof(Vector128) }; + + context.EmitLdc_R8(-0d); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); + + EmitLdvecWithCastToDouble(context, op.Rn); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNot)); + + EmitStvecWithCastFromDouble(context, op.Rd); + } + } + else + { + EmitVectorUnaryOpF(context, () => + { + EmitUnaryMathCall(context, nameof(Math.Abs)); + }); + } } public static void Fadd_S(ILEmitterCtx context) @@ -283,7 +366,7 @@ namespace ChocolArm64.Instructions } } - public static void Fmadd_S(ILEmitterCtx context) + public static void Fmadd_S(ILEmitterCtx context) // Fused. { if (Optimizations.FastFP && Optimizations.UseSse2) { @@ -450,22 +533,118 @@ namespace ChocolArm64.Instructions }); } - public static void Fmla_V(ILEmitterCtx context) + public static void Fmla_V(ILEmitterCtx context) // Fused. { - EmitVectorTernaryOpF(context, () => + if (Optimizations.FastFP && Optimizations.UseSse2) { - context.Emit(OpCodes.Mul); - context.Emit(OpCodes.Add); - }); + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Type[] typesMulAdd = new Type[] { typeof(Vector128), typeof(Vector128) }; + + context.EmitLdvec(op.Rd); + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulAdd)); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Add), typesMulAdd)); + + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else /* if (sizeF == 1) */ + { + Type[] typesMulAdd = new Type[] { typeof(Vector128), typeof(Vector128) }; + + EmitLdvecWithCastToDouble(context, op.Rd); + EmitLdvecWithCastToDouble(context, op.Rn); + EmitLdvecWithCastToDouble(context, op.Rm); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulAdd)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesMulAdd)); + + EmitStvecWithCastFromDouble(context, op.Rd); + } + } + else + { + EmitVectorTernaryOpF(context, () => + { + EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd)); + }); + } } - public static void Fmla_Ve(ILEmitterCtx context) + public static void Fmla_Ve(ILEmitterCtx context) // Fused. { - EmitVectorTernaryOpByElemF(context, () => + if (Optimizations.FastFP && Optimizations.UseSse2) { - context.Emit(OpCodes.Mul); - context.Emit(OpCodes.Add); - }); + OpCodeSimdRegElemF64 op = (OpCodeSimdRegElemF64)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Type[] typesSfl = new Type[] { typeof(Vector128), typeof(Vector128), typeof(byte) }; + Type[] typesMulAdd = new Type[] { typeof(Vector128), typeof(Vector128) }; + + context.EmitLdvec(op.Rd); + + context.EmitLdvec(op.Rn); + + context.EmitLdvec(op.Rm); + context.Emit(OpCodes.Dup); + + context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl)); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulAdd)); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Add), typesMulAdd)); + + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else /* if (sizeF == 1) */ + { + Type[] typesSfl = new Type[] { typeof(Vector128), typeof(Vector128), typeof(byte) }; + Type[] typesMulAdd = new Type[] { typeof(Vector128), typeof(Vector128) }; + + EmitLdvecWithCastToDouble(context, op.Rd); + + EmitLdvecWithCastToDouble(context, op.Rn); + + EmitLdvecWithCastToDouble(context, op.Rm); + context.Emit(OpCodes.Dup); + + context.EmitLdc_I4(op.Index | op.Index << 1); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulAdd)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesMulAdd)); + + EmitStvecWithCastFromDouble(context, op.Rd); + } + } + else + { + EmitVectorTernaryOpByElemF(context, () => + { + EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd)); + }); + } } public static void Fmls_Se(ILEmitterCtx context) @@ -477,25 +656,121 @@ namespace ChocolArm64.Instructions }); } - public static void Fmls_V(ILEmitterCtx context) + public static void Fmls_V(ILEmitterCtx context) // Fused. { - EmitVectorTernaryOpF(context, () => + if (Optimizations.FastFP && Optimizations.UseSse2) { - context.Emit(OpCodes.Mul); - context.Emit(OpCodes.Sub); - }); + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Type[] typesMulSub = new Type[] { typeof(Vector128), typeof(Vector128) }; + + context.EmitLdvec(op.Rd); + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulSub)); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Subtract), typesMulSub)); + + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else /* if (sizeF == 1) */ + { + Type[] typesMulSub = new Type[] { typeof(Vector128), typeof(Vector128) }; + + EmitLdvecWithCastToDouble(context, op.Rd); + EmitLdvecWithCastToDouble(context, op.Rn); + EmitLdvecWithCastToDouble(context, op.Rm); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulSub)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub)); + + EmitStvecWithCastFromDouble(context, op.Rd); + } + } + else + { + EmitVectorTernaryOpF(context, () => + { + EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulSub)); + }); + } } - public static void Fmls_Ve(ILEmitterCtx context) + public static void Fmls_Ve(ILEmitterCtx context) // Fused. { - EmitVectorTernaryOpByElemF(context, () => + if (Optimizations.FastFP && Optimizations.UseSse2) { - context.Emit(OpCodes.Mul); - context.Emit(OpCodes.Sub); - }); + OpCodeSimdRegElemF64 op = (OpCodeSimdRegElemF64)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Type[] typesSfl = new Type[] { typeof(Vector128), typeof(Vector128), typeof(byte) }; + Type[] typesMulSub = new Type[] { typeof(Vector128), typeof(Vector128) }; + + context.EmitLdvec(op.Rd); + + context.EmitLdvec(op.Rn); + + context.EmitLdvec(op.Rm); + context.Emit(OpCodes.Dup); + + context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl)); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulSub)); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Subtract), typesMulSub)); + + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else /* if (sizeF == 1) */ + { + Type[] typesSfl = new Type[] { typeof(Vector128), typeof(Vector128), typeof(byte) }; + Type[] typesMulSub = new Type[] { typeof(Vector128), typeof(Vector128) }; + + EmitLdvecWithCastToDouble(context, op.Rd); + + EmitLdvecWithCastToDouble(context, op.Rn); + + EmitLdvecWithCastToDouble(context, op.Rm); + context.Emit(OpCodes.Dup); + + context.EmitLdc_I4(op.Index | op.Index << 1); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulSub)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub)); + + EmitStvecWithCastFromDouble(context, op.Rd); + } + } + else + { + EmitVectorTernaryOpByElemF(context, () => + { + EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulSub)); + }); + } } - public static void Fmsub_S(ILEmitterCtx context) + public static void Fmsub_S(ILEmitterCtx context) // Fused. { if (Optimizations.FastFP && Optimizations.UseSse2) { @@ -580,7 +855,59 @@ namespace ChocolArm64.Instructions public static void Fmul_Ve(ILEmitterCtx context) { - EmitVectorBinaryOpByElemF(context, () => context.Emit(OpCodes.Mul)); + if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdRegElemF64 op = (OpCodeSimdRegElemF64)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Type[] typesSfl = new Type[] { typeof(Vector128), typeof(Vector128), typeof(byte) }; + Type[] typesMul = new Type[] { typeof(Vector128), typeof(Vector128) }; + + context.EmitLdvec(op.Rn); + + context.EmitLdvec(op.Rm); + context.Emit(OpCodes.Dup); + + context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl)); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMul)); + + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else /* if (sizeF == 1) */ + { + Type[] typesSfl = new Type[] { typeof(Vector128), typeof(Vector128), typeof(byte) }; + Type[] typesMul = new Type[] { typeof(Vector128), typeof(Vector128) }; + + EmitLdvecWithCastToDouble(context, op.Rn); + + EmitLdvecWithCastToDouble(context, op.Rm); + context.Emit(OpCodes.Dup); + + context.EmitLdc_I4(op.Index | op.Index << 1); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMul)); + + EmitStvecWithCastFromDouble(context, op.Rd); + } + } + else + { + EmitVectorBinaryOpByElemF(context, () => + { + EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul)); + }); + } } public static void Fmulx_S(ILEmitterCtx context) @@ -617,12 +944,95 @@ namespace ChocolArm64.Instructions public static void Fneg_S(ILEmitterCtx context) { - EmitScalarUnaryOpF(context, () => context.Emit(OpCodes.Neg)); + if (Optimizations.UseSse2) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + if (op.Size == 0) + { + Type[] typesSsv = new Type[] { typeof(float) }; + Type[] typesXor = new Type[] { typeof(Vector128), typeof(Vector128) }; + + context.EmitLdc_R4(-0f); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetScalarVector128), typesSsv)); + + context.EmitLdvec(op.Rn); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Xor), typesXor)); + + context.EmitStvec(op.Rd); + + EmitVectorZero32_128(context, op.Rd); + } + else /* if (op.Size == 1) */ + { + Type[] typesSsv = new Type[] { typeof(double) }; + Type[] typesXor = new Type[] { typeof(Vector128), typeof(Vector128) }; + + context.EmitLdc_R8(-0d); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), typesSsv)); + + EmitLdvecWithCastToDouble(context, op.Rn); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXor)); + + EmitStvecWithCastFromDouble(context, op.Rd); + + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitScalarUnaryOpF(context, () => context.Emit(OpCodes.Neg)); + } } public static void Fneg_V(ILEmitterCtx context) { - EmitVectorUnaryOpF(context, () => context.Emit(OpCodes.Neg)); + if (Optimizations.UseSse2) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Type[] typesSav = new Type[] { typeof(float) }; + Type[] typesXor = new Type[] { typeof(Vector128), typeof(Vector128) }; + + context.EmitLdc_R4(-0f); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetAllVector128), typesSav)); + + context.EmitLdvec(op.Rn); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Xor), typesXor)); + + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else /* if (sizeF == 1) */ + { + Type[] typesSav = new Type[] { typeof(double) }; + Type[] typesXor = new Type[] { typeof(Vector128), typeof(Vector128) }; + + context.EmitLdc_R8(-0d); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); + + EmitLdvecWithCastToDouble(context, op.Rn); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXor)); + + EmitStvecWithCastFromDouble(context, op.Rd); + } + } + else + { + EmitVectorUnaryOpF(context, () => context.Emit(OpCodes.Neg)); + } } public static void Fnmadd_S(ILEmitterCtx context) @@ -689,7 +1099,7 @@ namespace ChocolArm64.Instructions }); } - public static void Frecps_S(ILEmitterCtx context) + public static void Frecps_S(ILEmitterCtx context) // Fused. { if (Optimizations.FastFP && Optimizations.UseSse2) { @@ -743,7 +1153,7 @@ namespace ChocolArm64.Instructions } } - public static void Frecps_V(ILEmitterCtx context) + public static void Frecps_V(ILEmitterCtx context) // Fused. { if (Optimizations.FastFP && Optimizations.UseSse2) { @@ -986,7 +1396,7 @@ namespace ChocolArm64.Instructions }); } - public static void Frsqrts_S(ILEmitterCtx context) + public static void Frsqrts_S(ILEmitterCtx context) // Fused. { if (Optimizations.FastFP && Optimizations.UseSse2) { @@ -1048,7 +1458,7 @@ namespace ChocolArm64.Instructions } } - public static void Frsqrts_V(ILEmitterCtx context) + public static void Frsqrts_V(ILEmitterCtx context) // Fused. { if (Optimizations.FastFP && Optimizations.UseSse2) { @@ -1310,7 +1720,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); - EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + EmitLdvecWithSignedCast(context, op.Rm, op.Size); context.EmitLdc_I4(numBytes); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); @@ -1334,7 +1744,38 @@ namespace ChocolArm64.Instructions public static void Saddw_V(ILEmitterCtx context) { - EmitVectorWidenRmBinaryOpSx(context, () => context.Emit(OpCodes.Add)); + if (Optimizations.UseSse41) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + Type[] typesSrl = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) }; + Type[] typesCvt = new Type[] { VectorIntTypesPerSizeLog2[op.Size] }; + Type[] typesAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size + 1], + VectorIntTypesPerSizeLog2[op.Size + 1] }; + + string[] namesCvt = new string[] { nameof(Sse41.ConvertToVector128Int16), + nameof(Sse41.ConvertToVector128Int32), + nameof(Sse41.ConvertToVector128Int64) }; + + int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; + + EmitLdvecWithSignedCast(context, op.Rn, op.Size + 1); + + EmitLdvecWithSignedCast(context, op.Rm, op.Size); + + context.EmitLdc_I4(numBytes); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + + context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd)); + + EmitStvecWithSignedCast(context, op.Rd, op.Size + 1); + } + else + { + EmitVectorWidenRmBinaryOpSx(context, () => context.Emit(OpCodes.Add)); + } } public static void Shadd_V(ILEmitterCtx context) @@ -1439,11 +1880,34 @@ namespace ChocolArm64.Instructions public static void Smax_V(ILEmitterCtx context) { - Type[] types = new Type[] { typeof(long), typeof(long) }; + if (Optimizations.UseSse41) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; - MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Max), types); + Type[] typesMax = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] }; - EmitVectorBinaryOpSx(context, () => context.EmitCall(mthdInfo)); + Type typeSse = op.Size == 1 ? typeof(Sse2) : typeof(Sse41); + + EmitLdvecWithSignedCast(context, op.Rn, op.Size); + EmitLdvecWithSignedCast(context, op.Rm, op.Size); + + context.EmitCall(typeSse.GetMethod(nameof(Sse2.Max), typesMax)); + + EmitStvecWithSignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + Type[] types = new Type[] { typeof(long), typeof(long) }; + + MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Max), types); + + EmitVectorBinaryOpSx(context, () => context.EmitCall(mthdInfo)); + } } public static void Smaxp_V(ILEmitterCtx context) @@ -1457,11 +1921,34 @@ namespace ChocolArm64.Instructions public static void Smin_V(ILEmitterCtx context) { - Type[] types = new Type[] { typeof(long), typeof(long) }; + if (Optimizations.UseSse41) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; - MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Min), types); + Type[] typesMin = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] }; - EmitVectorBinaryOpSx(context, () => context.EmitCall(mthdInfo)); + Type typeSse = op.Size == 1 ? typeof(Sse2) : typeof(Sse41); + + EmitLdvecWithSignedCast(context, op.Rn, op.Size); + EmitLdvecWithSignedCast(context, op.Rm, op.Size); + + context.EmitCall(typeSse.GetMethod(nameof(Sse2.Min), typesMin)); + + EmitStvecWithSignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + Type[] types = new Type[] { typeof(long), typeof(long) }; + + MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Min), types); + + EmitVectorBinaryOpSx(context, () => context.EmitCall(mthdInfo)); + } } public static void Sminp_V(ILEmitterCtx context) @@ -1484,7 +1971,7 @@ namespace ChocolArm64.Instructions Type[] typesMulAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size + 1], VectorIntTypesPerSizeLog2[op.Size + 1] }; - Type typeMul = op.Size == 0 ? typeof(Sse2) : typeof(Sse41); + Type typeSse = op.Size == 0 ? typeof(Sse2) : typeof(Sse41); string nameCvt = op.Size == 0 ? nameof(Sse41.ConvertToVector128Int16) @@ -1508,7 +1995,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); - context.EmitCall(typeMul.GetMethod(nameof(Sse2.MultiplyLow), typesMulAdd)); + context.EmitCall(typeSse.GetMethod(nameof(Sse2.MultiplyLow), typesMulAdd)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesMulAdd)); @@ -1535,7 +2022,7 @@ namespace ChocolArm64.Instructions Type[] typesMulSub = new Type[] { VectorIntTypesPerSizeLog2[op.Size + 1], VectorIntTypesPerSizeLog2[op.Size + 1] }; - Type typeMul = op.Size == 0 ? typeof(Sse2) : typeof(Sse41); + Type typeSse = op.Size == 0 ? typeof(Sse2) : typeof(Sse41); string nameCvt = op.Size == 0 ? nameof(Sse41.ConvertToVector128Int16) @@ -1559,7 +2046,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); - context.EmitCall(typeMul.GetMethod(nameof(Sse2.MultiplyLow), typesMulSub)); + context.EmitCall(typeSse.GetMethod(nameof(Sse2.MultiplyLow), typesMulSub)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub)); @@ -1735,7 +2222,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); - EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + EmitLdvecWithSignedCast(context, op.Rm, op.Size); context.EmitLdc_I4(numBytes); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); @@ -1754,7 +2241,38 @@ namespace ChocolArm64.Instructions public static void Ssubw_V(ILEmitterCtx context) { - EmitVectorWidenRmBinaryOpSx(context, () => context.Emit(OpCodes.Sub)); + if (Optimizations.UseSse41) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + Type[] typesSrl = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) }; + Type[] typesCvt = new Type[] { VectorIntTypesPerSizeLog2[op.Size] }; + Type[] typesSub = new Type[] { VectorIntTypesPerSizeLog2[op.Size + 1], + VectorIntTypesPerSizeLog2[op.Size + 1] }; + + string[] namesCvt = new string[] { nameof(Sse41.ConvertToVector128Int16), + nameof(Sse41.ConvertToVector128Int32), + nameof(Sse41.ConvertToVector128Int64) }; + + int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; + + EmitLdvecWithSignedCast(context, op.Rn, op.Size + 1); + + EmitLdvecWithSignedCast(context, op.Rm, op.Size); + + context.EmitLdc_I4(numBytes); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + + context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSub)); + + EmitStvecWithSignedCast(context, op.Rd, op.Size + 1); + } + else + { + EmitVectorWidenRmBinaryOpSx(context, () => context.Emit(OpCodes.Sub)); + } } public static void Sub_S(ILEmitterCtx context) @@ -1901,7 +2419,38 @@ namespace ChocolArm64.Instructions public static void Uaddw_V(ILEmitterCtx context) { - EmitVectorWidenRmBinaryOpZx(context, () => context.Emit(OpCodes.Add)); + if (Optimizations.UseSse41) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) }; + Type[] typesCvt = new Type[] { VectorUIntTypesPerSizeLog2[op.Size] }; + Type[] typesAdd = new Type[] { VectorUIntTypesPerSizeLog2[op.Size + 1], + VectorUIntTypesPerSizeLog2[op.Size + 1] }; + + string[] namesCvt = new string[] { nameof(Sse41.ConvertToVector128Int16), + nameof(Sse41.ConvertToVector128Int32), + nameof(Sse41.ConvertToVector128Int64) }; + + int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; + + EmitLdvecWithUnsignedCast(context, op.Rn, op.Size + 1); + + EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + + context.EmitLdc_I4(numBytes); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + + context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd)); + + EmitStvecWithUnsignedCast(context, op.Rd, op.Size + 1); + } + else + { + EmitVectorWidenRmBinaryOpZx(context, () => context.Emit(OpCodes.Add)); + } } public static void Uhadd_V(ILEmitterCtx context) @@ -1992,11 +2541,34 @@ namespace ChocolArm64.Instructions public static void Umax_V(ILEmitterCtx context) { - Type[] types = new Type[] { typeof(ulong), typeof(ulong) }; + if (Optimizations.UseSse41) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; - MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Max), types); + Type[] typesMax = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] }; - EmitVectorBinaryOpZx(context, () => context.EmitCall(mthdInfo)); + Type typeSse = op.Size == 0 ? typeof(Sse2) : typeof(Sse41); + + EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + + context.EmitCall(typeSse.GetMethod(nameof(Sse2.Max), typesMax)); + + EmitStvecWithUnsignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + Type[] types = new Type[] { typeof(ulong), typeof(ulong) }; + + MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Max), types); + + EmitVectorBinaryOpZx(context, () => context.EmitCall(mthdInfo)); + } } public static void Umaxp_V(ILEmitterCtx context) @@ -2010,11 +2582,34 @@ namespace ChocolArm64.Instructions public static void Umin_V(ILEmitterCtx context) { - Type[] types = new Type[] { typeof(ulong), typeof(ulong) }; + if (Optimizations.UseSse41) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; - MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Min), types); + Type[] typesMin = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] }; - EmitVectorBinaryOpZx(context, () => context.EmitCall(mthdInfo)); + Type typeSse = op.Size == 0 ? typeof(Sse2) : typeof(Sse41); + + EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + + context.EmitCall(typeSse.GetMethod(nameof(Sse2.Min), typesMin)); + + EmitStvecWithUnsignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + Type[] types = new Type[] { typeof(ulong), typeof(ulong) }; + + MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Min), types); + + EmitVectorBinaryOpZx(context, () => context.EmitCall(mthdInfo)); + } } public static void Uminp_V(ILEmitterCtx context) @@ -2037,7 +2632,7 @@ namespace ChocolArm64.Instructions Type[] typesMulAdd = new Type[] { VectorIntTypesPerSizeLog2 [op.Size + 1], VectorIntTypesPerSizeLog2 [op.Size + 1] }; - Type typeMul = op.Size == 0 ? typeof(Sse2) : typeof(Sse41); + Type typeSse = op.Size == 0 ? typeof(Sse2) : typeof(Sse41); string nameCvt = op.Size == 0 ? nameof(Sse41.ConvertToVector128Int16) @@ -2061,7 +2656,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); - context.EmitCall(typeMul.GetMethod(nameof(Sse2.MultiplyLow), typesMulAdd)); + context.EmitCall(typeSse.GetMethod(nameof(Sse2.MultiplyLow), typesMulAdd)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesMulAdd)); @@ -2088,7 +2683,7 @@ namespace ChocolArm64.Instructions Type[] typesMulSub = new Type[] { VectorIntTypesPerSizeLog2 [op.Size + 1], VectorIntTypesPerSizeLog2 [op.Size + 1] }; - Type typeMul = op.Size == 0 ? typeof(Sse2) : typeof(Sse41); + Type typeSse = op.Size == 0 ? typeof(Sse2) : typeof(Sse41); string nameCvt = op.Size == 0 ? nameof(Sse41.ConvertToVector128Int16) @@ -2112,7 +2707,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); - context.EmitCall(typeMul.GetMethod(nameof(Sse2.MultiplyLow), typesMulSub)); + context.EmitCall(typeSse.GetMethod(nameof(Sse2.MultiplyLow), typesMulSub)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub)); @@ -2251,7 +2846,38 @@ namespace ChocolArm64.Instructions public static void Usubw_V(ILEmitterCtx context) { - EmitVectorWidenRmBinaryOpZx(context, () => context.Emit(OpCodes.Sub)); + if (Optimizations.UseSse41) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) }; + Type[] typesCvt = new Type[] { VectorUIntTypesPerSizeLog2[op.Size] }; + Type[] typesSub = new Type[] { VectorUIntTypesPerSizeLog2[op.Size + 1], + VectorUIntTypesPerSizeLog2[op.Size + 1] }; + + string[] namesCvt = new string[] { nameof(Sse41.ConvertToVector128Int16), + nameof(Sse41.ConvertToVector128Int32), + nameof(Sse41.ConvertToVector128Int64) }; + + int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; + + EmitLdvecWithUnsignedCast(context, op.Rn, op.Size + 1); + + EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + + context.EmitLdc_I4(numBytes); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + + context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSub)); + + EmitStvecWithUnsignedCast(context, op.Rd, op.Size + 1); + } + else + { + EmitVectorWidenRmBinaryOpZx(context, () => context.Emit(OpCodes.Sub)); + } } private static void EmitAbs(ILEmitterCtx context) diff --git a/ChocolArm64/Instructions/InstEmitSimdCmp.cs b/ChocolArm64/Instructions/InstEmitSimdCmp.cs index c473c0aea..3ee254824 100644 --- a/ChocolArm64/Instructions/InstEmitSimdCmp.cs +++ b/ChocolArm64/Instructions/InstEmitSimdCmp.cs @@ -3,6 +3,7 @@ using ChocolArm64.State; using ChocolArm64.Translation; using System; using System.Reflection.Emit; +using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; using static ChocolArm64.Instructions.InstEmitAluHelper; @@ -137,26 +138,43 @@ namespace ChocolArm64.Instructions context.EmitCondBranch(lblTrue, op.Cond); - EmitSetNzcv(context, op.Nzcv); + context.EmitLdc_I4(op.Nzcv); + EmitSetNzcv(context); context.Emit(OpCodes.Br, lblEnd); context.MarkLabel(lblTrue); - Fcmp_S(context); + EmitFcmpE(context, signalNaNs: false); context.MarkLabel(lblEnd); } public static void Fccmpe_S(ILEmitterCtx context) { - Fccmp_S(context); + OpCodeSimdFcond64 op = (OpCodeSimdFcond64)context.CurrOp; + + ILLabel lblTrue = new ILLabel(); + ILLabel lblEnd = new ILLabel(); + + context.EmitCondBranch(lblTrue, op.Cond); + + context.EmitLdc_I4(op.Nzcv); + EmitSetNzcv(context); + + context.Emit(OpCodes.Br, lblEnd); + + context.MarkLabel(lblTrue); + + EmitFcmpE(context, signalNaNs: true); + + context.MarkLabel(lblEnd); } public static void Fcmeq_S(ILEmitterCtx context) { if (context.CurrOp is OpCodeSimdReg64 && Optimizations.UseSse - && Optimizations.UseSse2) + && Optimizations.UseSse2) { EmitScalarSseOrSse2OpF(context, nameof(Sse.CompareEqualScalar)); } @@ -169,7 +187,7 @@ namespace ChocolArm64.Instructions public static void Fcmeq_V(ILEmitterCtx context) { if (context.CurrOp is OpCodeSimdReg64 && Optimizations.UseSse - && Optimizations.UseSse2) + && Optimizations.UseSse2) { EmitVectorSseOrSse2OpF(context, nameof(Sse.CompareEqual)); } @@ -182,7 +200,7 @@ namespace ChocolArm64.Instructions public static void Fcmge_S(ILEmitterCtx context) { if (context.CurrOp is OpCodeSimdReg64 && Optimizations.UseSse - && Optimizations.UseSse2) + && Optimizations.UseSse2) { EmitScalarSseOrSse2OpF(context, nameof(Sse.CompareGreaterThanOrEqualScalar)); } @@ -195,7 +213,7 @@ namespace ChocolArm64.Instructions public static void Fcmge_V(ILEmitterCtx context) { if (context.CurrOp is OpCodeSimdReg64 && Optimizations.UseSse - && Optimizations.UseSse2) + && Optimizations.UseSse2) { EmitVectorSseOrSse2OpF(context, nameof(Sse.CompareGreaterThanOrEqual)); } @@ -208,7 +226,7 @@ namespace ChocolArm64.Instructions public static void Fcmgt_S(ILEmitterCtx context) { if (context.CurrOp is OpCodeSimdReg64 && Optimizations.UseSse - && Optimizations.UseSse2) + && Optimizations.UseSse2) { EmitScalarSseOrSse2OpF(context, nameof(Sse.CompareGreaterThanScalar)); } @@ -221,7 +239,7 @@ namespace ChocolArm64.Instructions public static void Fcmgt_V(ILEmitterCtx context) { if (context.CurrOp is OpCodeSimdReg64 && Optimizations.UseSse - && Optimizations.UseSse2) + && Optimizations.UseSse2) { EmitVectorSseOrSse2OpF(context, nameof(Sse.CompareGreaterThan)); } @@ -252,31 +270,157 @@ namespace ChocolArm64.Instructions } public static void Fcmp_S(ILEmitterCtx context) + { + EmitFcmpE(context, signalNaNs: false); + } + + public static void Fcmpe_S(ILEmitterCtx context) + { + EmitFcmpE(context, signalNaNs: true); + } + + private static void EmitFcmpE(ILEmitterCtx context, bool signalNaNs) { OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; bool cmpWithZero = !(op is OpCodeSimdFcond64) ? op.Bit3 : false; - //Handle NaN case. - //If any number is NaN, then NZCV = 0011. - if (cmpWithZero) + if (Optimizations.FastFP && Optimizations.UseSse2) { - EmitNaNCheck(context, op.Rn); + if (op.Size == 0) + { + Type[] typesCmp = new Type[] { typeof(Vector128), typeof(Vector128) }; + + ILLabel lblNaN = new ILLabel(); + ILLabel lblEnd = new ILLabel(); + + context.EmitLdvec(op.Rn); + + context.Emit(OpCodes.Dup); + context.EmitStvectmp(); + + if (cmpWithZero) + { + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); + } + else + { + context.EmitLdvec(op.Rm); + } + + context.Emit(OpCodes.Dup); + context.EmitStvectmp2(); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.CompareOrderedScalar), typesCmp)); + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.CompareEqualOrderedScalar), typesCmp)); + + context.Emit(OpCodes.Brtrue_S, lblNaN); + + context.EmitLdc_I4(0); + + context.EmitLdvectmp(); + context.EmitLdvectmp2(); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.CompareGreaterThanOrEqualOrderedScalar), typesCmp)); + + context.EmitLdvectmp(); + context.EmitLdvectmp2(); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.CompareEqualOrderedScalar), typesCmp)); + + context.EmitLdvectmp(); + context.EmitLdvectmp2(); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.CompareLessThanOrderedScalar), typesCmp)); + + context.EmitStflg((int)PState.NBit); + context.EmitStflg((int)PState.ZBit); + context.EmitStflg((int)PState.CBit); + context.EmitStflg((int)PState.VBit); + + context.Emit(OpCodes.Br_S, lblEnd); + + context.MarkLabel(lblNaN); + + context.EmitLdc_I4(1); + context.Emit(OpCodes.Dup); + context.EmitLdc_I4(0); + context.Emit(OpCodes.Dup); + + context.EmitStflg((int)PState.NBit); + context.EmitStflg((int)PState.ZBit); + context.EmitStflg((int)PState.CBit); + context.EmitStflg((int)PState.VBit); + + context.MarkLabel(lblEnd); + } + else /* if (op.Size == 1) */ + { + Type[] typesCmp = new Type[] { typeof(Vector128), typeof(Vector128) }; + + ILLabel lblNaN = new ILLabel(); + ILLabel lblEnd = new ILLabel(); + + EmitLdvecWithCastToDouble(context, op.Rn); + + context.Emit(OpCodes.Dup); + context.EmitStvectmp(); + + if (cmpWithZero) + { + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorDoubleZero)); + } + else + { + EmitLdvecWithCastToDouble(context, op.Rm); + } + + context.Emit(OpCodes.Dup); + context.EmitStvectmp2(); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareOrderedScalar), typesCmp)); + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorDoubleZero)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareEqualOrderedScalar), typesCmp)); + + context.Emit(OpCodes.Brtrue_S, lblNaN); + + context.EmitLdc_I4(0); + + context.EmitLdvectmp(); + context.EmitLdvectmp2(); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareGreaterThanOrEqualOrderedScalar), typesCmp)); + + context.EmitLdvectmp(); + context.EmitLdvectmp2(); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareEqualOrderedScalar), typesCmp)); + + context.EmitLdvectmp(); + context.EmitLdvectmp2(); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareLessThanOrderedScalar), typesCmp)); + + context.EmitStflg((int)PState.NBit); + context.EmitStflg((int)PState.ZBit); + context.EmitStflg((int)PState.CBit); + context.EmitStflg((int)PState.VBit); + + context.Emit(OpCodes.Br_S, lblEnd); + + context.MarkLabel(lblNaN); + + context.EmitLdc_I4(1); + context.Emit(OpCodes.Dup); + context.EmitLdc_I4(0); + context.Emit(OpCodes.Dup); + + context.EmitStflg((int)PState.NBit); + context.EmitStflg((int)PState.ZBit); + context.EmitStflg((int)PState.CBit); + context.EmitStflg((int)PState.VBit); + + context.MarkLabel(lblEnd); + } } else - { - EmitNaNCheck(context, op.Rn); - EmitNaNCheck(context, op.Rm); - - context.Emit(OpCodes.Or); - } - - ILLabel lblNaN = new ILLabel(); - ILLabel lblEnd = new ILLabel(); - - context.Emit(OpCodes.Brtrue_S, lblNaN); - - void EmitLoadOpers() { EmitVectorExtractF(context, op.Rn, 0, op.Size); @@ -286,7 +430,7 @@ namespace ChocolArm64.Instructions { context.EmitLdc_R4(0f); } - else /* if (Op.Size == 1) */ + else // if (op.Size == 1) { context.EmitLdc_R8(0d); } @@ -295,67 +439,12 @@ namespace ChocolArm64.Instructions { EmitVectorExtractF(context, op.Rm, 0, op.Size); } - } - //Z = Rn == Rm - EmitLoadOpers(); + context.EmitLdc_I4(!signalNaNs ? 0 : 1); - context.Emit(OpCodes.Ceq); - context.Emit(OpCodes.Dup); + EmitSoftFloatCall(context, nameof(SoftFloat32.FPCompare)); - context.EmitStflg((int)PState.ZBit); - - //C = Rn >= Rm - EmitLoadOpers(); - - context.Emit(OpCodes.Cgt); - context.Emit(OpCodes.Or); - - context.EmitStflg((int)PState.CBit); - - //N = Rn < Rm - EmitLoadOpers(); - - context.Emit(OpCodes.Clt); - - context.EmitStflg((int)PState.NBit); - - //V = 0 - context.EmitLdc_I4(0); - - context.EmitStflg((int)PState.VBit); - - context.Emit(OpCodes.Br_S, lblEnd); - - context.MarkLabel(lblNaN); - - EmitSetNzcv(context, 0b0011); - - context.MarkLabel(lblEnd); - } - - public static void Fcmpe_S(ILEmitterCtx context) - { - Fcmp_S(context); - } - - private static void EmitNaNCheck(ILEmitterCtx context, int reg) - { - IOpCodeSimd64 op = (IOpCodeSimd64)context.CurrOp; - - EmitVectorExtractF(context, reg, 0, op.Size); - - if (op.Size == 0) - { - context.EmitCall(typeof(float), nameof(float.IsNaN)); - } - else if (op.Size == 1) - { - context.EmitCall(typeof(double), nameof(double.IsNaN)); - } - else - { - throw new InvalidOperationException(); + EmitSetNzcv(context); } } @@ -486,7 +575,7 @@ namespace ChocolArm64.Instructions { context.EmitLdc_R4(0f); } - else /* if (SizeF == 1) */ + else /* if (sizeF == 1) */ { context.EmitLdc_R8(0d); } diff --git a/ChocolArm64/Instructions/InstEmitSimdCvt.cs b/ChocolArm64/Instructions/InstEmitSimdCvt.cs index 45f2bef25..fe8722af3 100644 --- a/ChocolArm64/Instructions/InstEmitSimdCvt.cs +++ b/ChocolArm64/Instructions/InstEmitSimdCvt.cs @@ -76,33 +76,54 @@ namespace ChocolArm64.Instructions int sizeF = op.Size & 1; - int elems = 4 >> sizeF; - - int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; - - for (int index = 0; index < elems; index++) + if (Optimizations.UseSse2 && sizeF == 1) { - if (sizeF == 0) - { - EmitVectorExtractZx(context, op.Rn, part + index, 1); - context.Emit(OpCodes.Conv_U2); + Type[] typesMov = new Type[] { typeof(Vector128), typeof(Vector128) }; + Type[] typesCvt = new Type[] { typeof(Vector128) }; - context.EmitLdarg(TranslatedSub.StateArgIdx); + string nameMov = op.RegisterSize == RegisterSize.Simd128 + ? nameof(Sse.MoveHighToLow) + : nameof(Sse.MoveLowToHigh); - context.EmitCall(typeof(SoftFloat16_32), nameof(SoftFloat16_32.FPConvert)); - } - else /* if (sizeF == 1) */ - { - EmitVectorExtractF(context, op.Rn, part + index, 0); + context.EmitLdvec(op.Rn); + context.Emit(OpCodes.Dup); - context.Emit(OpCodes.Conv_R8); - } + context.EmitCall(typeof(Sse).GetMethod(nameMov, typesMov)); - EmitVectorInsertTmpF(context, index, sizeF); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToVector128Double), typesCvt)); + + EmitStvecWithCastFromDouble(context, op.Rd); } + else + { + int elems = 4 >> sizeF; - context.EmitLdvectmp(); - context.EmitStvec(op.Rd); + int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; + + for (int index = 0; index < elems; index++) + { + if (sizeF == 0) + { + EmitVectorExtractZx(context, op.Rn, part + index, 1); + context.Emit(OpCodes.Conv_U2); + + context.EmitLdarg(TranslatedSub.StateArgIdx); + + context.EmitCall(typeof(SoftFloat16_32), nameof(SoftFloat16_32.FPConvert)); + } + else /* if (sizeF == 1) */ + { + EmitVectorExtractF(context, op.Rn, part + index, 0); + + context.Emit(OpCodes.Conv_R8); + } + + EmitVectorInsertTmpF(context, index, sizeF); + } + + context.EmitLdvectmp(); + context.EmitStvec(op.Rd); + } } public static void Fcvtms_Gp(ILEmitterCtx context) @@ -121,43 +142,70 @@ namespace ChocolArm64.Instructions int sizeF = op.Size & 1; - int elems = 4 >> sizeF; - - int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; - - if (part != 0) + if (Optimizations.UseSse2 && sizeF == 1) { + Type[] typesMov = new Type[] { typeof(Vector128), typeof(Vector128) }; + Type[] typesCvt = new Type[] { typeof(Vector128) }; + + string nameMov = op.RegisterSize == RegisterSize.Simd128 + ? nameof(Sse.MoveLowToHigh) + : nameof(Sse.MoveHighToLow); + context.EmitLdvec(op.Rd); - context.EmitStvectmp(); + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh), typesMov)); + + EmitLdvecWithCastToDouble(context, op.Rn); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToVector128Single), typesCvt)); + context.Emit(OpCodes.Dup); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh), typesMov)); + + context.EmitCall(typeof(Sse).GetMethod(nameMov, typesMov)); + + context.EmitStvec(op.Rd); } - - for (int index = 0; index < elems; index++) + else { - EmitVectorExtractF(context, op.Rn, index, sizeF); + int elems = 4 >> sizeF; - if (sizeF == 0) + int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; + + if (part != 0) { - context.EmitLdarg(TranslatedSub.StateArgIdx); - - context.EmitCall(typeof(SoftFloat32_16), nameof(SoftFloat32_16.FPConvert)); - - context.Emit(OpCodes.Conv_U8); - EmitVectorInsertTmp(context, part + index, 1); + context.EmitLdvec(op.Rd); + context.EmitStvectmp(); } - else /* if (sizeF == 1) */ + + for (int index = 0; index < elems; index++) { - context.Emit(OpCodes.Conv_R4); + EmitVectorExtractF(context, op.Rn, index, sizeF); - EmitVectorInsertTmpF(context, part + index, 0); + if (sizeF == 0) + { + context.EmitLdarg(TranslatedSub.StateArgIdx); + + context.EmitCall(typeof(SoftFloat32_16), nameof(SoftFloat32_16.FPConvert)); + + context.Emit(OpCodes.Conv_U8); + EmitVectorInsertTmp(context, part + index, 1); + } + else /* if (sizeF == 1) */ + { + context.Emit(OpCodes.Conv_R4); + + EmitVectorInsertTmpF(context, part + index, 0); + } } - } - context.EmitLdvectmp(); - context.EmitStvec(op.Rd); + context.EmitLdvectmp(); + context.EmitStvec(op.Rd); - if (part == 0) - { - EmitVectorZeroUpper(context, op.Rd); + if (part == 0) + { + EmitVectorZeroUpper(context, op.Rd); + } } } @@ -260,7 +308,29 @@ namespace ChocolArm64.Instructions public static void Scvtf_V(ILEmitterCtx context) { - EmitVectorCvtf(context, signed: true); + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + int sizeF = op.Size & 1; + + if (Optimizations.UseSse2 && sizeF == 0) + { + Type[] typesCvt = new Type[] { typeof(Vector128) }; + + EmitLdvecWithSignedCast(context, op.Rn, 2); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToVector128Single), typesCvt)); + + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitVectorCvtf(context, signed: true); + } } public static void Ucvtf_Gp(ILEmitterCtx context) @@ -441,16 +511,6 @@ namespace ChocolArm64.Instructions context.EmitStintzr(op.Rd); } - private static void EmitVectorScvtf(ILEmitterCtx context) - { - EmitVectorCvtf(context, true); - } - - private static void EmitVectorUcvtf(ILEmitterCtx context) - { - EmitVectorCvtf(context, false); - } - private static void EmitVectorCvtf(ILEmitterCtx context, bool signed) { OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; diff --git a/ChocolArm64/Instructions/InstEmitSimdHelper.cs b/ChocolArm64/Instructions/InstEmitSimdHelper.cs index fad515102..7b597be37 100644 --- a/ChocolArm64/Instructions/InstEmitSimdHelper.cs +++ b/ChocolArm64/Instructions/InstEmitSimdHelper.cs @@ -219,7 +219,7 @@ namespace ChocolArm64.Instructions type = typeof(Sse); baseType = typeof(Vector128); } - else /* if (SizeF == 1) */ + else /* if (sizeF == 1) */ { type = typeof(Sse2); baseType = typeof(Vector128); @@ -249,7 +249,7 @@ namespace ChocolArm64.Instructions { EmitVectorZero32_128(context, op.Rd); } - else /* if (SizeF == 1) */ + else /* if (sizeF == 1) */ { EmitVectorZeroUpper(context, op.Rd); } @@ -272,7 +272,7 @@ namespace ChocolArm64.Instructions { mthdInfo = typeof(MathF).GetMethod(name, new Type[] { typeof(float) }); } - else /* if (SizeF == 1) */ + else /* if (sizeF == 1) */ { mthdInfo = typeof(Math).GetMethod(name, new Type[] { typeof(double) }); } @@ -292,7 +292,7 @@ namespace ChocolArm64.Instructions { mthdInfo = typeof(MathF).GetMethod(name, new Type[] { typeof(float), typeof(float) }); } - else /* if (SizeF == 1) */ + else /* if (sizeF == 1) */ { mthdInfo = typeof(Math).GetMethod(name, new Type[] { typeof(double), typeof(double) }); } @@ -312,7 +312,7 @@ namespace ChocolArm64.Instructions { mthdInfo = typeof(MathF).GetMethod(nameof(MathF.Round), new Type[] { typeof(float), typeof(MidpointRounding) }); } - else /* if (SizeF == 1) */ + else /* if (sizeF == 1) */ { mthdInfo = typeof(Math).GetMethod(nameof(Math.Round), new Type[] { typeof(double), typeof(MidpointRounding) }); } @@ -334,7 +334,7 @@ namespace ChocolArm64.Instructions { mthdInfo = typeof(SoftFloat).GetMethod(name, new Type[] { typeof(float) }); } - else /* if (SizeF == 1) */ + else /* if (sizeF == 1) */ { mthdInfo = typeof(SoftFloat).GetMethod(name, new Type[] { typeof(double) }); } @@ -961,7 +961,7 @@ namespace ChocolArm64.Instructions { EmitSatQ(context, op.Size, true, true); } - else /* if (Op.Size == 3) */ + else /* if (op.Size == 3) */ { EmitUnarySignedSatQAbsOrNeg(context); } @@ -1022,7 +1022,7 @@ namespace ChocolArm64.Instructions { for (int index = 0; index < elems; index++) { - EmitVectorExtract(context, op.Rn, index, op.Size, signed); + EmitVectorExtract(context, op.Rn, index, op.Size, signed); EmitVectorExtract(context, ((OpCodeSimdReg64)op).Rm, index, op.Size, signed); if (op.Size <= 2) @@ -1031,13 +1031,13 @@ namespace ChocolArm64.Instructions EmitSatQ(context, op.Size, true, signed); } - else /* if (Op.Size == 3) */ + else /* if (op.Size == 3) */ { if (add) { EmitBinarySatQAdd(context, signed); } - else /* if (Sub) */ + else /* if (sub) */ { EmitBinarySatQSub(context, signed); } @@ -1059,7 +1059,7 @@ namespace ChocolArm64.Instructions EmitSatQ(context, op.Size, true, signed); } - else /* if (Op.Size == 3) */ + else /* if (op.Size == 3) */ { EmitBinarySatQAccumulate(context, signed); } @@ -1071,7 +1071,7 @@ namespace ChocolArm64.Instructions { for (int index = 0; index < elems; index++) { - EmitVectorExtract(context, op.Rn, index, op.Size, signed); + EmitVectorExtract(context, op.Rn, index, op.Size, signed); EmitVectorExtract(context, ((OpCodeSimdReg64)op).Rm, index, op.Size, signed); emit(); @@ -1304,52 +1304,64 @@ namespace ChocolArm64.Instructions } } - public static void EmitVectorZeroAll(ILEmitterCtx context, int rd) + public static void EmitVectorZeroAll(ILEmitterCtx context, int reg) { - if (Optimizations.UseSse2) + if (Optimizations.UseSse) { VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); - context.EmitStvec(rd); + context.EmitStvec(reg); } else { - EmitVectorZeroLower(context, rd); - EmitVectorZeroUpper(context, rd); + EmitVectorZeroLower(context, reg); + EmitVectorZeroUpper(context, reg); } } - public static void EmitVectorZeroLower(ILEmitterCtx context, int rd) + public static void EmitVectorZeroLower(ILEmitterCtx context, int reg) { - EmitVectorInsert(context, rd, 0, 3, 0); + EmitVectorInsert(context, reg, 0, 3, 0); } public static void EmitVectorZeroLowerTmp(ILEmitterCtx context) { - EmitVectorInsertTmp(context, 0, 3, 0); + if (Optimizations.UseSse) + { + context.EmitLdvectmp(); + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveHighToLow))); + + context.EmitStvectmp(); + } + else + { + EmitVectorInsertTmp(context, 0, 3, 0); + } } public static void EmitVectorZeroUpper(ILEmitterCtx context, int reg) { - if (Optimizations.UseSse2) + if (Optimizations.UseSse) { - //TODO: Use MoveScalar once it is fixed, as of the - //time of writing it just crashes the JIT. + //TODO: Use Sse2.MoveScalar once it is fixed, + //as of the time of writing it just crashes the JIT (SDK 2.1.500). + + /*Type[] typesMov = new Type[] { typeof(Vector128) }; + EmitLdvecWithUnsignedCast(context, reg, 3); - Type[] types = new Type[] { typeof(Vector128), typeof(byte) }; + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MoveScalar), typesMov)); - //Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MoveScalar), Types)); + EmitStvecWithUnsignedCast(context, reg, 3);*/ - context.EmitLdc_I4(8); + context.EmitLdvec(reg); + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical128BitLane), types)); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh))); - context.EmitLdc_I4(8); - - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), types)); - - EmitStvecWithUnsignedCast(context, reg, 3); + context.EmitStvec(reg); } else { @@ -1359,9 +1371,15 @@ namespace ChocolArm64.Instructions public static void EmitVectorZero32_128(ILEmitterCtx context, int reg) { + if (!Sse.IsSupported) + { + throw new PlatformNotSupportedException(); + } + + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); context.EmitLdvec(reg); - VectorHelper.EmitCall(context, nameof(VectorHelper.VectorZero32_128)); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveScalar))); context.EmitStvec(reg); } diff --git a/ChocolArm64/Instructions/InstEmitSimdMove.cs b/ChocolArm64/Instructions/InstEmitSimdMove.cs index 3f539b8ad..0d9aa3121 100644 --- a/ChocolArm64/Instructions/InstEmitSimdMove.cs +++ b/ChocolArm64/Instructions/InstEmitSimdMove.cs @@ -3,6 +3,7 @@ using ChocolArm64.State; using ChocolArm64.Translation; using System; using System.Reflection.Emit; +using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; using static ChocolArm64.Instructions.InstEmitSimdHelper; @@ -17,6 +18,8 @@ namespace ChocolArm64.Instructions if (Optimizations.UseSse2) { + Type[] typesSav = new Type[] { UIntTypesPerSizeLog2[op.Size] }; + context.EmitLdintzr(op.Rn); switch (op.Size) @@ -26,16 +29,9 @@ namespace ChocolArm64.Instructions case 2: context.Emit(OpCodes.Conv_U4); break; } - Type[] types = new Type[] { UIntTypesPerSizeLog2[op.Size] }; - - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), types)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); EmitStvecWithUnsignedCast(context, op.Rd, op.Size); - - if (op.RegisterSize == RegisterSize.Simd64) - { - EmitVectorZeroUpper(context, op.Rd); - } } else { @@ -48,11 +44,11 @@ namespace ChocolArm64.Instructions EmitVectorInsert(context, op.Rd, index, op.Size); } + } - if (op.RegisterSize == RegisterSize.Simd64) - { - EmitVectorZeroUpper(context, op.Rd); - } + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); } } @@ -69,14 +65,34 @@ namespace ChocolArm64.Instructions { OpCodeSimdIns64 op = (OpCodeSimdIns64)context.CurrOp; - int bytes = op.GetBitsCount() >> 3; - int elems = bytes >> op.Size; - - for (int index = 0; index < elems; index++) + if (Optimizations.UseSse2) { + Type[] typesSav = new Type[] { UIntTypesPerSizeLog2[op.Size] }; + EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size); - EmitVectorInsert(context, op.Rd, index, op.Size); + switch (op.Size) + { + case 0: context.Emit(OpCodes.Conv_U1); break; + case 1: context.Emit(OpCodes.Conv_U2); break; + case 2: context.Emit(OpCodes.Conv_U4); break; + } + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); + + EmitStvecWithUnsignedCast(context, op.Rd, op.Size); + } + else + { + int bytes = op.GetBitsCount() >> 3; + int elems = bytes >> op.Size; + + for (int index = 0; index < elems; index++) + { + EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size); + + EmitVectorInsert(context, op.Rd, index, op.Size); + } } if (op.RegisterSize == RegisterSize.Simd64) @@ -89,32 +105,65 @@ namespace ChocolArm64.Instructions { OpCodeSimdExt64 op = (OpCodeSimdExt64)context.CurrOp; - context.EmitLdvec(op.Rd); - context.EmitStvectmp(); - - int bytes = op.GetBitsCount() >> 3; - - int position = op.Imm4; - - for (int index = 0; index < bytes; index++) + if (Optimizations.UseSse2) { - int reg = op.Imm4 + index < bytes ? op.Rn : op.Rm; + Type[] typesShs = new Type[] { typeof(Vector128), typeof(byte) }; + Type[] typesOr = new Type[] { typeof(Vector128), typeof(Vector128) }; - if (position == bytes) + EmitLdvecWithUnsignedCast(context, op.Rn, 0); + + if (op.RegisterSize == RegisterSize.Simd64) { - position = 0; + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh))); } - EmitVectorExtractZx(context, reg, position++, 0); - EmitVectorInsertTmp(context, index, 0); + context.EmitLdc_I4(op.Imm4); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesShs)); + + EmitLdvecWithUnsignedCast(context, op.Rm, 0); + + context.EmitLdc_I4((op.RegisterSize == RegisterSize.Simd64 ? 8 : 16) - op.Imm4); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical128BitLane), typesShs)); + + if (op.RegisterSize == RegisterSize.Simd64) + { + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh))); + } + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), typesOr)); + + EmitStvecWithUnsignedCast(context, op.Rd, 0); } - - context.EmitLdvectmp(); - context.EmitStvec(op.Rd); - - if (op.RegisterSize == RegisterSize.Simd64) + else { - EmitVectorZeroUpper(context, op.Rd); + int bytes = op.GetBitsCount() >> 3; + + int position = op.Imm4; + + for (int index = 0; index < bytes; index++) + { + int reg = op.Imm4 + index < bytes ? op.Rn : op.Rm; + + if (position == bytes) + { + position = 0; + } + + EmitVectorExtractZx(context, reg, position++, 0); + EmitVectorInsertTmp(context, index, 0); + } + + context.EmitLdvectmp(); + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } } } diff --git a/ChocolArm64/Instructions/SoftFloat.cs b/ChocolArm64/Instructions/SoftFloat.cs index 72b39efc4..2af8afbd0 100644 --- a/ChocolArm64/Instructions/SoftFloat.cs +++ b/ChocolArm64/Instructions/SoftFloat.cs @@ -789,6 +789,43 @@ namespace ChocolArm64.Instructions return result; } + public static int FPCompare(float value1, float value2, bool signalNaNs, CpuThreadState state) + { + Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPCompare: state.Fpcr = 0x{state.Fpcr:X8}"); + + value1 = value1.FPUnpack(out FpType type1, out bool sign1, out _, state); + value2 = value2.FPUnpack(out FpType type2, out bool sign2, out _, state); + + int result; + + if (type1 == FpType.SNaN || type1 == FpType.QNaN || type2 == FpType.SNaN || type2 == FpType.QNaN) + { + result = 0b0011; + + if (type1 == FpType.SNaN || type2 == FpType.SNaN || signalNaNs) + { + FPProcessException(FpExc.InvalidOp, state); + } + } + else + { + if (value1 == value2) + { + result = 0b0110; + } + else if (value1 < value2) + { + result = 0b1000; + } + else + { + result = 0b0010; + } + } + + return result; + } + public static float FPDiv(float value1, float value2, CpuThreadState state) { Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPDiv: state.Fpcr = 0x{state.Fpcr:X8}"); @@ -1584,6 +1621,43 @@ namespace ChocolArm64.Instructions return result; } + public static int FPCompare(double value1, double value2, bool signalNaNs, CpuThreadState state) + { + Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPCompare: state.Fpcr = 0x{state.Fpcr:X8}"); + + value1 = value1.FPUnpack(out FpType type1, out bool sign1, out _, state); + value2 = value2.FPUnpack(out FpType type2, out bool sign2, out _, state); + + int result; + + if (type1 == FpType.SNaN || type1 == FpType.QNaN || type2 == FpType.SNaN || type2 == FpType.QNaN) + { + result = 0b0011; + + if (type1 == FpType.SNaN || type2 == FpType.SNaN || signalNaNs) + { + FPProcessException(FpExc.InvalidOp, state); + } + } + else + { + if (value1 == value2) + { + result = 0b0110; + } + else if (value1 < value2) + { + result = 0b1000; + } + else + { + result = 0b0010; + } + } + + return result; + } + public static double FPDiv(double value1, double value2, CpuThreadState state) { Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPDiv: state.Fpcr = 0x{state.Fpcr:X8}"); diff --git a/ChocolArm64/Instructions/VectorHelper.cs b/ChocolArm64/Instructions/VectorHelper.cs index 8ef15818c..f02c131e6 100644 --- a/ChocolArm64/Instructions/VectorHelper.cs +++ b/ChocolArm64/Instructions/VectorHelper.cs @@ -9,18 +9,6 @@ namespace ChocolArm64.Instructions { static class VectorHelper { - private static readonly Vector128 Zero32128Mask; - - static VectorHelper() - { - if (!Sse2.IsSupported) - { - throw new PlatformNotSupportedException(); - } - - Zero32128Mask = Sse.StaticCast(Sse2.SetVector128(0, 0, 0, 0xffffffff)); - } - public static void EmitCall(ILEmitterCtx context, string name64, string name128) { bool isSimd64 = context.CurrOp.RegisterSize == RegisterSize.Simd64; @@ -491,7 +479,7 @@ namespace ChocolArm64.Instructions { int intValue = BitConverter.SingleToInt32Bits(value); - ushort low = (ushort)(intValue >> 0); + ushort low = (ushort)(intValue >> 0); ushort high = (ushort)(intValue >> 16); Vector128 shortVector = Sse.StaticCast(vector); @@ -578,17 +566,6 @@ namespace ChocolArm64.Instructions throw new PlatformNotSupportedException(); } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector128 VectorZero32_128(Vector128 vector) - { - if (Sse.IsSupported) - { - return Sse.And(vector, Zero32128Mask); - } - - throw new PlatformNotSupportedException(); - } - [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 VectorSingleToSByte(Vector128 vector) { diff --git a/ChocolArm64/OpCodeTable.cs b/ChocolArm64/OpCodeTable.cs index 6b1a724d5..8151718f3 100644 --- a/ChocolArm64/OpCodeTable.cs +++ b/ChocolArm64/OpCodeTable.cs @@ -216,9 +216,9 @@ namespace ChocolArm64 SetA64("01011110111xxxxx100011xxxxxxxxxx", InstEmit.Cmtst_S, typeof(OpCodeSimdReg64)); SetA64("0>001110<<1xxxxx100011xxxxxxxxxx", InstEmit.Cmtst_V, typeof(OpCodeSimdReg64)); SetA64("0x00111000100000010110xxxxxxxxxx", InstEmit.Cnt_V, typeof(OpCodeSimd64)); - SetA64("0x001110000xxxxx000011xxxxxxxxxx", InstEmit.Dup_Gp, typeof(OpCodeSimdIns64)); + SetA64("0>001110000x<>>>000011xxxxxxxxxx", InstEmit.Dup_Gp, typeof(OpCodeSimdIns64)); SetA64("01011110000xxxxx000001xxxxxxxxxx", InstEmit.Dup_S, typeof(OpCodeSimdIns64)); - SetA64("0x001110000xxxxx000001xxxxxxxxxx", InstEmit.Dup_V, typeof(OpCodeSimdIns64)); + SetA64("0>001110000x<>>>000001xxxxxxxxxx", InstEmit.Dup_V, typeof(OpCodeSimdIns64)); SetA64("0x101110001xxxxx000111xxxxxxxxxx", InstEmit.Eor_V, typeof(OpCodeSimdReg64)); SetA64("0>101110000xxxxx00011100<100001110110xxxxxxxxxx", InstEmit.Scvtf_V, typeof(OpCodeSimd64)); SetA64("01011110000xxxxx000000xxxxxxxxxx", InstEmit.Sha1c_V, typeof(OpCodeSimdReg64)); SetA64("0101111000101000000010xxxxxxxxxx", InstEmit.Sha1h_V, typeof(OpCodeSimd64)); SetA64("01011110000xxxxx001000xxxxxxxxxx", InstEmit.Sha1m_V, typeof(OpCodeSimdReg64)); @@ -486,9 +486,9 @@ namespace ChocolArm64 SetA64("001011100x110000001110xxxxxxxxxx", InstEmit.Uaddlv_V, typeof(OpCodeSimd64)); SetA64("01101110<<110000001110xxxxxxxxxx", InstEmit.Uaddlv_V, typeof(OpCodeSimd64)); SetA64("0x101110<<1xxxxx000100xxxxxxxxxx", InstEmit.Uaddw_V, typeof(OpCodeSimdReg64)); - SetA64("x0011110xx100011000000xxxxxxxxxx", InstEmit.Ucvtf_Gp, typeof(OpCodeSimdCvt64)); + SetA64("x00111100x100011000000xxxxxxxxxx", InstEmit.Ucvtf_Gp, typeof(OpCodeSimdCvt64)); SetA64("011111100x100001110110xxxxxxxxxx", InstEmit.Ucvtf_S, typeof(OpCodeSimd64)); - SetA64("0x1011100x100001110110xxxxxxxxxx", InstEmit.Ucvtf_V, typeof(OpCodeSimd64)); + SetA64("0>1011100<100001110110xxxxxxxxxx", InstEmit.Ucvtf_V, typeof(OpCodeSimd64)); SetA64("0x101110<<1xxxxx000001xxxxxxxxxx", InstEmit.Uhadd_V, typeof(OpCodeSimdReg64)); SetA64("0x101110<<1xxxxx001001xxxxxxxxxx", InstEmit.Uhsub_V, typeof(OpCodeSimdReg64)); SetA64("0x101110<<1xxxxx011001xxxxxxxxxx", InstEmit.Umax_V, typeof(OpCodeSimdReg64)); diff --git a/Ryujinx.Tests/Cpu/CpuTest.cs b/Ryujinx.Tests/Cpu/CpuTest.cs index 564d546e1..b970e0554 100644 --- a/Ryujinx.Tests/Cpu/CpuTest.cs +++ b/Ryujinx.Tests/Cpu/CpuTest.cs @@ -333,7 +333,6 @@ namespace Ryujinx.Tests.Cpu Assert.That(_thread.ThreadState.V29, Is.EqualTo(_unicornEmu.Q[29])); Assert.That(_thread.ThreadState.V30, Is.EqualTo(_unicornEmu.Q[30])); Assert.That(_thread.ThreadState.V31, Is.EqualTo(_unicornEmu.Q[31])); - Assert.That(_thread.ThreadState.V31, Is.EqualTo(_unicornEmu.Q[31])); Assert.That(_thread.ThreadState.Fpcr, Is.EqualTo(_unicornEmu.Fpcr)); Assert.That(_thread.ThreadState.Fpsr & (int)fpsrMask, Is.EqualTo(_unicornEmu.Fpsr & (int)fpsrMask)); diff --git a/Ryujinx.Tests/Cpu/CpuTestSimd.cs b/Ryujinx.Tests/Cpu/CpuTestSimd.cs index c5d806b9a..54889eee3 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimd.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimd.cs @@ -39,6 +39,18 @@ namespace Ryujinx.Tests.Cpu 0x8000000000000000ul, 0xFFFFFFFFFFFFFFFFul }; } + private static ulong[] _1S_() + { + return new ulong[] { 0x0000000000000000ul, 0x000000007FFFFFFFul, + 0x0000000080000000ul, 0x00000000FFFFFFFFul }; + } + + private static ulong[] _2S_() + { + return new ulong[] { 0x0000000000000000ul, 0x7FFFFFFF7FFFFFFFul, + 0x8000000080000000ul, 0xFFFFFFFFFFFFFFFFul }; + } + private static ulong[] _4H2S1D_() { return new ulong[] { 0x0000000000000000ul, 0x7FFF7FFF7FFF7FFFul, @@ -244,6 +256,24 @@ namespace Ryujinx.Tests.Cpu #endregion #region "ValueSource (Opcodes)" + private static uint[] _F_Cmp_Cmpe_S_S_() + { + return new uint[] + { + 0x1E202028u, // FCMP S1, #0.0 + 0x1E202038u // FCMPE S1, #0.0 + }; + } + + private static uint[] _F_Cmp_Cmpe_S_D_() + { + return new uint[] + { + 0x1E602028u, // FCMP D1, #0.0 + 0x1E602038u // FCMPE D1, #0.0 + }; + } + private static uint[] _F_Cvt_S_SD_() { return new uint[] @@ -336,37 +366,81 @@ namespace Ryujinx.Tests.Cpu }; } - private static uint[] _F_Recpx_Sqrt_S_S_() + private static uint[] _F_Abs_Neg_Recpx_Sqrt_S_S_() { return new uint[] { + 0x1E20C020u, // FABS S0, S1 + 0x1E214020u, // FNEG S0, S1 0x5EA1F820u, // FRECPX S0, S1 0x1E21C020u // FSQRT S0, S1 }; } - private static uint[] _F_Recpx_Sqrt_S_D_() + private static uint[] _F_Abs_Neg_Recpx_Sqrt_S_D_() { return new uint[] { + 0x1E60C020u, // FABS D0, D1 + 0x1E614020u, // FNEG D0, D1 0x5EE1F820u, // FRECPX D0, D1 0x1E61C020u // FSQRT D0, D1 }; } - private static uint[] _F_Sqrt_V_2S_4S_() + private static uint[] _F_Abs_Neg_Sqrt_V_2S_4S_() { return new uint[] { - 0x2EA1F800u // FSQRT V0.2S, V0.2S + 0x0EA0F800u, // FABS V0.2S, V0.2S + 0x2EA0F800u, // FNEG V0.2S, V0.2S + 0x2EA1F800u // FSQRT V0.2S, V0.2S }; } - private static uint[] _F_Sqrt_V_2D_() + private static uint[] _F_Abs_Neg_Sqrt_V_2D_() { return new uint[] { - 0x6EE1F800u // FSQRT V0.2D, V0.2D + 0x4EE0F800u, // FABS V0.2D, V0.2D + 0x6EE0F800u, // FNEG V0.2D, V0.2D + 0x6EE1F800u // FSQRT V0.2D, V0.2D + }; + } + + private static uint[] _SU_Cvt_F_S_S_() + { + return new uint[] + { + 0x5E21D820u, // SCVTF S0, S1 + 0x7E21D820u // UCVTF S0, S1 + }; + } + + private static uint[] _SU_Cvt_F_S_D_() + { + return new uint[] + { + 0x5E61D820u, // SCVTF D0, D1 + 0x7E61D820u // UCVTF D0, D1 + }; + } + + private static uint[] _SU_Cvt_F_V_2S_4S_() + { + return new uint[] + { + 0x0E21D800u, // SCVTF V0.2S, V0.2S + 0x2E21D800u // UCVTF V0.2S, V0.2S + }; + } + + private static uint[] _SU_Cvt_F_V_2D_() + { + return new uint[] + { + 0x4E61D800u, // SCVTF V0.2D, V0.2D + 0x6E61D800u // UCVTF V0.2D, V0.2D }; } @@ -889,6 +963,38 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(); } + [Test, Pairwise] [Explicit] + public void F_Cmp_Cmpe_S_S([ValueSource("_F_Cmp_Cmpe_S_S_")] uint opcodes, + [ValueSource("_1S_F_")] ulong a) + { + Vector128 v1 = MakeVectorE0(a); + + bool v = TestContext.CurrentContext.Random.NextBool(); + bool c = TestContext.CurrentContext.Random.NextBool(); + bool z = TestContext.CurrentContext.Random.NextBool(); + bool n = TestContext.CurrentContext.Random.NextBool(); + + SingleOpcode(opcodes, v1: v1, overflow: v, carry: c, zero: z, negative: n); + + CompareAgainstUnicorn(fpsrMask: Fpsr.Ioc); + } + + [Test, Pairwise] [Explicit] + public void F_Cmp_Cmpe_S_D([ValueSource("_F_Cmp_Cmpe_S_D_")] uint opcodes, + [ValueSource("_1D_F_")] ulong a) + { + Vector128 v1 = MakeVectorE0(a); + + bool v = TestContext.CurrentContext.Random.NextBool(); + bool c = TestContext.CurrentContext.Random.NextBool(); + bool z = TestContext.CurrentContext.Random.NextBool(); + bool n = TestContext.CurrentContext.Random.NextBool(); + + SingleOpcode(opcodes, v1: v1, overflow: v, carry: c, zero: z, negative: n); + + CompareAgainstUnicorn(fpsrMask: Fpsr.Ioc); + } + [Test, Pairwise] [Explicit] public void F_Cvt_S_SD([ValueSource("_F_Cvt_S_SD_")] uint opcodes, [ValueSource("_1S_F_")] ulong a) @@ -1070,12 +1176,12 @@ namespace Ryujinx.Tests.Cpu } [Test, Pairwise] [Explicit] - public void F_Recpx_Sqrt_S_S([ValueSource("_F_Recpx_Sqrt_S_S_")] uint opcodes, - [ValueSource("_1S_F_")] ulong a) + public void F_Abs_Neg_Recpx_Sqrt_S_S([ValueSource("_F_Abs_Neg_Recpx_Sqrt_S_S_")] uint opcodes, + [ValueSource("_1S_F_")] ulong a) { ulong z = TestContext.CurrentContext.Random.NextULong(); Vector128 v0 = MakeVectorE0E1(z, z); - Vector128 v1 = MakeVectorE0(a); + Vector128 v1 = MakeVectorE0E1(a, z); int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); @@ -1088,12 +1194,12 @@ namespace Ryujinx.Tests.Cpu } [Test, Pairwise] [Explicit] - public void F_Recpx_Sqrt_S_D([ValueSource("_F_Recpx_Sqrt_S_D_")] uint opcodes, - [ValueSource("_1D_F_")] ulong a) + public void F_Abs_Neg_Recpx_Sqrt_S_D([ValueSource("_F_Abs_Neg_Recpx_Sqrt_S_D_")] uint opcodes, + [ValueSource("_1D_F_")] ulong a) { ulong z = TestContext.CurrentContext.Random.NextULong(); Vector128 v0 = MakeVectorE1(z); - Vector128 v1 = MakeVectorE0(a); + Vector128 v1 = MakeVectorE0E1(a, z); int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); @@ -1106,12 +1212,12 @@ namespace Ryujinx.Tests.Cpu } [Test, Pairwise] [Explicit] - public void F_Sqrt_V_2S_4S([ValueSource("_F_Sqrt_V_2S_4S_")] uint opcodes, - [Values(0u)] uint rd, - [Values(1u, 0u)] uint rn, - [ValueSource("_2S_F_")] ulong z, - [ValueSource("_2S_F_")] ulong a, - [Values(0b0u, 0b1u)] uint q) // <2S, 4S> + public void F_Abs_Neg_Sqrt_V_2S_4S([ValueSource("_F_Abs_Neg_Sqrt_V_2S_4S_")] uint opcodes, + [Values(0u)] uint rd, + [Values(1u, 0u)] uint rn, + [ValueSource("_2S_F_")] ulong z, + [ValueSource("_2S_F_")] ulong a, + [Values(0b0u, 0b1u)] uint q) // <2S, 4S> { opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); opcodes |= ((q & 1) << 30); @@ -1130,11 +1236,11 @@ namespace Ryujinx.Tests.Cpu } [Test, Pairwise] [Explicit] - public void F_Sqrt_V_2D([ValueSource("_F_Sqrt_V_2D_")] uint opcodes, - [Values(0u)] uint rd, - [Values(1u, 0u)] uint rn, - [ValueSource("_1D_F_")] ulong z, - [ValueSource("_1D_F_")] ulong a) + public void F_Abs_Neg_Sqrt_V_2D([ValueSource("_F_Abs_Neg_Sqrt_V_2D_")] uint opcodes, + [Values(0u)] uint rd, + [Values(1u, 0u)] uint rn, + [ValueSource("_1D_F_")] ulong z, + [ValueSource("_1D_F_")] ulong a) { opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); @@ -1460,6 +1566,68 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(); } + [Test, Pairwise] [Explicit] + public void SU_Cvt_F_S_S([ValueSource("_SU_Cvt_F_S_S_")] uint opcodes, + [ValueSource("_1S_")] [Random(RndCnt)] ulong a) + { + ulong z = TestContext.CurrentContext.Random.NextULong(); + Vector128 v0 = MakeVectorE0E1(z, z); + Vector128 v1 = MakeVectorE0(a); + + SingleOpcode(opcodes, v0: v0, v1: v1); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise] [Explicit] + public void SU_Cvt_F_S_D([ValueSource("_SU_Cvt_F_S_D_")] uint opcodes, + [ValueSource("_1D_")] [Random(RndCnt)] ulong a) + { + ulong z = TestContext.CurrentContext.Random.NextULong(); + Vector128 v0 = MakeVectorE1(z); + Vector128 v1 = MakeVectorE0(a); + + SingleOpcode(opcodes, v0: v0, v1: v1); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise] [Explicit] + public void SU_Cvt_F_V_2S_4S([ValueSource("_SU_Cvt_F_V_2S_4S_")] uint opcodes, + [Values(0u)] uint rd, + [Values(1u, 0u)] uint rn, + [ValueSource("_2S_")] [Random(RndCnt)] ulong z, + [ValueSource("_2S_")] [Random(RndCnt)] ulong a, + [Values(0b0u, 0b1u)] uint q) // <2S, 4S> + { + opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); + opcodes |= ((q & 1) << 30); + + Vector128 v0 = MakeVectorE0E1(z, z); + Vector128 v1 = MakeVectorE0E1(a, a * q); + + SingleOpcode(opcodes, v0: v0, v1: v1); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise] [Explicit] + public void SU_Cvt_F_V_2D([ValueSource("_SU_Cvt_F_V_2D_")] uint opcodes, + [Values(0u)] uint rd, + [Values(1u, 0u)] uint rn, + [ValueSource("_1D_")] [Random(RndCnt)] ulong z, + [ValueSource("_1D_")] [Random(RndCnt)] ulong a) + { + opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); + + Vector128 v0 = MakeVectorE0E1(z, z); + Vector128 v1 = MakeVectorE0E1(a, a); + + SingleOpcode(opcodes, v0: v0, v1: v1); + + CompareAgainstUnicorn(); + } + [Test, Pairwise] public void Sha1h_Sha1su1_V([ValueSource("_Sha1h_Sha1su1_V_")] uint opcodes, [Values(0u)] uint rd, diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdExt.cs b/Ryujinx.Tests/Cpu/CpuTestSimdExt.cs new file mode 100644 index 000000000..f232989f7 --- /dev/null +++ b/Ryujinx.Tests/Cpu/CpuTestSimdExt.cs @@ -0,0 +1,73 @@ +#define SimdExt + +using NUnit.Framework; + +using System.Runtime.Intrinsics; + +namespace Ryujinx.Tests.Cpu +{ + [Category("SimdExt")] + public sealed class CpuTestSimdExt : CpuTest + { +#if SimdExt + +#region "ValueSource" + private static ulong[] _8B_() + { + return new ulong[] { 0x0000000000000000ul, 0x7F7F7F7F7F7F7F7Ful, + 0x8080808080808080ul, 0xFFFFFFFFFFFFFFFFul }; + } +#endregion + + private const int RndCnt = 2; + + [Test, Pairwise, Description("EXT .8B, .8B, .8B, #")] + public void Ext_V_8B([Values(0u)] uint rd, + [Values(1u, 0u)] uint rn, + [Values(2u, 0u)] uint rm, + [ValueSource("_8B_")] [Random(RndCnt)] ulong z, + [ValueSource("_8B_")] [Random(RndCnt)] ulong a, + [ValueSource("_8B_")] [Random(RndCnt)] ulong b, + [Range(0u, 7u)] uint index) + { + uint imm4 = index & 0x7u; + + uint opcode = 0x2E000000; // EXT V0.8B, V0.8B, V0.8B, #0 + opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); + opcode |= (imm4 << 11); + + Vector128 v0 = MakeVectorE0E1(z, z); + Vector128 v1 = MakeVectorE0(a); + Vector128 v2 = MakeVectorE0(b); + + SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("EXT .16B, .16B, .16B, #")] + public void Ext_V_16B([Values(0u)] uint rd, + [Values(1u, 0u)] uint rn, + [Values(2u, 0u)] uint rm, + [ValueSource("_8B_")] [Random(RndCnt)] ulong z, + [ValueSource("_8B_")] [Random(RndCnt)] ulong a, + [ValueSource("_8B_")] [Random(RndCnt)] ulong b, + [Range(0u, 15u)] uint index) + { + uint imm4 = index & 0xFu; + + uint opcode = 0x6E000000; // EXT V0.16B, V0.16B, V0.16B, #0 + opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); + opcode |= (imm4 << 11); + + Vector128 v0 = MakeVectorE0E1(z, z); + Vector128 v1 = MakeVectorE0E1(a, a); + Vector128 v2 = MakeVectorE0E1(b, b); + + SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); + + CompareAgainstUnicorn(); + } +#endif + } +} diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdFcond.cs b/Ryujinx.Tests/Cpu/CpuTestSimdFcond.cs new file mode 100644 index 000000000..48efc18fd --- /dev/null +++ b/Ryujinx.Tests/Cpu/CpuTestSimdFcond.cs @@ -0,0 +1,178 @@ +#define SimdFcond + +using NUnit.Framework; + +using System.Collections.Generic; +using System.Runtime.Intrinsics; + +namespace Ryujinx.Tests.Cpu +{ + [Category("SimdFcond")] + public sealed class CpuTestSimdFcond : CpuTest + { +#if SimdFcond + +#region "ValueSource (Types)" + private static IEnumerable _1S_F_() + { + yield return 0x00000000FF7FFFFFul; // -Max Normal (float.MinValue) + yield return 0x0000000080800000ul; // -Min Normal + yield return 0x00000000807FFFFFul; // -Max Subnormal + yield return 0x0000000080000001ul; // -Min Subnormal (-float.Epsilon) + yield return 0x000000007F7FFFFFul; // +Max Normal (float.MaxValue) + yield return 0x0000000000800000ul; // +Min Normal + yield return 0x00000000007FFFFFul; // +Max Subnormal + yield return 0x0000000000000001ul; // +Min Subnormal (float.Epsilon) + + if (!NoZeros) + { + yield return 0x0000000080000000ul; // -Zero + yield return 0x0000000000000000ul; // +Zero + } + + if (!NoInfs) + { + yield return 0x00000000FF800000ul; // -Infinity + yield return 0x000000007F800000ul; // +Infinity + } + + if (!NoNaNs) + { + yield return 0x00000000FFC00000ul; // -QNaN (all zeros payload) (float.NaN) + yield return 0x00000000FFBFFFFFul; // -SNaN (all ones payload) + yield return 0x000000007FC00000ul; // +QNaN (all zeros payload) (-float.NaN) (DefaultNaN) + yield return 0x000000007FBFFFFFul; // +SNaN (all ones payload) + } + + for (int cnt = 1; cnt <= RndCnt; cnt++) + { + ulong grbg = TestContext.CurrentContext.Random.NextUInt(); + ulong rnd1 = GenNormalS(); + ulong rnd2 = GenSubnormalS(); + + yield return (grbg << 32) | rnd1; + yield return (grbg << 32) | rnd2; + } + } + + private static IEnumerable _1D_F_() + { + yield return 0xFFEFFFFFFFFFFFFFul; // -Max Normal (double.MinValue) + yield return 0x8010000000000000ul; // -Min Normal + yield return 0x800FFFFFFFFFFFFFul; // -Max Subnormal + yield return 0x8000000000000001ul; // -Min Subnormal (-double.Epsilon) + yield return 0x7FEFFFFFFFFFFFFFul; // +Max Normal (double.MaxValue) + yield return 0x0010000000000000ul; // +Min Normal + yield return 0x000FFFFFFFFFFFFFul; // +Max Subnormal + yield return 0x0000000000000001ul; // +Min Subnormal (double.Epsilon) + + if (!NoZeros) + { + yield return 0x8000000000000000ul; // -Zero + yield return 0x0000000000000000ul; // +Zero + } + + if (!NoInfs) + { + yield return 0xFFF0000000000000ul; // -Infinity + yield return 0x7FF0000000000000ul; // +Infinity + } + + if (!NoNaNs) + { + yield return 0xFFF8000000000000ul; // -QNaN (all zeros payload) (double.NaN) + yield return 0xFFF7FFFFFFFFFFFFul; // -SNaN (all ones payload) + yield return 0x7FF8000000000000ul; // +QNaN (all zeros payload) (-double.NaN) (DefaultNaN) + yield return 0x7FF7FFFFFFFFFFFFul; // +SNaN (all ones payload) + } + + for (int cnt = 1; cnt <= RndCnt; cnt++) + { + ulong rnd1 = GenNormalD(); + ulong rnd2 = GenSubnormalD(); + + yield return rnd1; + yield return rnd2; + } + } +#endregion + +#region "ValueSource (Opcodes)" + private static uint[] _F_Ccmp_Ccmpe_S_S_() + { + return new uint[] + { + 0x1E220420u, // FCCMP S1, S2, #0, EQ + 0x1E220430u // FCCMPE S1, S2, #0, EQ + }; + } + + private static uint[] _F_Ccmp_Ccmpe_S_D_() + { + return new uint[] + { + 0x1E620420u, // FCCMP D1, D2, #0, EQ + 0x1E620430u // FCCMPE D1, D2, #0, EQ + }; + } +#endregion + + private const int RndCnt = 2; + private const int RndCntNzcv = 2; + + private static readonly bool NoZeros = false; + private static readonly bool NoInfs = false; + private static readonly bool NoNaNs = false; + + [Test, Pairwise] [Explicit] + public void F_Ccmp_Ccmpe_S_S([ValueSource("_F_Ccmp_Ccmpe_S_S_")] uint opcodes, + [ValueSource("_1S_F_")] ulong a, + [ValueSource("_1S_F_")] ulong b, + [Random(0u, 15u, RndCntNzcv)] uint nzcv, + [Values(0b0000u, 0b0001u, 0b0010u, 0b0011u, // + { + opcodes |= ((cond & 15) << 12) | ((nzcv & 15) << 0); + + Vector128 v1 = MakeVectorE0(a); + Vector128 v2 = MakeVectorE0(b); + + bool v = TestContext.CurrentContext.Random.NextBool(); + bool c = TestContext.CurrentContext.Random.NextBool(); + bool z = TestContext.CurrentContext.Random.NextBool(); + bool n = TestContext.CurrentContext.Random.NextBool(); + + SingleOpcode(opcodes, v1: v1, v2: v2, overflow: v, carry: c, zero: z, negative: n); + + CompareAgainstUnicorn(fpsrMask: Fpsr.Ioc); + } + + [Test, Pairwise] [Explicit] + public void F_Ccmp_Ccmpe_S_D([ValueSource("_F_Ccmp_Ccmpe_S_D_")] uint opcodes, + [ValueSource("_1D_F_")] ulong a, + [ValueSource("_1D_F_")] ulong b, + [Random(0u, 15u, RndCntNzcv)] uint nzcv, + [Values(0b0000u, 0b0001u, 0b0010u, 0b0011u, // + { + opcodes |= ((cond & 15) << 12) | ((nzcv & 15) << 0); + + Vector128 v1 = MakeVectorE0(a); + Vector128 v2 = MakeVectorE0(b); + + bool v = TestContext.CurrentContext.Random.NextBool(); + bool c = TestContext.CurrentContext.Random.NextBool(); + bool z = TestContext.CurrentContext.Random.NextBool(); + bool n = TestContext.CurrentContext.Random.NextBool(); + + SingleOpcode(opcodes, v1: v1, v2: v2, overflow: v, carry: c, zero: z, negative: n); + + CompareAgainstUnicorn(fpsrMask: Fpsr.Ioc); + } +#endif + } +} diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdIns.cs b/Ryujinx.Tests/Cpu/CpuTestSimdIns.cs index f4d6ed8e8..4ca54a2b4 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdIns.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdIns.cs @@ -18,6 +18,24 @@ namespace Ryujinx.Tests.Cpu 0x8000000000000000ul, 0xFFFFFFFFFFFFFFFFul }; } + private static ulong[] _2S_() + { + return new ulong[] { 0x0000000000000000ul, 0x7FFFFFFF7FFFFFFFul, + 0x8000000080000000ul, 0xFFFFFFFFFFFFFFFFul }; + } + + private static ulong[] _4H_() + { + return new ulong[] { 0x0000000000000000ul, 0x7FFF7FFF7FFF7FFFul, + 0x8000800080008000ul, 0xFFFFFFFFFFFFFFFFul }; + } + + private static ulong[] _8B_() + { + return new ulong[] { 0x0000000000000000ul, 0x7F7F7F7F7F7F7F7Ful, + 0x8080808080808080ul, 0xFFFFFFFFFFFFFFFFul }; + } + private static ulong[] _8B4H_() { return new ulong[] { 0x0000000000000000ul, 0x7F7F7F7F7F7F7F7Ful, @@ -89,6 +107,186 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(); } + [Test, Pairwise, Description("DUP B0, V1.B[]")] + public void Dup_S_B([ValueSource("_8B_")] [Random(RndCnt)] ulong a, + [Range(0u, 15u)] uint index) + { + const int size = 0; + + uint imm5 = (index << (size + 1) | 1u << size) & 0x1Fu; + + uint opcode = 0x5E000420; // RESERVED + opcode |= (imm5 << 16); + + ulong z = TestContext.CurrentContext.Random.NextULong(); + Vector128 v0 = MakeVectorE0E1(z, z); + Vector128 v1 = MakeVectorE0E1(a, a); + + SingleOpcode(opcode, v0: v0, v1: v1); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("DUP H0, V1.H[]")] + public void Dup_S_H([ValueSource("_4H_")] [Random(RndCnt)] ulong a, + [Range(0u, 7u)] uint index) + { + const int size = 1; + + uint imm5 = (index << (size + 1) | 1u << size) & 0x1Fu; + + uint opcode = 0x5E000420; // RESERVED + opcode |= (imm5 << 16); + + ulong z = TestContext.CurrentContext.Random.NextULong(); + Vector128 v0 = MakeVectorE0E1(z, z); + Vector128 v1 = MakeVectorE0E1(a, a); + + SingleOpcode(opcode, v0: v0, v1: v1); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("DUP S0, V1.S[]")] + public void Dup_S_S([ValueSource("_2S_")] [Random(RndCnt)] ulong a, + [Range(0u, 3u)] uint index) + { + const int size = 2; + + uint imm5 = (index << (size + 1) | 1u << size) & 0x1Fu; + + uint opcode = 0x5E000420; // RESERVED + opcode |= (imm5 << 16); + + ulong z = TestContext.CurrentContext.Random.NextULong(); + Vector128 v0 = MakeVectorE0E1(z, z); + Vector128 v1 = MakeVectorE0E1(a, a); + + SingleOpcode(opcode, v0: v0, v1: v1); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("DUP D0, V1.D[]")] + public void Dup_S_D([ValueSource("_1D_")] [Random(RndCnt)] ulong a, + [Range(0u, 1u)] uint index) + { + const int size = 3; + + uint imm5 = (index << (size + 1) | 1u << size) & 0x1Fu; + + uint opcode = 0x5E000420; // RESERVED + opcode |= (imm5 << 16); + + ulong z = TestContext.CurrentContext.Random.NextULong(); + Vector128 v0 = MakeVectorE0E1(z, z); + Vector128 v1 = MakeVectorE0E1(a, a); + + SingleOpcode(opcode, v0: v0, v1: v1); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("DUP ., .B[]")] + public void Dup_V_8B_16B([Values(0u)] uint rd, + [Values(1u, 0u)] uint rn, + [ValueSource("_8B_")] [Random(RndCnt)] ulong z, + [ValueSource("_8B_")] [Random(RndCnt)] ulong a, + [Range(0u, 15u)] uint index, + [Values(0b0u, 0b1u)] uint q) // <8B, 16B> + { + const int size = 0; + + uint imm5 = (index << (size + 1) | 1u << size) & 0x1Fu; + + uint opcode = 0x0E000400; // RESERVED + opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); + opcode |= (imm5 << 16); + opcode |= ((q & 1) << 30); + + Vector128 v0 = MakeVectorE0E1(z, z); + Vector128 v1 = MakeVectorE0E1(a, a); + + SingleOpcode(opcode, v0: v0, v1: v1); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("DUP ., .H[]")] + public void Dup_V_4H_8H([Values(0u)] uint rd, + [Values(1u, 0u)] uint rn, + [ValueSource("_4H_")] [Random(RndCnt)] ulong z, + [ValueSource("_4H_")] [Random(RndCnt)] ulong a, + [Range(0u, 7u)] uint index, + [Values(0b0u, 0b1u)] uint q) // <4H, 8H> + { + const int size = 1; + + uint imm5 = (index << (size + 1) | 1u << size) & 0x1Fu; + + uint opcode = 0x0E000400; // RESERVED + opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); + opcode |= (imm5 << 16); + opcode |= ((q & 1) << 30); + + Vector128 v0 = MakeVectorE0E1(z, z); + Vector128 v1 = MakeVectorE0E1(a, a); + + SingleOpcode(opcode, v0: v0, v1: v1); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("DUP ., .S[]")] + public void Dup_V_2S_4S([Values(0u)] uint rd, + [Values(1u, 0u)] uint rn, + [ValueSource("_2S_")] [Random(RndCnt)] ulong z, + [ValueSource("_2S_")] [Random(RndCnt)] ulong a, + [Range(0u, 3u)] uint index, + [Values(0b0u, 0b1u)] uint q) // <2S, 4S> + { + const int size = 2; + + uint imm5 = (index << (size + 1) | 1u << size) & 0x1Fu; + + uint opcode = 0x0E000400; // RESERVED + opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); + opcode |= (imm5 << 16); + opcode |= ((q & 1) << 30); + + Vector128 v0 = MakeVectorE0E1(z, z); + Vector128 v1 = MakeVectorE0E1(a, a); + + SingleOpcode(opcode, v0: v0, v1: v1); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("DUP ., .D[]")] + public void Dup_V_2D([Values(0u)] uint rd, + [Values(1u, 0u)] uint rn, + [ValueSource("_1D_")] [Random(RndCnt)] ulong z, + [ValueSource("_1D_")] [Random(RndCnt)] ulong a, + [Range(0u, 1u)] uint index, + [Values(0b1u)] uint q) // <2D> + { + const int size = 3; + + uint imm5 = (index << (size + 1) | 1u << size) & 0x1Fu; + + uint opcode = 0x0E000400; // RESERVED + opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); + opcode |= (imm5 << 16); + opcode |= ((q & 1) << 30); + + Vector128 v0 = MakeVectorE0E1(z, z); + Vector128 v1 = MakeVectorE0E1(a, a); + + SingleOpcode(opcode, v0: v0, v1: v1); + + CompareAgainstUnicorn(); + } + [Test, Pairwise, Description("SMOV , .[]")] public void Smov_S_W([Values(0u, 31u)] uint rd, [Values(1u)] uint rn, diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs b/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs index 434237253..d43447a7a 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs @@ -250,6 +250,24 @@ namespace Ryujinx.Tests.Cpu }; } + private static uint[] _F_Cmp_Cmpe_S_S_() + { + return new uint[] + { + 0x1E222020u, // FCMP S1, S2 + 0x1E222030u // FCMPE S1, S2 + }; + } + + private static uint[] _F_Cmp_Cmpe_S_D_() + { + return new uint[] + { + 0x1E622020u, // FCMP D1, D2 + 0x1E622030u // FCMPE D1, D2 + }; + } + private static uint[] _F_Madd_Msub_S_S_() { return new uint[] @@ -316,6 +334,24 @@ namespace Ryujinx.Tests.Cpu }; } + private static uint[] _F_Mla_Mls_V_2S_4S_() + { + return new uint[] + { + 0x0E20CC00u, // FMLA V0.2S, V0.2S, V0.2S + 0x0EA0CC00u // FMLS V0.2S, V0.2S, V0.2S + }; + } + + private static uint[] _F_Mla_Mls_V_2D_() + { + return new uint[] + { + 0x4E60CC00u, // FMLA V0.2D, V0.2D, V0.2D + 0x4EE0CC00u // FMLS V0.2D, V0.2D, V0.2D + }; + } + private static uint[] _F_Recps_Rsqrts_S_S_() { return new uint[] @@ -372,6 +408,28 @@ namespace Ryujinx.Tests.Cpu 0x5E006000u // SHA256SU1 V0.4S, V0.4S, V0.4S }; } + + private static uint[] _S_Max_Min_P_V_() + { + return new uint[] + { + 0x0E206400u, // SMAX V0.8B, V0.8B, V0.8B + 0x0E20A400u, // SMAXP V0.8B, V0.8B, V0.8B + 0x0E206C00u, // SMIN V0.8B, V0.8B, V0.8B + 0x0E20AC00u // SMINP V0.8B, V0.8B, V0.8B + }; + } + + private static uint[] _U_Max_Min_P_V_() + { + return new uint[] + { + 0x2E206400u, // UMAX V0.8B, V0.8B, V0.8B + 0x2E20A400u, // UMAXP V0.8B, V0.8B, V0.8B + 0x2E206C00u, // UMIN V0.8B, V0.8B, V0.8B + 0x2E20AC00u // UMINP V0.8B, V0.8B, V0.8B + }; + } #endregion private const int RndCnt = 2; @@ -1248,6 +1306,42 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(fpsrMask: Fpsr.Ioc | Fpsr.Dzc | Fpsr.Idc); } + [Test, Pairwise] [Explicit] + public void F_Cmp_Cmpe_S_S([ValueSource("_F_Cmp_Cmpe_S_S_")] uint opcodes, + [ValueSource("_1S_F_")] ulong a, + [ValueSource("_1S_F_")] ulong b) + { + Vector128 v1 = MakeVectorE0(a); + Vector128 v2 = MakeVectorE0(b); + + bool v = TestContext.CurrentContext.Random.NextBool(); + bool c = TestContext.CurrentContext.Random.NextBool(); + bool z = TestContext.CurrentContext.Random.NextBool(); + bool n = TestContext.CurrentContext.Random.NextBool(); + + SingleOpcode(opcodes, v1: v1, v2: v2, overflow: v, carry: c, zero: z, negative: n); + + CompareAgainstUnicorn(fpsrMask: Fpsr.Ioc); + } + + [Test, Pairwise] [Explicit] + public void F_Cmp_Cmpe_S_D([ValueSource("_F_Cmp_Cmpe_S_D_")] uint opcodes, + [ValueSource("_1D_F_")] ulong a, + [ValueSource("_1D_F_")] ulong b) + { + Vector128 v1 = MakeVectorE0(a); + Vector128 v2 = MakeVectorE0(b); + + bool v = TestContext.CurrentContext.Random.NextBool(); + bool c = TestContext.CurrentContext.Random.NextBool(); + bool z = TestContext.CurrentContext.Random.NextBool(); + bool n = TestContext.CurrentContext.Random.NextBool(); + + SingleOpcode(opcodes, v1: v1, v2: v2, overflow: v, carry: c, zero: z, negative: n); + + CompareAgainstUnicorn(fpsrMask: Fpsr.Ioc); + } + [Test, Pairwise] [Explicit] // Fused. public void F_Madd_Msub_S_S([ValueSource("_F_Madd_Msub_S_S_")] uint opcodes, [ValueSource("_1S_F_")] ulong a, @@ -1384,6 +1478,58 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(fpsrMask: Fpsr.Ioc | Fpsr.Idc); } + [Test, Pairwise] [Explicit] // Fused. + public void F_Mla_Mls_V_2S_4S([ValueSource("_F_Mla_Mls_V_2S_4S_")] uint opcodes, + [Values(0u)] uint rd, + [Values(1u, 0u)] uint rn, + [Values(2u, 0u)] uint rm, + [ValueSource("_2S_F_")] ulong z, + [ValueSource("_2S_F_")] ulong a, + [ValueSource("_2S_F_")] ulong b, + [Values(0b0u, 0b1u)] uint q) // <2S, 4S> + { + opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); + opcodes |= ((q & 1) << 30); + + Vector128 v0 = MakeVectorE0E1(z, z); + Vector128 v1 = MakeVectorE0E1(a, a * q); + Vector128 v2 = MakeVectorE0E1(b, b * q); + + int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); + + int fpcr = rnd & (1 << (int)Fpcr.Fz); + fpcr |= rnd & (1 << (int)Fpcr.Dn); + + SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2, fpcr: fpcr); + + CompareAgainstUnicorn(Fpsr.Ioc | Fpsr.Idc, FpSkips.IfUnderflow, FpTolerances.UpToOneUlpsS); + } + + [Test, Pairwise] [Explicit] // Fused. + public void F_Mla_Mls_V_2D([ValueSource("_F_Mla_Mls_V_2D_")] uint opcodes, + [Values(0u)] uint rd, + [Values(1u, 0u)] uint rn, + [Values(2u, 0u)] uint rm, + [ValueSource("_1D_F_")] ulong z, + [ValueSource("_1D_F_")] ulong a, + [ValueSource("_1D_F_")] ulong b) + { + opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); + + Vector128 v0 = MakeVectorE0E1(z, z); + Vector128 v1 = MakeVectorE0E1(a, a); + Vector128 v2 = MakeVectorE0E1(b, b); + + int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); + + int fpcr = rnd & (1 << (int)Fpcr.Fz); + fpcr |= rnd & (1 << (int)Fpcr.Dn); + + SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2, fpcr: fpcr); + + CompareAgainstUnicorn(Fpsr.Ioc | Fpsr.Idc, FpSkips.IfUnderflow, FpTolerances.UpToOneUlpsD); + } + [Test, Pairwise] [Explicit] // Fused. public void F_Recps_Rsqrts_S_S([ValueSource("_F_Recps_Rsqrts_S_S_")] uint opcodes, [ValueSource("_1S_F_")] ulong a, @@ -2036,6 +2182,30 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(); } + [Test, Pairwise] + public void S_Max_Min_P_V([ValueSource("_S_Max_Min_P_V_")] uint opcodes, + [Values(0u)] uint rd, + [Values(1u, 0u)] uint rn, + [Values(2u, 0u)] uint rm, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong a, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong b, + [Values(0b00u, 0b01u, 0b10u)] uint size, // Q0: <8B, 4H, 2S> + [Values(0b0u, 0b1u)] uint q) // Q1: <16B, 8H, 4S> + { + opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); + opcodes |= ((size & 3) << 22); + opcodes |= ((q & 1) << 30); + + Vector128 v0 = MakeVectorE0E1(z, z); + Vector128 v1 = MakeVectorE0E1(a, a * q); + Vector128 v2 = MakeVectorE0E1(b, b * q); + + SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2); + + CompareAgainstUnicorn(); + } + [Test, Pairwise, Description("SMLAL{2} ., ., .")] public void Smlal_V_8B8H_4H4S_2S2D([Values(0u)] uint rd, [Values(1u, 0u)] uint rn, @@ -3068,6 +3238,30 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(); } + [Test, Pairwise] + public void U_Max_Min_P_V([ValueSource("_U_Max_Min_P_V_")] uint opcodes, + [Values(0u)] uint rd, + [Values(1u, 0u)] uint rn, + [Values(2u, 0u)] uint rm, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong a, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong b, + [Values(0b00u, 0b01u, 0b10u)] uint size, // Q0: <8B, 4H, 2S> + [Values(0b0u, 0b1u)] uint q) // Q1: <16B, 8H, 4S> + { + opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); + opcodes |= ((size & 3) << 22); + opcodes |= ((q & 1) << 30); + + Vector128 v0 = MakeVectorE0E1(z, z); + Vector128 v1 = MakeVectorE0E1(a, a * q); + Vector128 v2 = MakeVectorE0E1(b, b * q); + + SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2); + + CompareAgainstUnicorn(); + } + [Test, Pairwise, Description("UMLAL{2} ., ., .")] public void Umlal_V_8B8H_4H4S_2S2D([Values(0u)] uint rd, [Values(1u, 0u)] uint rn, diff --git a/Ryujinx.Tests/Ryujinx.Tests.csproj b/Ryujinx.Tests/Ryujinx.Tests.csproj index fb21c6863..35405c769 100644 --- a/Ryujinx.Tests/Ryujinx.Tests.csproj +++ b/Ryujinx.Tests/Ryujinx.Tests.csproj @@ -18,7 +18,7 @@ - +