diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs index ec7b8bd91c..8567e1ce69 100644 --- a/ARMeilleure/Decoders/OpCodeTable.cs +++ b/ARMeilleure/Decoders/OpCodeTable.cs @@ -704,6 +704,7 @@ namespace ARMeilleure.Decoders SetA32("<<<<0011111x0000xxxxxxxxxxxxxxxx", InstName.Mvn, InstEmit32.Mvn, typeof(OpCode32AluImm)); SetA32("<<<<0001111x0000xxxxxxxxxxx0xxxx", InstName.Mvn, InstEmit32.Mvn, typeof(OpCode32AluRsImm)); SetA32("<<<<0001111x0000xxxxxxxx0xx1xxxx", InstName.Mvn, InstEmit32.Mvn, typeof(OpCode32AluRsReg)); + SetA32("<<<<0011001000001111000000000000", InstName.Nop, InstEmit32.Nop, typeof(OpCode32)); SetA32("<<<<0011100xxxxxxxxxxxxxxxxxxxxx", InstName.Orr, InstEmit32.Orr, typeof(OpCode32AluImm)); SetA32("<<<<0001100xxxxxxxxxxxxxxxx0xxxx", InstName.Orr, InstEmit32.Orr, typeof(OpCode32AluRsImm)); SetA32("<<<<0001100xxxxxxxxxxxxx0xx1xxxx", InstName.Orr, InstEmit32.Orr, typeof(OpCode32AluRsReg)); @@ -878,9 +879,10 @@ namespace ARMeilleure.Decoders SetA32("1111001x1x<>>xxxxxxx100101x1xxx0", InstName.Vqrshrn, InstEmit32.Vqrshrn, typeof(OpCode32SimdShImmNarrow)); SetA32("111100111x>>>xxxxxxx100001x1xxx0", InstName.Vqrshrun, InstEmit32.Vqrshrun, typeof(OpCode32SimdShImmNarrow)); SetA32("111100111x111011xxxx010x0xx0xxxx", InstName.Vrecpe, InstEmit32.Vrecpe, typeof(OpCode32SimdSqrte)); diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs index fdc1bb469f..82f57d63e1 100644 --- a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs +++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs @@ -817,7 +817,7 @@ namespace ARMeilleure.Instructions } else { - EmitVectorPairwiseOpF32(context, (op1, op2) => context.Add(op1, op2)); + EmitVectorPairwiseOpF32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPAddFpscr), op1, op2)); } } @@ -835,6 +835,66 @@ namespace ARMeilleure.Instructions } } + public static void Vpmax_V(ArmEmitterContext 
context) // Pairwise floating-point maximum emitter.
+        {
+            if (Optimizations.FastFP && Optimizations.UseSse2)
+            {
+                EmitSse2VectorPairwiseOpF32(context, Intrinsic.X86Maxps); // Fast path: SSE2 MAXPS intrinsic.
+            }
+            else
+            {
+                EmitVectorPairwiseOpF32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMaxFpscr), op1, op2)); // Soft-float fallback; SoftFloat32 is named for consistency with Vpmin_V.
+            }
+        }
+
+        public static void Vpmax_I(ArmEmitterContext context) // Pairwise integer maximum emitter; op.U picks the unsigned variant.
+        {
+            OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+            if (Optimizations.UseSsse3)
+            {
+                EmitSsse3VectorPairwiseOp32(context, op.U ? X86PmaxuInstruction : X86PmaxsInstruction);
+            }
+            else
+            {
+                EmitVectorPairwiseOpI32(context, (op1, op2) =>
+                {
+                    Operand greater = op.U ? context.ICompareGreaterUI(op1, op2) : context.ICompareGreater(op1, op2);
+                    return context.ConditionalSelect(greater, op1, op2); // op1 when op1 > op2, otherwise op2.
+                }, !op.U);
+            }
+        }
+
+        public static void Vpmin_V(ArmEmitterContext context) // Pairwise floating-point minimum emitter.
+        {
+            if (Optimizations.FastFP && Optimizations.UseSse2)
+            {
+                EmitSse2VectorPairwiseOpF32(context, Intrinsic.X86Minps); // Fast path: SSE2 MINPS intrinsic.
+            }
+            else
+            {
+                EmitVectorPairwiseOpF32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMinFpscr), op1, op2)); // Soft-float fallback.
+            }
+        }
+
+        public static void Vpmin_I(ArmEmitterContext context) // Pairwise integer minimum emitter; op.U picks the unsigned variant.
+        {
+            OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+            if (Optimizations.UseSsse3)
+            {
+                EmitSsse3VectorPairwiseOp32(context, op.U ? X86PminuInstruction : X86PminsInstruction);
+            }
+            else
+            {
+                EmitVectorPairwiseOpI32(context, (op1, op2) =>
+                {
+                    Operand less = op.U ? context.ICompareLessUI(op1, op2) : context.ICompareLess(op1, op2);
+                    return context.ConditionalSelect(less, op1, op2); // op1 when op1 < op2, otherwise op2.
+                }, !op.U);
+            }
+        }
+
         public static void Vrev(ArmEmitterContext context)
         {
             OpCode32SimdRev op = (OpCode32SimdRev)context.CurrOp;
diff --git a/ARMeilleure/Instructions/InstEmitSimdMove32.cs b/ARMeilleure/Instructions/InstEmitSimdMove32.cs
index f11f9cc593..b484381fe8 100644
--- a/ARMeilleure/Instructions/InstEmitSimdMove32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdMove32.cs
@@ -34,7 +34,23 @@ namespace ARMeilleure.Instructions
 
         public static void Vmvn_I(ArmEmitterContext context)
         {
-            EmitVectorImmUnaryOp32(context, (op1) => context.BitwiseExclusiveOr(op1, op1));
+            if (Optimizations.UseSse2)
+            {
+                EmitVectorUnaryOpSimd32(context, (op1) =>
+                {
+                    Operand mask = X86GetAllElements(context, -1L); // Presumably a vector with every bit set - confirm against X86GetAllElements.
+                    return context.AddIntrinsic(Intrinsic.X86Pandn, op1, mask); // PANDN = (NOT op1) AND mask, i.e. NOT op1 for an all-ones mask.
+                });
+            }
+            else
+            {
+                EmitVectorUnaryOpZx32(context, (op1) => context.BitwiseNot(op1)); // Scalar fallback: plain bitwise NOT.
+            }
+        }
+
+        public static void Vmvn_II(ArmEmitterContext context) // Immediate form: emits the bitwise NOT of the immediate operand.
+        {
+            EmitVectorImmUnaryOp32(context, (op1) => context.BitwiseNot(op1));
         }
 
         public static void Vmov_GS(ArmEmitterContext context)
diff --git a/ARMeilleure/Instructions/InstName.cs b/ARMeilleure/Instructions/InstName.cs
index 9bf319aaf5..e4d084560d 100644
--- a/ARMeilleure/Instructions/InstName.cs
+++ b/ARMeilleure/Instructions/InstName.cs
@@ -582,6 +582,8 @@ namespace ARMeilleure.Instructions
         Vnmls,
         Vorr,
         Vpadd,
+        Vpmax,
+        Vpmin,
         Vqrshrn,
         Vqrshrun,
         Vrev,
diff --git a/ARMeilleure/Translation/PTC/Ptc.cs b/ARMeilleure/Translation/PTC/Ptc.cs
index 135a45f035..2b4059ec29 100644
--- a/ARMeilleure/Translation/PTC/Ptc.cs
+++ b/ARMeilleure/Translation/PTC/Ptc.cs
@@ -20,7 +20,7 @@ namespace ARMeilleure.Translation.PTC
     {
         private const string HeaderMagic = "PTChd";
 
-        private const int InternalVersion = 1; //! To be incremented manually for each change to the ARMeilleure project.
+        private const int InternalVersion = 2; //! 
To be incremented manually for each change to the ARMeilleure project.
 
         private const string BaseDir = "Ryujinx";
 
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdMov32.cs b/Ryujinx.Tests/Cpu/CpuTestSimdMov32.cs
index 8c9627ce0f..aba0f02dbf 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdMov32.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdMov32.cs
@@ -39,8 +39,8 @@ namespace Ryujinx.Tests.Cpu
                 0b1110_1
             };
 
-
             uint opcode = 0xf2800010u; // VMOV.I32 D0, #0
+
             uint cmodeOp = variants[variant];
 
             if (q)
@@ -49,11 +49,11 @@ namespace Ryujinx.Tests.Cpu
             }
 
             opcode |= ((cmodeOp & 1) << 5) | ((cmodeOp & 0x1e) << 7);
-            opcode |= ((q ? 1u : 0u) << 6);
+            opcode |= (q ? 1u : 0u) << 6;
             opcode |= (imm & 0xf) | ((imm & 0x70) << 12) | ((imm & 0x80) << 16);
 
-            opcode |= ((vd & 0x10) << 18);
-            opcode |= ((vd & 0xf) << 12);
+            opcode |= (vd & 0x10) << 18;
+            opcode |= (vd & 0xf) << 12;
 
             SingleOpcode(opcode);
 
@@ -258,6 +258,82 @@ namespace Ryujinx.Tests.Cpu
             CompareAgainstUnicorn();
         }
 
+        [Test, Pairwise, Description("VMVN.<size> <Vd>, <Vm>")]
+        public void Vmvn([Values(0u)] uint size, // Only size 0 is exercised; the former Range(0u, 1u, 2u) produced exactly this value.
+                         [Values(0u, 1u, 2u, 3u)] uint vd,
+                         [Values(0u, 2u, 4u, 8u)] uint vm,
+                         [Values] bool q)
+        {
+            uint opcode = 0xf3b00580u; // VMVN D0, D0
+
+            if (q)
+            {
+                opcode |= 1 << 6; // Set the Q bit (bit 6), as done for every q variant in this file.
+                vm <<= 1; // Doubled so the register number stays even when q is set.
+                vd <<= 1;
+            }
+
+            opcode |= (size & 0x3) << 18;
+            opcode |= (vm & 0x10) << 1;
+            opcode |= (vm & 0xf) << 0;
+
+            opcode |= (vd & 0x10) << 18;
+            opcode |= (vd & 0xf) << 12;
+
+            V128 v0 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong());
+            V128 v1 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong());
+            V128 v2 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong());
+            V128 v3 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong());
+
+            SingleOpcode(opcode, v0: v0, v1: v1, v2: v2, v3: v3);
+
+            CompareAgainstUnicorn();
+        }
+
+        [Test, Pairwise, Description("VMVN.I<size> <Dd/Qd>, #<imm>")]
+        public void Mvni_V([Range(0u, 7u)] uint variant, // Index into the eight-entry variants table below.
+                           [Values(0u, 1u, 2u, 3u)] uint vd,
+                           [Values(0x0u)] [Random(1u, 0xffu, RndCntImm)] uint imm,
+                           [Values] bool q)
+        {
+            uint[] variants =
+            {
+                // I32
+                0b0000,
+                0b0010,
+                0b0100,
+                0b0110,
+
+                // I16
+                0b1000,
+                0b1010,
+
+                // I32
+                0b1100,
+                0b1101,
+            };
+
+            uint opcode = 0xf2800030u; // VMVN.I32 D0, #0
+
+            uint cmodeOp = variants[variant];
+
+            if (q)
+            {
+                vd <<= 1; // Doubled so the register number stays even when q is set.
+            }
+
+            opcode |= (cmodeOp & 0xf) << 8;
+            opcode |= (q ? 1u : 0u) << 6;
+            opcode |= (imm & 0xf) | ((imm & 0x70) << 12) | ((imm & 0x80) << 16);
+
+            opcode |= (vd & 0x10) << 18;
+            opcode |= (vd & 0xf) << 12;
+
+            SingleOpcode(opcode);
+
+            CompareAgainstUnicorn();
+        }
+
         [Test, Pairwise, Description("VTRN. , ")]
         public void Vtrn([Values(0u, 1u, 2u, 3u)] uint vm,
                          [Values(0u, 1u, 2u, 3u)] uint vd,
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs b/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs
index 66db63bc49..1581e85044 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs
@@ -12,6 +12,31 @@ namespace Ryujinx.Tests.Cpu
     {
 #if SimdReg32
 
+#region "ValueSource (Opcodes)"
+        private static uint[] _Vp_Add_Max_Min_F_()
+        {
+            return new uint[]
+            {
+                0xf3000d00u, // VPADD.F32 D0, D0, D0
+                0xf3000f00u, // VPMAX.F32 D0, D0, D0
+                0xf3200f00u  // VPMIN.F32 D0, D0, D0
+            };
+        }
+
+        // VPADD does not have an unsigned flag, so we check the opcode before setting it.
+        private const uint VpaddI8 = 0xf2000b10u; // VPADD.I8 D0, D0, D0
+
+        private static uint[] _Vp_Add_Max_Min_I_()
+        {
+            return new uint[]
+            {
+                VpaddI8,
+                0xf2000a00u, // VPMAX.S8 D0, D0, D0
+                0xf2000a10u  // VPMIN.S8 D0, D0, D0
+            };
+        }
+#endregion
+
 #region "ValueSource (Types)"
         private static ulong[] _1B1H1S1D_()
         {
@@ -296,7 +321,7 @@ namespace Ryujinx.Tests.Cpu
         {
             uint opcode = 0xf2800a00u; // VMLSL.S8 Q0, D0, D0
 
-            opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1);
+            opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1);
             opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18);
             opcode |= ((rn & 0xf) << 16) | ((rn & 0x10) << 3);
 
@@ -329,7 +354,7 @@ namespace Ryujinx.Tests.Cpu
         {
             uint opcode = 0xf2800c00u; // VMULL.S8 Q0, D0, D0
 
-            opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1);
+            opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1);
             opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18);
             opcode |= ((rn & 0xf) << 16) | ((rn & 0x10) << 3);
 
@@ -381,7 +406,7 @@ namespace Ryujinx.Tests.Cpu
                 opcode |= 1 << 24;
             }
 
-            opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1);
+            opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1);
             opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18);
             opcode |= ((rn & 0xf) << 16) | ((rn & 0x10) << 3);
 
@@ -397,22 +422,56 @@ namespace Ryujinx.Tests.Cpu
         }
 
         [Explicit]
-        [Test, Pairwise, Description("VPADD.f32 V0, V0, V0")]
-        public void Vpadd_f32([Values(0u)] uint rd,
-                              [Range(0u, 7u)] uint rn,
-                              [Range(0u, 7u)] uint rm)
+        [Test, Pairwise]
+        public void Vp_Add_Max_Min_F([ValueSource("_Vp_Add_Max_Min_F_")] uint opcode, // Base opcode: VPADD/VPMAX/VPMIN .F32.
+                                     [Values(0u)] uint rd,
+                                     [Range(0u, 7u)] uint rn,
+                                     [Range(0u, 7u)] uint rm,
+                                     [ValueSource("_2S_F_")] ulong z0,
+                                     [ValueSource("_2S_F_")] ulong z1,
+                                     [ValueSource("_2S_F_")] ulong a0,
+                                     [ValueSource("_2S_F_")] ulong a1,
+                                     [ValueSource("_2S_F_")] ulong b0,
+                                     [ValueSource("_2S_F_")] ulong b1)
         {
-            // not currently a slow path test - just a sanity check for pairwise
-            uint opcode = 0xf3000d00u; // VPADD.F32 D0, D0, D0
-
             opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1);
             opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18);
             opcode |= ((rn & 0xf) << 16) | ((rn & 0x10) << 3);
 
-            var rnd = TestContext.CurrentContext.Random;
-            V128 v0 = new V128(rnd.NextFloat(int.MinValue, int.MaxValue), rnd.NextFloat(int.MinValue, int.MaxValue), rnd.NextFloat(int.MinValue, int.MaxValue), rnd.NextFloat(int.MinValue, int.MaxValue));
-            V128 v1 = new V128(rnd.NextFloat(int.MinValue, int.MaxValue), rnd.NextFloat(int.MinValue, int.MaxValue), rnd.NextFloat(int.MinValue, int.MaxValue), rnd.NextFloat(int.MinValue, int.MaxValue));
-            V128 v2 = new V128(rnd.NextFloat(int.MinValue, int.MaxValue), rnd.NextFloat(int.MinValue, int.MaxValue), rnd.NextFloat(int.MinValue, int.MaxValue), rnd.NextFloat(int.MinValue, int.MaxValue));
+            V128 v0 = MakeVectorE0E1(z0, z1); // Inputs now come from the _2S_F_ value source instead of random floats.
+            V128 v1 = MakeVectorE0E1(a0, a1);
+            V128 v2 = MakeVectorE0E1(b0, b1);
+
+            SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
+
+            CompareAgainstUnicorn();
+        }
+
+        [Test, Pairwise]
+        public void Vp_Add_Max_Min_I([ValueSource("_Vp_Add_Max_Min_I_")] uint opcode, // Base opcode: VPADD.I8 / VPMAX.S8 / VPMIN.S8.
+                                     [Values(0u)] uint rd,
+                                     [Range(0u, 5u)] uint rn,
+                                     [Range(0u, 5u)] uint rm,
+                                     [Values(0u, 1u, 2u)] uint size,
+                                     [Random(RndCnt)] ulong z,
+                                     [Random(RndCnt)] ulong a,
+                                     [Random(RndCnt)] ulong b,
+                                     [Values] bool u)
+        {
+            if (u && opcode != VpaddI8)
+            {
+                opcode |= 1 << 24; // Bit 24 is set for the unsigned variant; skipped for the VPADD opcode.
+            }
+
+            opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1);
+            opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18);
+            opcode |= ((rn & 0xf) << 16) | ((rn & 0x10) << 3);
+
+            opcode |= size << 20;
+
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, z);
+            V128 v2 = MakeVectorE0E1(b, z);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);