From 4b36d4f1ea965f6df4f61d506c1a9103c4da7938 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Thu, 6 Mar 2025 18:21:26 -0800 Subject: [PATCH 1/2] JIT: Fix scalar reciprocal when AFP is supported Somehow I had completely missed this and recent reciprocal tests have exposed it as a problem. When AFP is supported but RPRES is not, we were hitting this code path. We were failing to insert into the destination correctly: because the reciprocal is calculated using fdiv with a synthesized constant, this would just zero the remaining portion of the register. --- FEXCore/Source/Interface/Core/JIT/VectorOps.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/FEXCore/Source/Interface/Core/JIT/VectorOps.cpp b/FEXCore/Source/Interface/Core/JIT/VectorOps.cpp index 9ab04d8013..5cd0381868 100644 --- a/FEXCore/Source/Interface/Core/JIT/VectorOps.cpp +++ b/FEXCore/Source/Interface/Core/JIT/VectorOps.cpp @@ -556,7 +556,12 @@ DEF_OP(VFRecpScalarInsert) { auto Src = *std::get_if(&SrcVar); fmov(SubRegSize.Scalar, VTMP1.Q(), 1.0f); - fdiv(SubRegSize.Scalar, Dst, VTMP1, Src); + if (HostSupportsAFP) { + fdiv(SubRegSize.Scalar, VTMP1, VTMP1, Src); + ins(SubRegSize.Vector, Dst, 0, VTMP1, 0); + } else { + fdiv(SubRegSize.Scalar, Dst, VTMP1, Src); + } }; auto ScalarEmitRPRES = [this, SubRegSize](ARMEmitter::VRegister Dst, std::variant SrcVar) { From b76f8197599aedd4f3c369cd36c1fac918e896af Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Thu, 6 Mar 2025 18:48:43 -0800 Subject: [PATCH 2/2] InstcountCI: Update --- unittests/InstructionCountCI/AFP/SVE256/Secondary_REP.json | 5 +++-- unittests/InstructionCountCI/AFP/Secondary_REP.json | 5 +++-- unittests/InstructionCountCI/AFP/VEX_map1.json | 5 +++-- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/unittests/InstructionCountCI/AFP/SVE256/Secondary_REP.json b/unittests/InstructionCountCI/AFP/SVE256/Secondary_REP.json index 9e4d6adf8d..740bd30532 100644 --- 
a/unittests/InstructionCountCI/AFP/SVE256/Secondary_REP.json +++ b/unittests/InstructionCountCI/AFP/SVE256/Secondary_REP.json @@ -61,14 +61,15 @@ ] }, "rcpss xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": [ "FEAT_FPRES could make this more optimal", "0xf3 0x0f 0x53" ], "ExpectedArm64ASM": [ "fmov s0, #0x70 (1.0000)", - "fdiv s16, s0, s17" + "fdiv s0, s0, s17", + "mov v16.s[0], v0.s[0]" ] }, "addss xmm0, xmm1": { diff --git a/unittests/InstructionCountCI/AFP/Secondary_REP.json b/unittests/InstructionCountCI/AFP/Secondary_REP.json index 45633e7be2..c32c9efd90 100644 --- a/unittests/InstructionCountCI/AFP/Secondary_REP.json +++ b/unittests/InstructionCountCI/AFP/Secondary_REP.json @@ -61,14 +61,15 @@ ] }, "rcpss xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": [ "FEAT_FPRES could make this more optimal", "0xf3 0x0f 0x53" ], "ExpectedArm64ASM": [ "fmov s0, #0x70 (1.0000)", - "fdiv s16, s0, s17" + "fdiv s0, s0, s17", + "mov v16.s[0], v0.s[0]" ] }, "addss xmm0, xmm1": { diff --git a/unittests/InstructionCountCI/AFP/VEX_map1.json b/unittests/InstructionCountCI/AFP/VEX_map1.json index 1a8ba2e258..88016e3a3d 100644 --- a/unittests/InstructionCountCI/AFP/VEX_map1.json +++ b/unittests/InstructionCountCI/AFP/VEX_map1.json @@ -46,7 +46,7 @@ ] }, "vrcpss xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 4, "Comment": [ "FEAT_FPRES could make this more optimal", "Map 1 0b10 0x53 128-bit" @@ -54,7 +54,8 @@ "ExpectedArm64ASM": [ "mov v16.16b, v17.16b", "fmov s0, #0x70 (1.0000)", - "fdiv s16, s0, s18" + "fdiv s0, s0, s18", + "mov v16.s[0], v0.s[0]" ] }, "vcmpss xmm0, xmm1, xmm2, 0x00": {