diff --git a/FEXCore/Source/Interface/Core/JIT/VectorOps.cpp b/FEXCore/Source/Interface/Core/JIT/VectorOps.cpp index 9ab04d8013..5cd0381868 100644 --- a/FEXCore/Source/Interface/Core/JIT/VectorOps.cpp +++ b/FEXCore/Source/Interface/Core/JIT/VectorOps.cpp @@ -556,7 +556,12 @@ DEF_OP(VFRecpScalarInsert) { auto Src = *std::get_if(&SrcVar); fmov(SubRegSize.Scalar, VTMP1.Q(), 1.0f); - fdiv(SubRegSize.Scalar, Dst, VTMP1, Src); + if (HostSupportsAFP) { + fdiv(SubRegSize.Scalar, VTMP1, VTMP1, Src); + ins(SubRegSize.Vector, Dst, 0, VTMP1, 0); + } else { + fdiv(SubRegSize.Scalar, Dst, VTMP1, Src); + } }; auto ScalarEmitRPRES = [this, SubRegSize](ARMEmitter::VRegister Dst, std::variant SrcVar) { diff --git a/unittests/InstructionCountCI/AFP/SVE256/Secondary_REP.json b/unittests/InstructionCountCI/AFP/SVE256/Secondary_REP.json index 9e4d6adf8d..740bd30532 100644 --- a/unittests/InstructionCountCI/AFP/SVE256/Secondary_REP.json +++ b/unittests/InstructionCountCI/AFP/SVE256/Secondary_REP.json @@ -61,14 +61,15 @@ ] }, "rcpss xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": [ "FEAT_FPRES could make this more optimal", "0xf3 0x0f 0x53" ], "ExpectedArm64ASM": [ "fmov s0, #0x70 (1.0000)", - "fdiv s16, s0, s17" + "fdiv s0, s0, s17", + "mov v16.s[0], v0.s[0]" ] }, "addss xmm0, xmm1": { diff --git a/unittests/InstructionCountCI/AFP/Secondary_REP.json b/unittests/InstructionCountCI/AFP/Secondary_REP.json index 45633e7be2..c32c9efd90 100644 --- a/unittests/InstructionCountCI/AFP/Secondary_REP.json +++ b/unittests/InstructionCountCI/AFP/Secondary_REP.json @@ -61,14 +61,15 @@ ] }, "rcpss xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": [ "FEAT_FPRES could make this more optimal", "0xf3 0x0f 0x53" ], "ExpectedArm64ASM": [ "fmov s0, #0x70 (1.0000)", - "fdiv s16, s0, s17" + "fdiv s0, s0, s17", + "mov v16.s[0], v0.s[0]" ] }, "addss xmm0, xmm1": { diff --git a/unittests/InstructionCountCI/AFP/VEX_map1.json b/unittests/InstructionCountCI/AFP/VEX_map1.json index 1a8ba2e258..88016e3a3d 100644 --- a/unittests/InstructionCountCI/AFP/VEX_map1.json +++ b/unittests/InstructionCountCI/AFP/VEX_map1.json @@ -46,7 +46,7 @@ ] }, "vrcpss xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 4, "Comment": [ "FEAT_FPRES could make this more optimal", "Map 1 0b10 0x53 128-bit" @@ -54,7 +54,8 @@ "ExpectedArm64ASM": [ "mov v16.16b, v17.16b", "fmov s0, #0x70 (1.0000)", - "fdiv s16, s0, s18" + "fdiv s0, s0, s18", + "mov v16.s[0], v0.s[0]" ] }, "vcmpss xmm0, xmm1, xmm2, 0x00": {