Skip to content

Commit

Permalink
Merge pull request #4392 from Sonicadvance1/fix_scalar_recip
Browse files Browse the repository at this point in the history
JIT: Fix scalar reciprocal when AFP is supported
  • Loading branch information
lioncash authored Mar 7, 2025
2 parents a8bc20f + b76f819 commit 7fcb5fc
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 7 deletions.
7 changes: 6 additions & 1 deletion FEXCore/Source/Interface/Core/JIT/VectorOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -556,7 +556,12 @@ DEF_OP(VFRecpScalarInsert) {
auto Src = *std::get_if<ARMEmitter::VRegister>(&SrcVar);

fmov(SubRegSize.Scalar, VTMP1.Q(), 1.0f);
fdiv(SubRegSize.Scalar, Dst, VTMP1, Src);
if (HostSupportsAFP) {
fdiv(SubRegSize.Scalar, VTMP1, VTMP1, Src);
ins(SubRegSize.Vector, Dst, 0, VTMP1, 0);
} else {
fdiv(SubRegSize.Scalar, Dst, VTMP1, Src);
}
};

auto ScalarEmitRPRES = [this, SubRegSize](ARMEmitter::VRegister Dst, std::variant<ARMEmitter::VRegister, ARMEmitter::Register> SrcVar) {
Expand Down
5 changes: 3 additions & 2 deletions unittests/InstructionCountCI/AFP/SVE256/Secondary_REP.json
Original file line number Diff line number Diff line change
Expand Up @@ -61,14 +61,15 @@
]
},
"rcpss xmm0, xmm1": {
"ExpectedInstructionCount": 2,
"ExpectedInstructionCount": 3,
"Comment": [
"FEAT_FPRES could make this more optimal",
"0xf3 0x0f 0x53"
],
"ExpectedArm64ASM": [
"fmov s0, #0x70 (1.0000)",
"fdiv s16, s0, s17"
"fdiv s0, s0, s17",
"mov v16.s[0], v0.s[0]"
]
},
"addss xmm0, xmm1": {
Expand Down
5 changes: 3 additions & 2 deletions unittests/InstructionCountCI/AFP/Secondary_REP.json
Original file line number Diff line number Diff line change
Expand Up @@ -61,14 +61,15 @@
]
},
"rcpss xmm0, xmm1": {
"ExpectedInstructionCount": 2,
"ExpectedInstructionCount": 3,
"Comment": [
"FEAT_FPRES could make this more optimal",
"0xf3 0x0f 0x53"
],
"ExpectedArm64ASM": [
"fmov s0, #0x70 (1.0000)",
"fdiv s16, s0, s17"
"fdiv s0, s0, s17",
"mov v16.s[0], v0.s[0]"
]
},
"addss xmm0, xmm1": {
Expand Down
5 changes: 3 additions & 2 deletions unittests/InstructionCountCI/AFP/VEX_map1.json
Original file line number Diff line number Diff line change
Expand Up @@ -46,15 +46,16 @@
]
},
"vrcpss xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 3,
"ExpectedInstructionCount": 4,
"Comment": [
"FEAT_FPRES could make this more optimal",
"Map 1 0b10 0x53 128-bit"
],
"ExpectedArm64ASM": [
"mov v16.16b, v17.16b",
"fmov s0, #0x70 (1.0000)",
"fdiv s16, s0, s18"
"fdiv s0, s0, s18",
"mov v16.s[0], v0.s[0]"
]
},
"vcmpss xmm0, xmm1, xmm2, 0x00": {
Expand Down

0 comments on commit 7fcb5fc

Please sign in to comment.