Skip to content

Commit

Permalink
Merge pull request #4374 from Sonicadvance1/float_packed_min_max_afp
Browse files Browse the repository at this point in the history
JIT: Optimize packed float min/max if AFP is supported
  • Loading branch information
lioncash authored Feb 25, 2025
2 parents 717015b + 69cfc78 commit 4f46f55
Show file tree
Hide file tree
Showing 3 changed files with 117 additions and 47 deletions.
15 changes: 14 additions & 1 deletion FEXCore/Source/Interface/Core/JIT/VectorOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1356,13 +1356,14 @@ DEF_OP(VFMin) {
const auto Vector1 = GetVReg(Op->Vector1.ID());
const auto Vector2 = GetVReg(Op->Vector2.ID());

// NOTE: We don't directly use FMIN here for any of the implementations,
// NOTE: We don't directly use FMIN** here for any of the implementations,
// because it has undesirable NaN handling behavior (it sets
// entries either to the incoming NaN value*, or the default NaN
// depending on FPCR flags set). We want behavior that sets NaN
// entries to zero for the comparison result.
//
// * - Not exactly (differs slightly with SNaNs), but close enough for the explanation
// ** - Unless the host supports AFP (FPCR.AH=1): FMIN/FMAX then return the second source element when either input is a NaN or both inputs are zeros, which matches x86 MIN/MAX semantics.

if (HostSupportsSVE256 && Is256Bit) {
const auto Mask = PRED_TMP_32B;
Expand Down Expand Up @@ -1391,6 +1392,12 @@ DEF_OP(VFMin) {
} else {
LOGMAN_THROW_A_FMT(!IsScalar, "should use VFMinScalarInsert instead");

if (HostSupportsAFP) {
// AFP.AH lets fmin behave like x86 min
fmin(SubRegSize, Dst.Q(), Vector1.Q(), Vector2.Q());
return;
}

if (Dst == Vector1) {
// Destination is already Vector1, need to insert Vector2 on false.
fcmgt(SubRegSize, VTMP1.Q(), Vector2.Q(), Vector1.Q());
Expand Down Expand Up @@ -1443,6 +1450,12 @@ DEF_OP(VFMax) {
} else {
LOGMAN_THROW_A_FMT(!IsScalar, "should use VFMaxScalarInsert instead");

if (HostSupportsAFP) {
// AFP.AH lets fmax behave like x86 max
fmax(SubRegSize, Dst.Q(), Vector1.Q(), Vector2.Q());
return;
}

if (Dst == Vector1) {
// Destination is already Vector1, need to insert Vector2 on true.
fcmgt(SubRegSize, VTMP1.Q(), Vector2.Q(), Vector1.Q());
Expand Down
137 changes: 99 additions & 38 deletions unittests/ASM/FEX_bugs/MinMaxNaN.asm
Original file line number Diff line number Diff line change
Expand Up @@ -47,63 +47,124 @@

%endmacro

%macro min 3
%macro case_d 4
  ; Double-precision min/max check across scalar, SSE, AVX-128 and AVX-256.
  ;   %1 = operation stem (min or max), pasted into %1sd / %1pd / v%1pd
  ;   %2 = first source bit pattern
  ;   %3 = second source bit pattern
  ;   %4 = expected bit pattern of the low 64-bit result
  ; Writes rax, rbx, rcx, rdx, xmm0, xmm1, xmm2/ymm2 and flags.
  ; Jumps to fexi_fexi_im_so_broken on the first mismatch.

  ; Load sources. rax keeps %2 for the whole macro so that the first source
  ; can be restored after the destructive two-operand forms below.
  mov rax, %2
  mov rbx, %3
  movq xmm0, rax
  movq xmm1, rbx

  ; Calculate scalar min/max
  %1sd xmm0, xmm1

  ; Check result
  movq rcx, xmm0
  mov rdx, %4
  cmp rcx, rdx
  jne fexi_fexi_im_so_broken
  mov rcx, 0

  ; Now try the SSE vector. The scalar op replaced xmm0 with its result, so
  ; reload the first source; otherwise this would compute op(result, %3)
  ; and never exercise op(%2, %3) on the packed path.
  movq xmm0, rax
  %1pd xmm0, xmm1
  movq rcx, xmm0
  mov rdx, %4
  cmp rcx, rdx
  jne fexi_fexi_im_so_broken
  mov rcx, 0

  ; And the AVX-128 version. xmm0 was overwritten again by the SSE form
  ; above, so restore the first source once more. The VEX forms write xmm2
  ; and leave xmm0/xmm1 intact from here on.
  movq xmm0, rax
  v%1pd xmm2, xmm0, xmm1
  movq rcx, xmm2
  mov rdx, %4
  cmp rcx, rdx
  jne fexi_fexi_im_so_broken
  mov rcx, 0

  ; And the AVX-256 version. Only the low 64 bits of the result are checked.
  v%1pd ymm2, ymm0, ymm1
  movq rcx, xmm2
  mov rdx, %4
  cmp rcx, rdx
  jne fexi_fexi_im_so_broken
%endmacro

%macro min_s 3
; Forwards (%1 = first source, %2 = second source, %3 = expected result) to
; single_case with the operation stem `min`.
; NOTE(review): single_case is defined outside this view; the _s suffix
; presumably means single precision - confirm against its definition.
single_case min, %1, %2, %3
%endmacro

%macro max 3
%macro max_s 3
; Forwards (%1 = first source, %2 = second source, %3 = expected result) to
; single_case with the operation stem `max`.
; NOTE(review): single_case is defined outside this view; the _s suffix
; presumably means single precision - confirm against its definition.
single_case max, %1, %2, %3
%endmacro

zero equ 0x00000000
negzero equ 0x80000000
qnan equ 0x7fc00000
snan equ 0x7f800001
%macro min_d 3
; Double-precision min check: forwards (%1 = first source, %2 = second
; source, %3 = expected result) to case_d with the operation stem `min`,
; which expands to minsd / minpd / vminpd.
case_d min, %1, %2, %3
%endmacro

%macro max_d 3
; Double-precision max check: forwards (%1 = first source, %2 = second
; source, %3 = expected result) to case_d with the operation stem `max`,
; which expands to maxsd / maxpd / vmaxpd.
case_d max, %1, %2, %3
%endmacro

; 32-bit bit patterns (IEEE-754 binary32 encodings) used by the _s cases.
zero_s equ 0x00000000 ; +0.0
negzero_s equ 0x80000000 ; -0.0: only the sign bit is set
qnan_s equ 0x7fc00000 ; quiet NaN: exponent all ones, top fraction bit set
snan_s equ 0x7f800001 ; signaling NaN: exponent all ones, top fraction bit clear, non-zero payload

; 64-bit bit patterns (IEEE-754 binary64 encodings) used by the _d cases.
zero_d equ 0x0000_0000_0000_0000 ; +0.0
negzero_d equ 0x8000_0000_0000_0000 ; -0.0: only the sign bit is set
qnan_d equ 0x7ff8_0000_0000_0000 ; quiet NaN: exponent all ones, top fraction bit set
snan_d equ 0x7ff0_0000_0000_0001 ; signaling NaN: exponent all ones, top fraction bit clear, non-zero payload

cases:
%macro cases 1
; Basic identities
min zero, zero, zero
max zero, zero, zero
min negzero, negzero, negzero
max negzero, negzero, negzero
min qnan, qnan, qnan
max qnan, qnan, qnan
min%1 zero%1, zero%1, zero%1
max%1 zero%1, zero%1, zero%1
min%1 negzero%1, negzero%1, negzero%1
max%1 negzero%1, negzero%1, negzero%1
min%1 qnan%1, qnan%1, qnan%1
max%1 qnan%1, qnan%1, qnan%1

; "If the values being compared are both 0.0s (of either sign), the value in
; the second source operand is returned"
min zero, negzero, negzero
max zero, negzero, negzero
min negzero, zero, zero
max negzero, zero, zero
min%1 zero%1, negzero%1, negzero%1
max%1 zero%1, negzero%1, negzero%1
min%1 negzero%1, zero%1, zero%1
max%1 negzero%1, zero%1, zero%1

; "If only one value is a NaN (SNaN or QNaN) for this instruction, the second
; source operand, either a NaN or a valid floating-point value, is written to
; the result"
min zero, qnan, qnan
min negzero, qnan, qnan
min qnan, zero, zero
min qnan, negzero, negzero

max zero, qnan, qnan
max negzero, qnan, qnan
max qnan, zero, zero
max qnan, negzero, negzero

min zero, snan, snan
min negzero, snan, snan
min snan, zero, zero
min snan, negzero, negzero

max zero, snan, snan
max negzero, snan, snan
max snan, zero, zero
max snan, negzero, negzero
min%1 zero%1, qnan%1, qnan%1
min%1 negzero%1, qnan%1, qnan%1
min%1 qnan%1, zero%1, zero%1
min%1 qnan%1, negzero%1, negzero%1

max%1 zero%1, qnan%1, qnan%1
max%1 negzero%1, qnan%1, qnan%1
max%1 qnan%1, zero%1, zero%1
max%1 qnan%1, negzero%1, negzero%1

min%1 zero%1, snan%1, snan%1
min%1 negzero%1, snan%1, snan%1
min%1 snan%1, zero%1, zero%1
min%1 snan%1, negzero%1, negzero%1

max%1 zero%1, snan%1, snan%1
max%1 negzero%1, snan%1, snan%1
max%1 snan%1, zero%1, zero%1
max%1 snan%1, negzero%1, negzero%1

; "If a value in the second operand is an SNaN, that SNaN is returned
; unchanged to the destination (that is, a QNaN version of the SNaN is not
; returned)."
min qnan, snan, snan
min snan, snan, snan
min%1 qnan%1, snan%1, snan%1
min%1 snan%1, snan%1, snan%1
%endmacro

single_cases:
cases _s

cases_double:
cases _d

success:
mov rax, 0
Expand Down
12 changes: 4 additions & 8 deletions unittests/InstructionCountCI/AFP/VEX_map1.json
Original file line number Diff line number Diff line change
Expand Up @@ -404,14 +404,12 @@
]
},
"vminps xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 3,
"ExpectedInstructionCount": 1,
"Comment": [
"Map 1 0b00 0x5d 128-bit"
],
"ExpectedArm64ASM": [
"fcmgt v0.4s, v18.4s, v17.4s",
"mov v16.16b, v17.16b",
"bif v16.16b, v18.16b, v0.16b"
"fmin v16.4s, v17.4s, v18.4s"
]
},
"vminps ymm0, ymm1, ymm2": {
Expand All @@ -428,14 +426,12 @@
]
},
"vminpd xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 3,
"ExpectedInstructionCount": 1,
"Comment": [
"Map 1 0b01 0x5d 128-bit"
],
"ExpectedArm64ASM": [
"fcmgt v0.2d, v18.2d, v17.2d",
"mov v16.16b, v17.16b",
"bif v16.16b, v18.16b, v0.16b"
"fmin v16.2d, v17.2d, v18.2d"
]
},
"vminpd ymm0, ymm1, ymm2": {
Expand Down

0 comments on commit 4f46f55

Please sign in to comment.