Skip to content

Commit

Permalink
instcountci: Improve reciprocal estimate and tests
Browse files Browse the repository at this point in the history
  • Loading branch information
pmatos committed Feb 27, 2025
1 parent c58ba7b commit 77730a5
Show file tree
Hide file tree
Showing 7 changed files with 68 additions and 33 deletions.
5 changes: 3 additions & 2 deletions unittests/InstructionCountCI/AFP/SVE256/Secondary_REP.json
Original file line number Diff line number Diff line change
Expand Up @@ -61,14 +61,15 @@
]
},
"rcpss xmm0, xmm1": {
- "ExpectedInstructionCount": 2,
+ "ExpectedInstructionCount": 3,
"Comment": [
"FEAT_FPRES could make this more optimal",
"0xf3 0x0f 0x53"
],
"ExpectedArm64ASM": [
"fmov s0, #0x70 (1.0000)",
- "fdiv s16, s0, s17"
+ "fdiv s0, s0, s17",
+ "mov v16.s[0], v0.s[0]"
]
},
"addss xmm0, xmm1": {
Expand Down
5 changes: 3 additions & 2 deletions unittests/InstructionCountCI/AFP/Secondary_REP.json
Original file line number Diff line number Diff line change
Expand Up @@ -61,14 +61,15 @@
]
},
"rcpss xmm0, xmm1": {
- "ExpectedInstructionCount": 2,
+ "ExpectedInstructionCount": 3,
"Comment": [
"FEAT_FPRES could make this more optimal",
"0xf3 0x0f 0x53"
],
"ExpectedArm64ASM": [
"fmov s0, #0x70 (1.0000)",
- "fdiv s16, s0, s17"
+ "fdiv s0, s0, s17",
+ "mov v16.s[0], v0.s[0]"
]
},
"addss xmm0, xmm1": {
Expand Down
5 changes: 3 additions & 2 deletions unittests/InstructionCountCI/AFP/VEX_map1.json
Original file line number Diff line number Diff line change
Expand Up @@ -46,15 +46,16 @@
]
},
"vrcpss xmm0, xmm1, xmm2": {
- "ExpectedInstructionCount": 3,
+ "ExpectedInstructionCount": 4,
"Comment": [
"FEAT_FPRES could make this more optimal",
"Map 1 0b10 0x53 128-bit"
],
"ExpectedArm64ASM": [
"mov v16.16b, v17.16b",
"fmov s0, #0x70 (1.0000)",
- "fdiv s16, s0, s18"
+ "fdiv s0, s0, s18",
+ "mov v16.s[0], v0.s[0]"
]
},
"vcmpss xmm0, xmm1, xmm2, 0x00": {
Expand Down
24 changes: 15 additions & 9 deletions unittests/InstructionCountCI/DDD.json
Original file line number Diff line number Diff line change
Expand Up @@ -90,16 +90,19 @@
]
},
"pfrsqrtv mm0, mm1": {
- "ExpectedInstructionCount": 7,
+ "ExpectedInstructionCount": 10,
"Comment": [
"0x0f 0x0f 0x87"
],
"ExpectedArm64ASM": [
"ldr d2, [x28, #1056]",
+ "fabs v3.4s, v2.4s",
"fmov v0.4s, #0x70 (1.0000)",
- "fsqrt v1.4s, v2.4s",
- "fdiv v2.4s, v0.4s, v1.4s",
- "str d2, [x28, #1040]",
+ "fsqrt v1.4s, v3.4s",
+ "fdiv v3.4s, v0.4s, v1.4s",
+ "movi v0.2s, #0x80, lsl #24",
+ "bit v3.8b, v2.8b, v0.8b",
+ "str d3, [x28, #1040]",
"mov w20, #0xffff",
"strh w20, [x28, #1048]"
]
Expand Down Expand Up @@ -174,16 +177,19 @@
]
},
"pfrsqrt mm0, mm1": {
- "ExpectedInstructionCount": 8,
+ "ExpectedInstructionCount": 11,
"Comment": [
"0x0f 0x0f 0x97"
],
"ExpectedArm64ASM": [
"ldr d2, [x28, #1056]",
- "fmov s0, #0x70 (1.0000)",
- "fsqrt s1, s2",
- "fdiv s2, s0, s1",
- "dup v2.2s, v2.s[0]",
+ "fabs v3.4s, v2.4s",
+ "fmov v0.4s, #0x70 (1.0000)",
+ "fsqrt v1.4s, v3.4s",
+ "fdiv v3.4s, v0.4s, v1.4s",
+ "movi v0.2s, #0x80, lsl #24",
+ "bit v3.8b, v2.8b, v0.8b",
+ "dup v2.2s, v3.s[0]",
"str d2, [x28, #1040]",
"mov w20, #0xffff",
"strh w20, [x28, #1048]"
Expand Down
36 changes: 26 additions & 10 deletions unittests/InstructionCountCI/RPRES/DDD.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,54 +12,70 @@
},
"Instructions": {
"pfrcpv mm0, mm1": {
- "ExpectedInstructionCount": 5,
+ "ExpectedInstructionCount": 7,
"Comment": [
"0x0f 0x0f 0x86"
],
"ExpectedArm64ASM": [
"ldr d2, [x28, #1056]",
- "frecpe v2.2s, v2.2s",
+ "frecpe v0.2s, v2.2s",
+ "frecps v1.2s, v0.2s, v2.2s",
+ "fmul v2.2s, v0.2s, v1.2s",
"str d2, [x28, #1040]",
"mov w20, #0xffff",
"strh w20, [x28, #1048]"
]
},
"pfrsqrtv mm0, mm1": {
- "ExpectedInstructionCount": 5,
+ "ExpectedInstructionCount": 11,
"Comment": [
"0x0f 0x0f 0x87"
],
"ExpectedArm64ASM": [
"ldr d2, [x28, #1056]",
- "frsqrte v2.2s, v2.2s",
- "str d2, [x28, #1040]",
+ "fabs v3.4s, v2.4s",
+ "frsqrte v0.2s, v3.2s",
+ "fmul v1.2s, v0.2s, v0.2s",
+ "frsqrts v1.2s, v1.2s, v3.2s",
+ "fmul v3.2s, v0.2s, v1.2s",
+ "movi v0.2s, #0x80, lsl #24",
+ "bit v3.8b, v2.8b, v0.8b",
+ "str d3, [x28, #1040]",
"mov w20, #0xffff",
"strh w20, [x28, #1048]"
]
},
"pfrcp mm0, mm1": {
- "ExpectedInstructionCount": 6,
+ "ExpectedInstructionCount": 8,
"Comment": [
"0x0f 0x0f 0x96"
],
"ExpectedArm64ASM": [
"ldr d2, [x28, #1056]",
- "frecpe s2, s2",
+ "frecpe s0, s2",
+ "frecps s1, s0, s2",
+ "fmul s2, s0, s1",
"dup v2.2s, v2.s[0]",
"str d2, [x28, #1040]",
"mov w20, #0xffff",
"strh w20, [x28, #1048]"
]
},
"pfrsqrt mm0, mm1": {
- "ExpectedInstructionCount": 6,
+ "ExpectedInstructionCount": 12,
"Comment": [
"0x0f 0x0f 0x97"
],
"ExpectedArm64ASM": [
"ldr d2, [x28, #1056]",
- "frsqrte s2, s2",
- "dup v2.2s, v2.s[0]",
+ "fabs v3.4s, v2.4s",
+ "frsqrte v0.2s, v3.2s",
+ "fmul v1.2s, v0.2s, v0.2s",
+ "frsqrts v1.2s, v1.2s, v3.2s",
+ "fmul v3.2s, v0.2s, v1.2s",
+ "movi v0.2s, #0x80, lsl #24",
+ "bit v3.8b, v2.8b, v0.8b",
+ "dup v2.2s, v3.s[0]",
"str d2, [x28, #1040]",
"mov w20, #0xffff",
"strh w20, [x28, #1048]"
Expand Down
13 changes: 9 additions & 4 deletions unittests/InstructionCountCI/RPRES/Secondary_REP_AFP.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,21 +12,26 @@
},
"Instructions": {
"rsqrtss xmm0, xmm1": {
- "ExpectedInstructionCount": 1,
+ "ExpectedInstructionCount": 5,
"Comment": [
"0xf3 0x0f 0x52"
],
"ExpectedArm64ASM": [
- "frsqrte s16, s17"
+ "frsqrte s0, s17",
+ "fmul s1, s0, s0",
+ "frsqrts s1, s1, s17",
+ "fmul s0, s0, s1",
+ "mov v16.s[0], v0.s[0]"
]
},
"rcpss xmm0, xmm1": {
- "ExpectedInstructionCount": 1,
+ "ExpectedInstructionCount": 2,
"Comment": [
"0xf3 0x0f 0x53"
],
"ExpectedArm64ASM": [
- "frecpe s16, s17"
+ "frecpe s0, s17",
+ "mov v16.s[0], v0.s[0]"
]
}
}
Expand Down
13 changes: 9 additions & 4 deletions unittests/InstructionCountCI/RPRES/VEX_map1_AFP.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,18 @@
]
},
"vrsqrtss xmm0, xmm1, xmm2": {
- "ExpectedInstructionCount": 2,
+ "ExpectedInstructionCount": 6,
"Comment": [
"AFP can make this more optimal",
"Map 1 0b10 0x52 128-bit"
],
"ExpectedArm64ASM": [
"mov v16.16b, v17.16b",
- "frsqrte s16, s18"
+ "frsqrte s0, s18",
+ "fmul s1, s0, s0",
+ "frsqrts s1, s1, s18",
+ "fmul s0, s0, s1",
+ "mov v16.s[0], v0.s[0]"
]
},
"vrcpps xmm0, xmm1": {
Expand All @@ -58,13 +62,14 @@
]
},
"vrcpss xmm0, xmm1, xmm2": {
- "ExpectedInstructionCount": 2,
+ "ExpectedInstructionCount": 3,
"Comment": [
"Map 1 0b10 0x53 128-bit"
],
"ExpectedArm64ASM": [
"mov v16.16b, v17.16b",
- "frecpe s16, s18"
+ "frecpe s0, s18",
+ "mov v16.s[0], v0.s[0]"
]
}
}
Expand Down

0 comments on commit 77730a5

Please sign in to comment.