-
Notifications
You must be signed in to change notification settings - Fork 139
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #4330 from Sonicadvance1/hotblock_tso_32bit
InstcountCI: Adds a hotblock for 32-bit TSO testing
- Loading branch information
Showing
1 changed file
with
192 additions
and
0 deletions.
There are no files selected for viewing
192 changes: 192 additions & 0 deletions
192
unittests/InstructionCountCI/FlagM/HotBlocks_TSO_32Bit.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,192 @@ | ||
{ | ||
"Features": { | ||
"Bitness": 32, | ||
"EnabledHostFeatures": [ | ||
"TSO", | ||
"LRCPC", | ||
"LRCPC2", | ||
"FLAGM", | ||
"FLAGM2" | ||
], | ||
"DisabledHostFeatures": [ | ||
"SVE128", | ||
"SVE256", | ||
"AFP" | ||
] | ||
}, | ||
"Instructions": { | ||
"The Sims 1 hot block": { | ||
"x86InstructionCount": 47, | ||
"ExpectedInstructionCount": 106, | ||
"Comment": [ | ||
"Hottest in-game block from The Sims 1, Legacy Collection", | ||
"Consumed 6.13% of a CPU core on Oryon-1", | ||
"Some interesting problems in this block:", | ||
" - LRCPC2 small immediate loadstores aren't getting used", | ||
" - Memory stores of zero aren't using the wzr register", | ||
" - Vector loadstores are using large negative offsets which Arm64 can't optimize", | ||
"   - Would need to generate a temporary and convert negative offsets to positive (or close enough to fit simm9)", | ||
" - LRCPC3 isn't used for vector loadstores at all", | ||
"   - This is where the excessive dmb instructions come from", | ||
" - No hardware ships with LRCPC3 yet anyway" | ||
], | ||
"x86Insts": [ | ||
"push ebp", | ||
"mov ebp, esp", | ||
"sub esp, 0x1c4", | ||
"mov eax, dword [0xa37400]", | ||
"xor eax, ebp", | ||
"mov dword [ebp-0x4], eax", | ||
"mov edx, dword [ebp+0x8]", | ||
"push ebx", | ||
"push esi", | ||
"mov esi, dword [ebp+0x18]", | ||
"push edi", | ||
"mov edi, ecx", | ||
"mov dword [ebp-0x184], edx", | ||
"mov dword [ebp-0x180], 0x0", | ||
"lea ecx, [ebp-0x16c]", | ||
"add esi, 0x28", | ||
"mov edx, 0x6", | ||
"mov eax, dword [edi+0x3190]", | ||
"mov dword [ebp-0x188], eax", | ||
"mov eax, dword [ebp+0xc]", | ||
"mov dword [ebp-0x178], eax", | ||
"mov eax, dword [ebp+0x10]", | ||
"mov dword [ebp-0x170], eax", | ||
"movzx eax, byte [ebp+0x14]", | ||
"mov dword [ebp-0x17c], eax", | ||
"movq xmm0, qword [esi-0x28]", | ||
"xorps xmm1, xmm1", | ||
"mov eax, dword [esi-0x20]", | ||
"lea esi, [esi+0x4c]", | ||
"movq qword [ebp-0x1c4], xmm0", | ||
"lea ecx, [ecx+0x3c]", | ||
"mov dword [ebp-0x1bc], eax", | ||
"movups xmm0, [esi-0x58]", | ||
"movups [ebp-0x1a8], xmm1", | ||
"movups [ebp-0x1b8], xmm0", | ||
"movups xmm0, [ebp-0x1c4]", | ||
"movups [ebp-0x198], xmm1", | ||
"psrldq xmm1, 0xc", | ||
"movups [ecx-0x3c], xmm0", | ||
"movups xmm0, [ebp-0x1b4]", | ||
"movups [ecx-0x2c], xmm0", | ||
"movups xmm0, [ebp-0x1a4]", | ||
"movups [ecx-0x1c], xmm0", | ||
"movq xmm0, qword [ebp-0x194]", | ||
"movq qword [ecx-0xc], xmm0", | ||
"movd dword [ecx-0x4], xmm1", | ||
"sub edx, 0x1" | ||
], | ||
"ExpectedArm64ASM": [ | ||
"str w9, [x8, #-4]!", | ||
"mov x9, x8", | ||
"sub w8, w8, #0x1c4 (452)", | ||
"mov w20, #0x7400", | ||
"movk w20, #0xa3, lsl #16", | ||
"ldapur w4, [x20]", | ||
"nop", | ||
"eor w4, w4, w9", | ||
"mov w20, #0x0", | ||
"sub w21, w9, #0x4 (4)", | ||
"nop", | ||
"stlur w4, [x21]", | ||
"add w21, w9, #0x8 (8)", | ||
"ldapur w5, [x21]", | ||
"nop", | ||
"str w6, [x8, #-4]!", | ||
"str w10, [x8, #-4]!", | ||
"add w21, w9, #0x18 (24)", | ||
"ldapur w10, [x21]", | ||
"nop", | ||
"str w11, [x8, #-4]!", | ||
"mov x11, x7", | ||
"sub w21, w9, #0x184 (388)", | ||
"nop", | ||
"stlur w5, [x21]", | ||
"sub w21, w9, #0x180 (384)", | ||
"nop", | ||
"stlur w20, [x21]", | ||
"sub w7, w9, #0x16c (364)", | ||
"add w10, w10, #0x28 (40)", | ||
"mov w5, #0x6", | ||
"mov w20, #0x3190", | ||
"add w20, w11, w20", | ||
"ldapur w4, [x20]", | ||
"nop", | ||
"sub w20, w9, #0x188 (392)", | ||
"nop", | ||
"stlur w4, [x20]", | ||
"add w20, w9, #0xc (12)", | ||
"ldapur w4, [x20]", | ||
"nop", | ||
"sub w20, w9, #0x178 (376)", | ||
"nop", | ||
"stlur w4, [x20]", | ||
"add w20, w9, #0x10 (16)", | ||
"ldapur w4, [x20]", | ||
"nop", | ||
"sub w20, w9, #0x170 (368)", | ||
"nop", | ||
"stlur w4, [x20]", | ||
"add w20, w9, #0x14 (20)", | ||
"ldapurb w4, [x20]", | ||
"sub w20, w9, #0x17c (380)", | ||
"nop", | ||
"stlur w4, [x20]", | ||
"ldur d16, [x10, #-40]", | ||
"dmb ishld", | ||
"movi v17.2d, #0x0", | ||
"sub w20, w10, #0x20 (32)", | ||
"ldapur w4, [x20]", | ||
"nop", | ||
"add w10, w10, #0x4c (76)", | ||
"mov x20, #0xfffffffffffffe3c", | ||
"dmb ish", | ||
"str d16, [x9, x20, sxtx]", | ||
"add w7, w7, #0x3c (60)", | ||
"sub w21, w9, #0x1bc (444)", | ||
"nop", | ||
"stlur w4, [x21]", | ||
"ldur q16, [x10, #-88]", | ||
"dmb ishld", | ||
"mov v2.16b, v17.16b", | ||
"mov x21, #0xfffffffffffffe58", | ||
"dmb ish", | ||
"str q17, [x9, x21, sxtx]", | ||
"mov x21, #0xfffffffffffffe48", | ||
"dmb ish", | ||
"str q16, [x9, x21, sxtx]", | ||
"ldr q16, [x9, x20, sxtx]", | ||
"dmb ishld", | ||
"mov x20, #0xfffffffffffffe68", | ||
"dmb ish", | ||
"str q17, [x9, x20, sxtx]", | ||
"ext v17.16b, v17.16b, v2.16b, #12", | ||
"dmb ish", | ||
"stur q16, [x7, #-60]", | ||
"mov x20, #0xfffffffffffffe4c", | ||
"ldr q16, [x9, x20, sxtx]", | ||
"dmb ishld", | ||
"dmb ish", | ||
"stur q16, [x7, #-44]", | ||
"mov x20, #0xfffffffffffffe5c", | ||
"ldr q16, [x9, x20, sxtx]", | ||
"dmb ishld", | ||
"dmb ish", | ||
"stur q16, [x7, #-28]", | ||
"mov x20, #0xfffffffffffffe6c", | ||
"ldr d16, [x9, x20, sxtx]", | ||
"dmb ishld", | ||
"dmb ish", | ||
"stur d16, [x7, #-12]", | ||
"sub w20, w7, #0x4 (4)", | ||
"str s17, [x20]", | ||
"subs w26, w5, #0x1 (1)", | ||
"mov x27, x5", | ||
"mov x5, x26" | ||
] | ||
} | ||
} | ||
} |