Skip to content

Commit

Permalink
Merge pull request #4330 from Sonicadvance1/hotblock_tso_32bit
Browse files Browse the repository at this point in the history
InstcountCI: Adds a hotblock for 32-bit TSO testing
  • Loading branch information
lioncash authored Feb 7, 2025
2 parents 9af52fb + 99b8046 commit e8cd655
Showing 1 changed file with 192 additions and 0 deletions.
192 changes: 192 additions & 0 deletions unittests/InstructionCountCI/FlagM/HotBlocks_TSO_32Bit.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
{
"Features": {
"Bitness": 32,
"EnabledHostFeatures": [
"TSO",
"LRCPC",
"LRCPC2",
"FLAGM",
"FLAGM2"
],
"DisabledHostFeatures": [
"SVE128",
"SVE256",
"AFP"
]
},
"Instructions": {
"The Sims 1 hot block": {
"x86InstructionCount": 47,
"ExpectedInstructionCount": 106,
"Comment": [
"Hottest in-game block from The Sims 1, Legacy Collection",
"Consumed 6.13% of a CPU core on Oryon-1",
"Some interesting problems in this block:",
" - LRCPC2 small immediate loadstores (ldapur/stlur with a simm9 offset) aren't getting used; the address is computed into a temporary register first",
" - Memory stores of zero aren't using the wzr register",
" - Vector loadstores are using large negative offsets which Arm64 addressing modes can't encode directly",
" - Would need to generate a temporary and convert negative offsets to positive (Or close enough for simm9)",
" - LRCPC3 isn't used for vector loadstores at all",
" - This is where the excessive dmb instructions come from",
" - No hardware ships with LRCPC3 yet anyway"
],
"x86Insts": [
"push ebp",
"mov ebp, esp",
"sub esp, 0x1c4",
"mov eax, dword [0xa37400]",
"xor eax, ebp",
"mov dword [ebp-0x4], eax",
"mov edx, dword [ebp+0x8]",
"push ebx",
"push esi",
"mov esi, dword [ebp+0x18]",
"push edi",
"mov edi, ecx",
"mov dword [ebp-0x184], edx",
"mov dword [ebp-0x180], 0x0",
"lea ecx, [ebp-0x16c]",
"add esi, 0x28",
"mov edx, 0x6",
"mov eax, dword [edi+0x3190]",
"mov dword [ebp-0x188], eax",
"mov eax, dword [ebp+0xc]",
"mov dword [ebp-0x178], eax",
"mov eax, dword [ebp+0x10]",
"mov dword [ebp-0x170], eax",
"movzx eax, byte [ebp+0x14]",
"mov dword [ebp-0x17c], eax",
"movq xmm0, qword [esi-0x28]",
"xorps xmm1, xmm1",
"mov eax, dword [esi-0x20]",
"lea esi, [esi+0x4c]",
"movq qword [ebp-0x1c4], xmm0",
"lea ecx, [ecx+0x3c]",
"mov dword [ebp-0x1bc], eax",
"movups xmm0, [esi-0x58]",
"movups [ebp-0x1a8], xmm1",
"movups [ebp-0x1b8], xmm0",
"movups xmm0, [ebp-0x1c4]",
"movups [ebp-0x198], xmm1",
"psrldq xmm1, 0xc",
"movups [ecx-0x3c], xmm0",
"movups xmm0, [ebp-0x1b4]",
"movups [ecx-0x2c], xmm0",
"movups xmm0, [ebp-0x1a4]",
"movups [ecx-0x1c], xmm0",
"movq xmm0, qword [ebp-0x194]",
"movq qword [ecx-0xc], xmm0",
"movd dword [ecx-0x4], xmm1",
"sub edx, 0x1"
],
"ExpectedArm64ASM": [
"str w9, [x8, #-4]!",
"mov x9, x8",
"sub w8, w8, #0x1c4 (452)",
"mov w20, #0x7400",
"movk w20, #0xa3, lsl #16",
"ldapur w4, [x20]",
"nop",
"eor w4, w4, w9",
"mov w20, #0x0",
"sub w21, w9, #0x4 (4)",
"nop",
"stlur w4, [x21]",
"add w21, w9, #0x8 (8)",
"ldapur w5, [x21]",
"nop",
"str w6, [x8, #-4]!",
"str w10, [x8, #-4]!",
"add w21, w9, #0x18 (24)",
"ldapur w10, [x21]",
"nop",
"str w11, [x8, #-4]!",
"mov x11, x7",
"sub w21, w9, #0x184 (388)",
"nop",
"stlur w5, [x21]",
"sub w21, w9, #0x180 (384)",
"nop",
"stlur w20, [x21]",
"sub w7, w9, #0x16c (364)",
"add w10, w10, #0x28 (40)",
"mov w5, #0x6",
"mov w20, #0x3190",
"add w20, w11, w20",
"ldapur w4, [x20]",
"nop",
"sub w20, w9, #0x188 (392)",
"nop",
"stlur w4, [x20]",
"add w20, w9, #0xc (12)",
"ldapur w4, [x20]",
"nop",
"sub w20, w9, #0x178 (376)",
"nop",
"stlur w4, [x20]",
"add w20, w9, #0x10 (16)",
"ldapur w4, [x20]",
"nop",
"sub w20, w9, #0x170 (368)",
"nop",
"stlur w4, [x20]",
"add w20, w9, #0x14 (20)",
"ldapurb w4, [x20]",
"sub w20, w9, #0x17c (380)",
"nop",
"stlur w4, [x20]",
"ldur d16, [x10, #-40]",
"dmb ishld",
"movi v17.2d, #0x0",
"sub w20, w10, #0x20 (32)",
"ldapur w4, [x20]",
"nop",
"add w10, w10, #0x4c (76)",
"mov x20, #0xfffffffffffffe3c",
"dmb ish",
"str d16, [x9, x20, sxtx]",
"add w7, w7, #0x3c (60)",
"sub w21, w9, #0x1bc (444)",
"nop",
"stlur w4, [x21]",
"ldur q16, [x10, #-88]",
"dmb ishld",
"mov v2.16b, v17.16b",
"mov x21, #0xfffffffffffffe58",
"dmb ish",
"str q17, [x9, x21, sxtx]",
"mov x21, #0xfffffffffffffe48",
"dmb ish",
"str q16, [x9, x21, sxtx]",
"ldr q16, [x9, x20, sxtx]",
"dmb ishld",
"mov x20, #0xfffffffffffffe68",
"dmb ish",
"str q17, [x9, x20, sxtx]",
"ext v17.16b, v17.16b, v2.16b, #12",
"dmb ish",
"stur q16, [x7, #-60]",
"mov x20, #0xfffffffffffffe4c",
"ldr q16, [x9, x20, sxtx]",
"dmb ishld",
"dmb ish",
"stur q16, [x7, #-44]",
"mov x20, #0xfffffffffffffe5c",
"ldr q16, [x9, x20, sxtx]",
"dmb ishld",
"dmb ish",
"stur q16, [x7, #-28]",
"mov x20, #0xfffffffffffffe6c",
"ldr d16, [x9, x20, sxtx]",
"dmb ishld",
"dmb ish",
"stur d16, [x7, #-12]",
"sub w20, w7, #0x4 (4)",
"str s17, [x20]",
"subs w26, w5, #0x1 (1)",
"mov x27, x5",
"mov x5, x26"
]
}
}
}

0 comments on commit e8cd655

Please sign in to comment.