Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JIT: Optimize memory stores with zero #4342

Merged
merged 2 commits into from
Feb 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions FEXCore/Source/Interface/Core/JIT/MemoryOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1547,7 +1547,7 @@ DEF_OP(StoreMem) {
const auto MemSrc = GenerateMemOperand(OpSize, MemReg, Op->Offset, Op->OffsetType, Op->OffsetScale);

if (Op->Class == FEXCore::IR::GPRClass) {
const auto Src = GetReg(Op->Value.ID());
const auto Src = GetZeroableReg(Op->Value);
switch (OpSize) {
case IR::OpSize::i8Bit: strb(Src, MemSrc); break;
case IR::OpSize::i16Bit: strh(Src, MemSrc); break;
Expand Down Expand Up @@ -1658,8 +1658,8 @@ DEF_OP(StoreMemPair) {
const auto Addr = GetReg(Op->Addr.ID());

if (Op->Class == FEXCore::IR::GPRClass) {
const auto Src1 = GetReg(Op->Value1.ID());
const auto Src2 = GetReg(Op->Value2.ID());
const auto Src1 = GetZeroableReg(Op->Value1);
const auto Src2 = GetZeroableReg(Op->Value2);
switch (OpSize) {
case IR::OpSize::i32Bit: stp<ARMEmitter::IndexType::OFFSET>(Src1.W(), Src2.W(), Addr, Op->Offset); break;
case IR::OpSize::i64Bit: stp<ARMEmitter::IndexType::OFFSET>(Src1.X(), Src2.X(), Addr, Op->Offset); break;
Expand Down Expand Up @@ -1691,7 +1691,7 @@ DEF_OP(StoreMemTSO) {
}

if (CTX->HostFeatures.SupportsTSOImm9 && Op->Class == FEXCore::IR::GPRClass) {
const auto Src = GetReg(Op->Value.ID());
const auto Src = GetZeroableReg(Op->Value);
uint64_t Offset = 0;
if (!Op->Offset.IsInvalid()) {
LOGMAN_THROW_A_FMT(IsInlineConstant(Op->Offset, &Offset), "expected immediate");
Expand All @@ -1711,7 +1711,7 @@ DEF_OP(StoreMemTSO) {
}
}
} else if (Op->Class == FEXCore::IR::GPRClass) {
const auto Src = GetReg(Op->Value.ID());
const auto Src = GetZeroableReg(Op->Value);

if (OpSize == IR::OpSize::i8Bit) {
// 8bit store is always aligned to natural alignment
Expand Down Expand Up @@ -1763,7 +1763,7 @@ DEF_OP(MemSet) {
const bool IsAtomic = CTX->IsMemcpyAtomicTSOEnabled();
const auto Size = IR::OpSizeToSize(Op->Size);
const auto MemReg = GetReg(Op->Addr.ID());
const auto Value = GetReg(Op->Value.ID());
const auto Value = GetZeroableReg(Op->Value);
const auto Length = GetReg(Op->Length.ID());
const auto Dst = GetReg(Node);

Expand Down Expand Up @@ -2312,7 +2312,7 @@ DEF_OP(ParanoidStoreMemTSO) {
auto MemReg = GetReg(Op->Addr.ID());

if (CTX->HostFeatures.SupportsTSOImm9 && Op->Class == FEXCore::IR::GPRClass) {
const auto Src = GetReg(Op->Value.ID());
const auto Src = GetZeroableReg(Op->Value);
uint64_t Offset = 0;
if (!Op->Offset.IsInvalid()) {
if (!IsInlineConstant(Op->Offset, &Offset)) {
Expand All @@ -2332,7 +2332,7 @@ DEF_OP(ParanoidStoreMemTSO) {
}
}
} else if (Op->Class == FEXCore::IR::GPRClass) {
const auto Src = GetReg(Op->Value.ID());
const auto Src = GetZeroableReg(Op->Value);
MemReg = ApplyMemOperand(OpSize, MemReg, TMP1, Op->Offset, Op->OffsetType, Op->OffsetScale);
switch (OpSize) {
case IR::OpSize::i8Bit: stlrb(Src, MemReg); break;
Expand Down
9 changes: 9 additions & 0 deletions FEXCore/Source/Interface/IR/Passes/ConstProp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -667,6 +667,7 @@ void ConstProp::ConstantPropagation(IREmitter* IREmit, const IRListView& Current
case OP_STOREMEM: {
auto Op = IROp->CW<IR::IROp_StoreMem>();
InlineMemImmediate(IREmit, CurrentIR, CodeNode, IROp, Op->Offset, Op->OffsetType, Op->Offset_Index, Op->OffsetScale, false);
InlineIfZero(IREmit, CurrentIR, CodeNode, IROp, Op->Value_Index);
break;
}
case OP_PREFETCH: {
Expand All @@ -682,6 +683,13 @@ void ConstProp::ConstantPropagation(IREmitter* IREmit, const IRListView& Current
case OP_STOREMEMTSO: {
auto Op = IROp->CW<IR::IROp_StoreMemTSO>();
InlineMemImmediate(IREmit, CurrentIR, CodeNode, IROp, Op->Offset, Op->OffsetType, Op->Offset_Index, Op->OffsetScale, true);
InlineIfZero(IREmit, CurrentIR, CodeNode, IROp, Op->Value_Index);
break;
}
case OP_STOREMEMPAIR: {
auto Op = IROp->CW<IR::IROp_StoreMemPair>();
InlineIfZero(IREmit, CurrentIR, CodeNode, IROp, Op->Value1_Index);
InlineIfZero(IREmit, CurrentIR, CodeNode, IROp, Op->Value2_Index);
break;
}
case OP_MEMCPY: {
Expand All @@ -692,6 +700,7 @@ void ConstProp::ConstantPropagation(IREmitter* IREmit, const IRListView& Current
case OP_MEMSET: {
auto Op = IROp->CW<IR::IROp_MemSet>();
Inline(IREmit, CurrentIR, CodeNode, IROp, Op->Direction_Index);
InlineIfZero(IREmit, CurrentIR, CodeNode, IROp, Op->Value_Index);
break;
}

Expand Down
23 changes: 11 additions & 12 deletions unittests/InstructionCountCI/FlagM/HotBlocks_TSO_32Bit.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"Instructions": {
"The Sims 1 hot block": {
"x86InstructionCount": 47,
"ExpectedInstructionCount": 106,
"ExpectedInstructionCount": 105,
"Comment": [
"Hottest in-game block from The Sims 1, Legacy Collection",
"Consumed 6.13% of a CPU core on Oryon-1",
Expand Down Expand Up @@ -88,26 +88,25 @@
"ldapur w4, [x20]",
"nop",
"eor w4, w4, w9",
"mov w20, #0x0",
"sub w21, w9, #0x4 (4)",
"sub w20, w9, #0x4 (4)",
"nop",
"stlur w4, [x21]",
"add w21, w9, #0x8 (8)",
"ldapur w5, [x21]",
"stlur w4, [x20]",
"add w20, w9, #0x8 (8)",
"ldapur w5, [x20]",
"nop",
"str w6, [x8, #-4]!",
"str w10, [x8, #-4]!",
"add w21, w9, #0x18 (24)",
"ldapur w10, [x21]",
"add w20, w9, #0x18 (24)",
"ldapur w10, [x20]",
"nop",
"str w11, [x8, #-4]!",
"mov x11, x7",
"sub w21, w9, #0x184 (388)",
"sub w20, w9, #0x184 (388)",
"nop",
"stlur w5, [x21]",
"sub w21, w9, #0x180 (384)",
"stlur w5, [x20]",
"sub w20, w9, #0x180 (384)",
"nop",
"stlur w20, [x21]",
"stlur wzr, [x20]",
"sub w7, w9, #0x16c (364)",
"add w10, w10, #0x28 (40)",
"mov w5, #0x6",
Expand Down
5 changes: 2 additions & 3 deletions unittests/InstructionCountCI/FlagM/SecondaryGroup.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,10 @@
},
"Instructions": {
"sgdt [rax]": {
"ExpectedInstructionCount": 4,
"ExpectedInstructionCount": 3,
"Comment": "GROUP7 0x0F 0x1 /0",
"ExpectedArm64ASM": [
"mov w20, #0x0",
"strh w20, [x4]",
"strh wzr, [x4]",
"mov x20, #0xfffffffffffe0000",
"stur x20, [x4, #2]"
]
Expand Down
28 changes: 14 additions & 14 deletions unittests/InstructionCountCI/FlagM/x87.json
Original file line number Diff line number Diff line change
Expand Up @@ -3994,13 +3994,13 @@
"lsr w21, w21, #7",
"and w21, w21, #0x1",
"cmp x21, #0x0 (0)",
"csel x21, x23, x20, eq",
"orr w21, w22, w21, lsl #14",
"str w21, [x4, #8]",
"str w20, [x4, #12]",
"str w20, [x4, #16]",
"str w20, [x4, #20]",
"str w20, [x4, #24]",
"csel x20, x23, x20, eq",
"orr w20, w22, w20, lsl #14",
"str w20, [x4, #8]",
"str wzr, [x4, #12]",
"str wzr, [x4, #16]",
"str wzr, [x4, #20]",
"str wzr, [x4, #24]",
"msr nzcv, x24"
]
},
Expand Down Expand Up @@ -11600,13 +11600,13 @@
"lsr w22, w22, #7",
"and w22, w22, #0x1",
"cmp x22, #0x0 (0)",
"csel x22, x24, x21, eq",
"orr w22, w23, w22, lsl #14",
"str w22, [x4, #8]",
"str w21, [x4, #12]",
"str w21, [x4, #16]",
"str w21, [x4, #20]",
"str w21, [x4, #24]",
"csel x21, x24, x21, eq",
"orr w21, w23, w21, lsl #14",
"str w21, [x4, #8]",
"str wzr, [x4, #12]",
"str wzr, [x4, #16]",
"str wzr, [x4, #20]",
"str wzr, [x4, #24]",
"add x0, x28, x20, lsl #4",
"ldr q2, [x0, #1040]",
"stur q2, [x4, #28]",
Expand Down
22 changes: 11 additions & 11 deletions unittests/InstructionCountCI/FlagM/x87_f64.json
Original file line number Diff line number Diff line change
Expand Up @@ -1656,13 +1656,13 @@
"lsr w21, w21, #7",
"and w21, w21, #0x1",
"cmp x21, #0x0 (0)",
"csel x21, x23, x20, eq",
"orr w21, w22, w21, lsl #14",
"str w21, [x4, #8]",
"str w20, [x4, #12]",
"str w20, [x4, #16]",
"str w20, [x4, #20]",
"str w20, [x4, #24]",
"csel x20, x23, x20, eq",
"orr w20, w22, w20, lsl #14",
"str w20, [x4, #8]",
"str wzr, [x4, #12]",
"str wzr, [x4, #16]",
"str wzr, [x4, #20]",
"str wzr, [x4, #24]",
"msr nzcv, x24"
]
},
Expand Down Expand Up @@ -6634,10 +6634,10 @@
"csel x22, x24, x21, eq",
"orr w22, w23, w22, lsl #14",
"str w22, [x4, #8]",
"str w21, [x4, #12]",
"str w21, [x4, #16]",
"str w21, [x4, #20]",
"str w21, [x4, #24]",
"str wzr, [x4, #12]",
"str wzr, [x4, #16]",
"str wzr, [x4, #20]",
"str wzr, [x4, #24]",
"add x0, x28, x20, lsl #4",
"ldr d2, [x0, #1040]",
"mrs x0, nzcv",
Expand Down
20 changes: 8 additions & 12 deletions unittests/InstructionCountCI/PrimaryGroup.json
Original file line number Diff line number Diff line change
Expand Up @@ -3049,35 +3049,31 @@
]
},
"mov byte [rax], 0": {
"ExpectedInstructionCount": 2,
"ExpectedInstructionCount": 1,
"Comment": "GROUP11 0xc6 /0",
"ExpectedArm64ASM": [
"mov w20, #0x0",
"strb w20, [x4]"
"strb wzr, [x4]"
]
},
"mov word [rax], 0": {
"ExpectedInstructionCount": 2,
"ExpectedInstructionCount": 1,
"Comment": "GROUP11 0xc7 /0",
"ExpectedArm64ASM": [
"mov w20, #0x0",
"strh w20, [x4]"
"strh wzr, [x4]"
]
},
"mov dword [rax], 0": {
"ExpectedInstructionCount": 2,
"ExpectedInstructionCount": 1,
"Comment": "GROUP11 0xc7 /0",
"ExpectedArm64ASM": [
"mov w20, #0x0",
"str w20, [x4]"
"str wzr, [x4]"
]
},
"mov qword [rax], 0": {
"ExpectedInstructionCount": 2,
"ExpectedInstructionCount": 1,
"Comment": "GROUP11 0xc7 /0",
"ExpectedArm64ASM": [
"mov w20, #0x0",
"str x20, [x4]"
"str xzr, [x4]"
]
},
"mov byte [rax], 1": {
Expand Down
5 changes: 2 additions & 3 deletions unittests/InstructionCountCI/SecondaryGroup.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,10 @@
},
"Instructions": {
"sgdt [rax]": {
"ExpectedInstructionCount": 4,
"ExpectedInstructionCount": 3,
"Comment": "GROUP7 0x0F 0x1 /0",
"ExpectedArm64ASM": [
"mov w20, #0x0",
"strh w20, [x4]",
"strh wzr, [x4]",
"mov x20, #0xfffffffffffe0000",
"stur x20, [x4, #2]"
]
Expand Down
28 changes: 14 additions & 14 deletions unittests/InstructionCountCI/x87.json
Original file line number Diff line number Diff line change
Expand Up @@ -3993,13 +3993,13 @@
"lsr w21, w21, #7",
"and w21, w21, #0x1",
"cmp x21, #0x0 (0)",
"csel x21, x23, x20, eq",
"orr w21, w22, w21, lsl #14",
"str w21, [x4, #8]",
"str w20, [x4, #12]",
"str w20, [x4, #16]",
"str w20, [x4, #20]",
"str w20, [x4, #24]",
"csel x20, x23, x20, eq",
"orr w20, w22, w20, lsl #14",
"str w20, [x4, #8]",
"str wzr, [x4, #12]",
"str wzr, [x4, #16]",
"str wzr, [x4, #20]",
"str wzr, [x4, #24]",
"msr nzcv, x24"
]
},
Expand Down Expand Up @@ -11631,13 +11631,13 @@
"lsr w22, w22, #7",
"and w22, w22, #0x1",
"cmp x22, #0x0 (0)",
"csel x22, x24, x21, eq",
"orr w22, w23, w22, lsl #14",
"str w22, [x4, #8]",
"str w21, [x4, #12]",
"str w21, [x4, #16]",
"str w21, [x4, #20]",
"str w21, [x4, #24]",
"csel x21, x24, x21, eq",
"orr w21, w23, w21, lsl #14",
"str w21, [x4, #8]",
"str wzr, [x4, #12]",
"str wzr, [x4, #16]",
"str wzr, [x4, #20]",
"str wzr, [x4, #24]",
"add x0, x28, x20, lsl #4",
"ldr q2, [x0, #1040]",
"stur q2, [x4, #28]",
Expand Down
22 changes: 11 additions & 11 deletions unittests/InstructionCountCI/x87_f64.json
Original file line number Diff line number Diff line change
Expand Up @@ -1673,13 +1673,13 @@
"lsr w21, w21, #7",
"and w21, w21, #0x1",
"cmp x21, #0x0 (0)",
"csel x21, x23, x20, eq",
"orr w21, w22, w21, lsl #14",
"str w21, [x4, #8]",
"str w20, [x4, #12]",
"str w20, [x4, #16]",
"str w20, [x4, #20]",
"str w20, [x4, #24]",
"csel x20, x23, x20, eq",
"orr w20, w22, w20, lsl #14",
"str w20, [x4, #8]",
"str wzr, [x4, #12]",
"str wzr, [x4, #16]",
"str wzr, [x4, #20]",
"str wzr, [x4, #24]",
"msr nzcv, x24"
]
},
Expand Down Expand Up @@ -6673,10 +6673,10 @@
"csel x22, x24, x21, eq",
"orr w22, w23, w22, lsl #14",
"str w22, [x4, #8]",
"str w21, [x4, #12]",
"str w21, [x4, #16]",
"str w21, [x4, #20]",
"str w21, [x4, #24]",
"str wzr, [x4, #12]",
"str wzr, [x4, #16]",
"str wzr, [x4, #20]",
"str wzr, [x4, #24]",
"add x0, x28, x20, lsl #4",
"ldr d2, [x0, #1040]",
"mrs x0, nzcv",
Expand Down
Loading