diff --git a/FEXCore/Source/Interface/Core/JIT/MemoryOps.cpp b/FEXCore/Source/Interface/Core/JIT/MemoryOps.cpp index 155731e146..3f9cd31c7c 100644 --- a/FEXCore/Source/Interface/Core/JIT/MemoryOps.cpp +++ b/FEXCore/Source/Interface/Core/JIT/MemoryOps.cpp @@ -1547,7 +1547,7 @@ DEF_OP(StoreMem) { const auto MemSrc = GenerateMemOperand(OpSize, MemReg, Op->Offset, Op->OffsetType, Op->OffsetScale); if (Op->Class == FEXCore::IR::GPRClass) { - const auto Src = GetReg(Op->Value.ID()); + const auto Src = GetZeroableReg(Op->Value); switch (OpSize) { case IR::OpSize::i8Bit: strb(Src, MemSrc); break; case IR::OpSize::i16Bit: strh(Src, MemSrc); break; @@ -1658,8 +1658,8 @@ DEF_OP(StoreMemPair) { const auto Addr = GetReg(Op->Addr.ID()); if (Op->Class == FEXCore::IR::GPRClass) { - const auto Src1 = GetReg(Op->Value1.ID()); - const auto Src2 = GetReg(Op->Value2.ID()); + const auto Src1 = GetZeroableReg(Op->Value1); + const auto Src2 = GetZeroableReg(Op->Value2); switch (OpSize) { case IR::OpSize::i32Bit: stp(Src1.W(), Src2.W(), Addr, Op->Offset); break; case IR::OpSize::i64Bit: stp(Src1.X(), Src2.X(), Addr, Op->Offset); break; @@ -1691,7 +1691,7 @@ DEF_OP(StoreMemTSO) { } if (CTX->HostFeatures.SupportsTSOImm9 && Op->Class == FEXCore::IR::GPRClass) { - const auto Src = GetReg(Op->Value.ID()); + const auto Src = GetZeroableReg(Op->Value); uint64_t Offset = 0; if (!Op->Offset.IsInvalid()) { LOGMAN_THROW_A_FMT(IsInlineConstant(Op->Offset, &Offset), "expected immediate"); @@ -1711,7 +1711,7 @@ DEF_OP(StoreMemTSO) { } } } else if (Op->Class == FEXCore::IR::GPRClass) { - const auto Src = GetReg(Op->Value.ID()); + const auto Src = GetZeroableReg(Op->Value); if (OpSize == IR::OpSize::i8Bit) { // 8bit load is always aligned to natural alignment @@ -1763,7 +1763,7 @@ DEF_OP(MemSet) { const bool IsAtomic = CTX->IsMemcpyAtomicTSOEnabled(); const auto Size = IR::OpSizeToSize(Op->Size); const auto MemReg = GetReg(Op->Addr.ID()); - const auto Value = GetReg(Op->Value.ID()); + const auto Value = GetZeroableReg(Op->Value); const auto Length = GetReg(Op->Length.ID()); const auto Dst = GetReg(Node); @@ -2312,7 +2312,7 @@ DEF_OP(ParanoidStoreMemTSO) { auto MemReg = GetReg(Op->Addr.ID()); if (CTX->HostFeatures.SupportsTSOImm9 && Op->Class == FEXCore::IR::GPRClass) { - const auto Src = GetReg(Op->Value.ID()); + const auto Src = GetZeroableReg(Op->Value); uint64_t Offset = 0; if (!Op->Offset.IsInvalid()) { if (!IsInlineConstant(Op->Offset, &Offset)) { @@ -2332,7 +2332,7 @@ DEF_OP(ParanoidStoreMemTSO) { } } } else if (Op->Class == FEXCore::IR::GPRClass) { - const auto Src = GetReg(Op->Value.ID()); + const auto Src = GetZeroableReg(Op->Value); MemReg = ApplyMemOperand(OpSize, MemReg, TMP1, Op->Offset, Op->OffsetType, Op->OffsetScale); switch (OpSize) { case IR::OpSize::i8Bit: stlrb(Src, MemReg); break; diff --git a/FEXCore/Source/Interface/IR/Passes/ConstProp.cpp b/FEXCore/Source/Interface/IR/Passes/ConstProp.cpp index ed8aeecd4c..145d676a29 100644 --- a/FEXCore/Source/Interface/IR/Passes/ConstProp.cpp +++ b/FEXCore/Source/Interface/IR/Passes/ConstProp.cpp @@ -667,6 +667,7 @@ void ConstProp::ConstantPropagation(IREmitter* IREmit, const IRListView& Current case OP_STOREMEM: { auto Op = IROp->CW(); InlineMemImmediate(IREmit, CurrentIR, CodeNode, IROp, Op->Offset, Op->OffsetType, Op->Offset_Index, Op->OffsetScale, false); + InlineIfZero(IREmit, CurrentIR, CodeNode, IROp, Op->Value_Index); break; } case OP_PREFETCH: { @@ -682,6 +683,13 @@ void ConstProp::ConstantPropagation(IREmitter* IREmit, const IRListView& Current case OP_STOREMEMTSO: { auto Op = IROp->CW(); InlineMemImmediate(IREmit, CurrentIR, CodeNode, IROp, Op->Offset, Op->OffsetType, Op->Offset_Index, Op->OffsetScale, true); + InlineIfZero(IREmit, CurrentIR, CodeNode, IROp, Op->Value_Index); + break; + } + case OP_STOREMEMPAIR: { + auto Op = IROp->CW(); + InlineIfZero(IREmit, CurrentIR, CodeNode, IROp, Op->Value1_Index); + InlineIfZero(IREmit, CurrentIR, CodeNode, IROp, Op->Value2_Index); break; } case OP_MEMCPY: { @@ -692,6 +700,7 @@ void ConstProp::ConstantPropagation(IREmitter* IREmit, const IRListView& Current case OP_MEMSET: { auto Op = IROp->CW(); Inline(IREmit, CurrentIR, CodeNode, IROp, Op->Direction_Index); + InlineIfZero(IREmit, CurrentIR, CodeNode, IROp, Op->Value_Index); break; } diff --git a/unittests/InstructionCountCI/FlagM/HotBlocks_TSO_32Bit.json b/unittests/InstructionCountCI/FlagM/HotBlocks_TSO_32Bit.json index e524683a3c..0607c98ea6 100644 --- a/unittests/InstructionCountCI/FlagM/HotBlocks_TSO_32Bit.json +++ b/unittests/InstructionCountCI/FlagM/HotBlocks_TSO_32Bit.json @@ -17,7 +17,7 @@ "Instructions": { "The Sims 1 hot block": { "x86InstructionCount": 47, - "ExpectedInstructionCount": 106, + "ExpectedInstructionCount": 105, "Comment": [ "Hottest in-game block from The Sims 1, Legacy Collection", "Consumed 6.13% of a CPU core on Oryon-1", @@ -88,26 +88,25 @@ "ldapur w4, [x20]", "nop", "eor w4, w4, w9", - "mov w20, #0x0", - "sub w21, w9, #0x4 (4)", + "sub w20, w9, #0x4 (4)", "nop", - "stlur w4, [x21]", - "add w21, w9, #0x8 (8)", - "ldapur w5, [x21]", + "stlur w4, [x20]", + "add w20, w9, #0x8 (8)", + "ldapur w5, [x20]", "nop", "str w6, [x8, #-4]!", "str w10, [x8, #-4]!", - "add w21, w9, #0x18 (24)", - "ldapur w10, [x21]", + "add w20, w9, #0x18 (24)", + "ldapur w10, [x20]", "nop", "str w11, [x8, #-4]!", "mov x11, x7", - "sub w21, w9, #0x184 (388)", + "sub w20, w9, #0x184 (388)", "nop", - "stlur w5, [x21]", - "sub w21, w9, #0x180 (384)", + "stlur w5, [x20]", + "sub w20, w9, #0x180 (384)", "nop", - "stlur w20, [x21]", + "stlur wzr, [x20]", "sub w7, w9, #0x16c (364)", "add w10, w10, #0x28 (40)", "mov w5, #0x6", diff --git a/unittests/InstructionCountCI/FlagM/SecondaryGroup.json b/unittests/InstructionCountCI/FlagM/SecondaryGroup.json index c6a1d1828b..2acddd30db 100644 --- a/unittests/InstructionCountCI/FlagM/SecondaryGroup.json +++ b/unittests/InstructionCountCI/FlagM/SecondaryGroup.json @@ -14,11 +14,10 @@ }, "Instructions": { "sgdt [rax]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 3, "Comment": "GROUP7 0x0F 0x1 /0", "ExpectedArm64ASM": [ - "mov w20, #0x0", - "strh w20, [x4]", + "strh wzr, [x4]", "mov x20, #0xfffffffffffe0000", "stur x20, [x4, #2]" ] diff --git a/unittests/InstructionCountCI/FlagM/x87.json b/unittests/InstructionCountCI/FlagM/x87.json index e713c628b0..90d0e33d2b 100644 --- a/unittests/InstructionCountCI/FlagM/x87.json +++ b/unittests/InstructionCountCI/FlagM/x87.json @@ -3994,13 +3994,13 @@ "lsr w21, w21, #7", "and w21, w21, #0x1", "cmp x21, #0x0 (0)", - "csel x21, x23, x20, eq", - "orr w21, w22, w21, lsl #14", - "str w21, [x4, #8]", - "str w20, [x4, #12]", - "str w20, [x4, #16]", - "str w20, [x4, #20]", - "str w20, [x4, #24]", + "csel x20, x23, x20, eq", + "orr w20, w22, w20, lsl #14", + "str w20, [x4, #8]", + "str wzr, [x4, #12]", + "str wzr, [x4, #16]", + "str wzr, [x4, #20]", + "str wzr, [x4, #24]", "msr nzcv, x24" ] }, @@ -11600,13 +11600,13 @@ "lsr w22, w22, #7", "and w22, w22, #0x1", "cmp x22, #0x0 (0)", - "csel x22, x24, x21, eq", - "orr w22, w23, w22, lsl #14", - "str w22, [x4, #8]", - "str w21, [x4, #12]", - "str w21, [x4, #16]", - "str w21, [x4, #20]", - "str w21, [x4, #24]", + "csel x21, x24, x21, eq", + "orr w21, w23, w21, lsl #14", + "str w21, [x4, #8]", + "str wzr, [x4, #12]", + "str wzr, [x4, #16]", + "str wzr, [x4, #20]", + "str wzr, [x4, #24]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #1040]", "stur q2, [x4, #28]", diff --git a/unittests/InstructionCountCI/FlagM/x87_f64.json b/unittests/InstructionCountCI/FlagM/x87_f64.json index 9145ffa46a..c1d1e4479e 100644 --- a/unittests/InstructionCountCI/FlagM/x87_f64.json +++ b/unittests/InstructionCountCI/FlagM/x87_f64.json @@ -1656,13 +1656,13 @@ "lsr w21, w21, #7", "and w21, w21, #0x1", "cmp x21, #0x0 (0)", - "csel x21, x23, x20, eq", - "orr w21, w22, w21, lsl #14", - "str w21, [x4, #8]", - "str w20, [x4, #12]", - "str w20, [x4, #16]", - "str w20, [x4, #20]", - "str w20, [x4, #24]", + "csel x20, x23, x20, eq", + "orr w20, w22, w20, lsl #14", + "str w20, [x4, #8]", + "str wzr, [x4, #12]", + "str wzr, [x4, #16]", + "str wzr, [x4, #20]", + "str wzr, [x4, #24]", "msr nzcv, x24" ] }, @@ -6634,10 +6634,10 @@ "csel x22, x24, x21, eq", "orr w22, w23, w22, lsl #14", "str w22, [x4, #8]", - "str w21, [x4, #12]", - "str w21, [x4, #16]", - "str w21, [x4, #20]", - "str w21, [x4, #24]", + "str wzr, [x4, #12]", + "str wzr, [x4, #16]", + "str wzr, [x4, #20]", + "str wzr, [x4, #24]", "add x0, x28, x20, lsl #4", "ldr d2, [x0, #1040]", "mrs x0, nzcv", diff --git a/unittests/InstructionCountCI/PrimaryGroup.json b/unittests/InstructionCountCI/PrimaryGroup.json index ebe0738109..c10914eaef 100644 --- a/unittests/InstructionCountCI/PrimaryGroup.json +++ b/unittests/InstructionCountCI/PrimaryGroup.json @@ -3049,35 +3049,31 @@ ] }, "mov byte [rax], 0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 1, "Comment": "GROUP11 0xc6 /0", "ExpectedArm64ASM": [ - "mov w20, #0x0", - "strb w20, [x4]" + "strb wzr, [x4]" ] }, "mov word [rax], 0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 1, "Comment": "GROUP11 0xc7 /0", "ExpectedArm64ASM": [ - "mov w20, #0x0", - "strh w20, [x4]" + "strh wzr, [x4]" ] }, "mov dword [rax], 0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 1, "Comment": "GROUP11 0xc7 /0", "ExpectedArm64ASM": [ - "mov w20, #0x0", - "str w20, [x4]" + "str wzr, [x4]" ] }, "mov qword [rax], 0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 1, "Comment": "GROUP11 0xc7 /0", "ExpectedArm64ASM": [ - "mov w20, #0x0", - "str x20, [x4]" + "str xzr, [x4]" ] }, "mov byte [rax], 1": { diff --git a/unittests/InstructionCountCI/SecondaryGroup.json b/unittests/InstructionCountCI/SecondaryGroup.json index 2f1848d055..f2b88bfac1 100644 --- a/unittests/InstructionCountCI/SecondaryGroup.json +++ b/unittests/InstructionCountCI/SecondaryGroup.json @@ -14,11 +14,10 @@ }, "Instructions": { "sgdt [rax]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 3, "Comment": "GROUP7 0x0F 0x1 /0", "ExpectedArm64ASM": [ - "mov w20, #0x0", - "strh w20, [x4]", + "strh wzr, [x4]", "mov x20, #0xfffffffffffe0000", "stur x20, [x4, #2]" ] diff --git a/unittests/InstructionCountCI/x87.json b/unittests/InstructionCountCI/x87.json index be18b2d2d2..539ca0d7fe 100644 --- a/unittests/InstructionCountCI/x87.json +++ b/unittests/InstructionCountCI/x87.json @@ -3993,13 +3993,13 @@ "lsr w21, w21, #7", "and w21, w21, #0x1", "cmp x21, #0x0 (0)", - "csel x21, x23, x20, eq", - "orr w21, w22, w21, lsl #14", - "str w21, [x4, #8]", - "str w20, [x4, #12]", - "str w20, [x4, #16]", - "str w20, [x4, #20]", - "str w20, [x4, #24]", + "csel x20, x23, x20, eq", + "orr w20, w22, w20, lsl #14", + "str w20, [x4, #8]", + "str wzr, [x4, #12]", + "str wzr, [x4, #16]", + "str wzr, [x4, #20]", + "str wzr, [x4, #24]", "msr nzcv, x24" ] }, @@ -11631,13 +11631,13 @@ "lsr w22, w22, #7", "and w22, w22, #0x1", "cmp x22, #0x0 (0)", - "csel x22, x24, x21, eq", - "orr w22, w23, w22, lsl #14", - "str w22, [x4, #8]", - "str w21, [x4, #12]", - "str w21, [x4, #16]", - "str w21, [x4, #20]", - "str w21, [x4, #24]", + "csel x21, x24, x21, eq", + "orr w21, w23, w21, lsl #14", + "str w21, [x4, #8]", + "str wzr, [x4, #12]", + "str wzr, [x4, #16]", + "str wzr, [x4, #20]", + "str wzr, [x4, #24]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #1040]", "stur q2, [x4, #28]", diff --git a/unittests/InstructionCountCI/x87_f64.json b/unittests/InstructionCountCI/x87_f64.json index 3ec642dccd..19237df490 100644 --- a/unittests/InstructionCountCI/x87_f64.json +++ b/unittests/InstructionCountCI/x87_f64.json @@ -1673,13 +1673,13 @@ "lsr w21, w21, #7", "and w21, w21, #0x1", "cmp x21, #0x0 (0)", - "csel x21, x23, x20, eq", - "orr w21, w22, w21, lsl #14", - "str w21, [x4, #8]", - "str w20, [x4, #12]", - "str w20, [x4, #16]", - "str w20, [x4, #20]", - "str w20, [x4, #24]", + "csel x20, x23, x20, eq", + "orr w20, w22, w20, lsl #14", + "str w20, [x4, #8]", + "str wzr, [x4, #12]", + "str wzr, [x4, #16]", + "str wzr, [x4, #20]", + "str wzr, [x4, #24]", "msr nzcv, x24" ] }, @@ -6673,10 +6673,10 @@ "csel x22, x24, x21, eq", "orr w22, w23, w22, lsl #14", "str w22, [x4, #8]", - "str w21, [x4, #12]", - "str w21, [x4, #16]", - "str w21, [x4, #20]", - "str w21, [x4, #24]", + "str wzr, [x4, #12]", + "str wzr, [x4, #16]", + "str wzr, [x4, #20]", + "str wzr, [x4, #24]", "add x0, x28, x20, lsl #4", "ldr d2, [x0, #1040]", "mrs x0, nzcv",