Skip to content

Commit

Permalink
JIT: Optimize CAS
Browse files Browse the repository at this point in the history
Hey kid, want to see a sick trick?

Finally optimal codegen for 64-bit cmpxchg.
  • Loading branch information
Sonicadvance1 committed Mar 7, 2025
1 parent b773354 commit a37d6a3
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 54 deletions.
15 changes: 10 additions & 5 deletions FEXCore/Source/Interface/Core/JIT/AtomicOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,11 +101,16 @@ DEF_OP(CAS) {
auto Expected = GetReg(Op->Expected.ID());
auto Desired = GetReg(Op->Desired.ID());
auto MemSrc = GetReg(Op->Addr.ID());
auto Dst = GetReg(Node);

if (CTX->HostFeatures.SupportsAtomics) {
mov(EmitSize, TMP2, Expected);
casal(SubEmitSize, TMP2, Desired, MemSrc);
mov(EmitSize, GetReg(Node), TMP2.R());
if (Expected == Dst && Dst != MemSrc && Dst != Desired) {
casal(SubEmitSize, Dst, Desired, MemSrc);
} else {
mov(EmitSize, TMP2, Expected);
casal(SubEmitSize, TMP2, Desired, MemSrc);
mov(EmitSize, Dst, TMP2.R());
}
} else {
ARMEmitter::BackwardLabel LoopTop;
ARMEmitter::ForwardLabel LoopNotExpected;
Expand All @@ -122,11 +127,11 @@ DEF_OP(CAS) {
b(ARMEmitter::Condition::CC_NE, &LoopNotExpected);
stlxr(SubEmitSize, TMP3, Desired, MemSrc);
cbnz(EmitSize, TMP3, &LoopTop);
mov(EmitSize, GetReg(Node), Expected);
mov(EmitSize, Dst, Expected);
b(&LoopExpected);

Bind(&LoopNotExpected);
mov(EmitSize, GetReg(Node), TMP2.R());
mov(EmitSize, Dst, TMP2.R());
// The exclusive monitor needs to be cleared here.
// We may have reached this point after ldaxr executed but the matching stlxr did not.
clrex();
Expand Down
14 changes: 4 additions & 10 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3825,23 +3825,17 @@ void OpDispatchBuilder::CMPXCHGOp(OpcodeArgs) {
Ref Src2 = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags);
HandledLock = Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_LOCK;

Ref Src3 {};
Ref Src3Lower {};
if (GPRSize == OpSize::i64Bit && Size == OpSize::i32Bit) {
Src3 = LoadGPRRegister(X86State::REG_RAX);
Src3Lower = _Bfe(OpSize::i32Bit, 32, 0, Src3);
} else {
Src3 = LoadGPRRegister(X86State::REG_RAX, Size);
Src3Lower = Src3;
}
auto Src3 = LoadGPRRegister(X86State::REG_RAX);
auto Src3Lower = _Bfe(OpSize::i64Bit, OpSizeAsBits(Size), 0, Src3);

// If this is a memory location then we want the pointer to it
Ref Src1 = MakeSegmentAddress(Op, Op->Dest);

// DataSrc = *Src1
// if (DataSrc == Src3) { *Src1 = Src2; } Src3 = DataSrc
// This will write to memory! Careful!
// Third operand must be a calculated guest memory address
Ref CASResult = _CAS(Size, Src3Lower, Src2, Src1);
Ref CASResult = _CAS(Size, Src3, Src2, Src1);
Ref RAXResult = CASResult;

CalculateFlags_SUB(OpSizeFromSrc(Op), Src3Lower, CASResult);
Expand Down
2 changes: 1 addition & 1 deletion FEXCore/Source/Interface/IR/IR.json
Original file line number Diff line number Diff line change
Expand Up @@ -789,7 +789,7 @@
"Dest = %Expected",
"if (deref(%Addr) != %Expected) Dest = deref(%Addr)"
],

"TiedSource": 0,
"DestSize": "Size",
"ImplicitFlagClobber": true,
"EmitValidation": [
Expand Down
36 changes: 17 additions & 19 deletions unittests/InstructionCountCI/FlagM/Secondary.json
Original file line number Diff line number Diff line change
Expand Up @@ -1075,16 +1075,14 @@
"mov x4, x21"
]
},
"cmpxchg [rax], rbx": {
"ExpectedInstructionCount": 6,
"cmpxchg [rcx], rbx": {
"ExpectedInstructionCount": 4,
"Comment": "0x0f 0xb1",
"ExpectedArm64ASM": [
"mov x1, x4",
"casal x1, x6, [x4]",
"mov x20, x1",
"eor x27, x4, x20",
"subs x26, x4, x20",
"mov x4, x20"
"mov x20, x4",
"casal x4, x6, [x7]",
"eor x27, x20, x4",
"subs x26, x20, x4"
]
},
"cmpxchg al, bl": {
Expand All @@ -1098,14 +1096,14 @@
"bfxil x4, x6, #0, #8"
]
},
"cmpxchg [rax], bl": {
"cmpxchg [rcx], bl": {
"ExpectedInstructionCount": 10,
"Comment": "0x0f 0xb0",
"ExpectedArm64ASM": [
"uxtb w20, w6",
"uxtb w21, w4",
"mov w1, w21",
"casalb w1, w20, [x4]",
"uxtb x21, w4",
"mov w1, w4",
"casalb w1, w20, [x7]",
"mov w20, w1",
"eor x27, x21, x20",
"lsl w0, w21, #24",
Expand All @@ -1125,14 +1123,14 @@
"bfxil x4, x6, #0, #16"
]
},
"cmpxchg [rax], bx": {
"cmpxchg [rcx], bx": {
"ExpectedInstructionCount": 10,
"Comment": "0x0f 0xb1",
"ExpectedArm64ASM": [
"uxth w20, w6",
"uxth w21, w4",
"mov w1, w21",
"casalh w1, w20, [x4]",
"uxth x21, w4",
"mov w1, w4",
"casalh w1, w20, [x7]",
"mov w20, w1",
"eor x27, x21, x20",
"lsl w0, w21, #16",
Expand All @@ -1151,14 +1149,14 @@
"mov x4, x6"
]
},
"cmpxchg [rax], ebx": {
"cmpxchg [rcx], ebx": {
"ExpectedInstructionCount": 8,
"Comment": "0x0f 0xb1",
"ExpectedArm64ASM": [
"mov w20, w6",
"mov w21, w4",
"mov w1, w21",
"casal w1, w20, [x4]",
"mov w1, w4",
"casal w1, w20, [x7]",
"mov w20, w1",
"eor x27, x21, x20",
"subs w26, w21, w20",
Expand Down
36 changes: 17 additions & 19 deletions unittests/InstructionCountCI/Secondary.json
Original file line number Diff line number Diff line change
Expand Up @@ -1972,14 +1972,14 @@
"bfxil x4, x6, #0, #8"
]
},
"cmpxchg [rax], bl": {
"cmpxchg [rcx], bl": {
"ExpectedInstructionCount": 10,
"Comment": "0x0f 0xb0",
"ExpectedArm64ASM": [
"uxtb w20, w6",
"uxtb w21, w4",
"mov w1, w21",
"casalb w1, w20, [x4]",
"uxtb x21, w4",
"mov w1, w4",
"casalb w1, w20, [x7]",
"mov w20, w1",
"eor x27, x21, x20",
"lsl w0, w21, #24",
Expand All @@ -1999,14 +1999,14 @@
"bfxil x4, x6, #0, #16"
]
},
"cmpxchg [rax], bx": {
"cmpxchg [rcx], bx": {
"ExpectedInstructionCount": 10,
"Comment": "0x0f 0xb1",
"ExpectedArm64ASM": [
"uxth w20, w6",
"uxth w21, w4",
"mov w1, w21",
"casalh w1, w20, [x4]",
"uxth x21, w4",
"mov w1, w4",
"casalh w1, w20, [x7]",
"mov w20, w1",
"eor x27, x21, x20",
"lsl w0, w21, #16",
Expand All @@ -2025,14 +2025,14 @@
"mov x4, x6"
]
},
"cmpxchg [rax], ebx": {
"cmpxchg [rcx], ebx": {
"ExpectedInstructionCount": 8,
"Comment": "0x0f 0xb1",
"ExpectedArm64ASM": [
"mov w20, w6",
"mov w21, w4",
"mov w1, w21",
"casal w1, w20, [x4]",
"mov w1, w4",
"casal w1, w20, [x7]",
"mov w20, w1",
"eor x27, x21, x20",
"subs w26, w21, w20",
Expand All @@ -2048,16 +2048,14 @@
"mov x4, x6"
]
},
"cmpxchg [rax], rbx": {
"ExpectedInstructionCount": 6,
"cmpxchg [rcx], rbx": {
"ExpectedInstructionCount": 4,
"Comment": "0x0f 0xb1",
"ExpectedArm64ASM": [
"mov x1, x4",
"casal x1, x6, [x4]",
"mov x20, x1",
"eor x27, x4, x20",
"subs x26, x4, x20",
"mov x4, x20"
"mov x20, x4",
"casal x4, x6, [x7]",
"eor x27, x20, x4",
"subs x26, x20, x4"
]
},
"btr ax, bx": {
Expand Down

0 comments on commit a37d6a3

Please sign in to comment.