From 4759e86ca7c632a4d686cc80bd39be66211c7512 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Thu, 4 Jan 2024 22:10:41 +0100 Subject: [PATCH 1/4] Intrinsify Interlocked.And/Or --- src/coreclr/jit/codegenxarch.cpp | 57 ++++++++++++++++++++++++++++--- src/coreclr/jit/importercalls.cpp | 8 ++--- src/coreclr/jit/lower.cpp | 2 -- src/coreclr/jit/lsraxarch.cpp | 11 ++++++ 4 files changed, 67 insertions(+), 11 deletions(-) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 43553c42018482..43d603dfcce075 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -2047,12 +2047,9 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) case GT_XCHG: case GT_XADD: - genLockedInstructions(treeNode->AsOp()); - break; - case GT_XORR: case GT_XAND: - NYI("Interlocked.Or and Interlocked.And aren't implemented for x86 yet."); + genLockedInstructions(treeNode->AsOp()); break; case GT_MEMORYBARRIER: @@ -4354,7 +4351,7 @@ void CodeGen::genCodeForLockAdd(GenTreeOp* node) // void CodeGen::genLockedInstructions(GenTreeOp* node) { - assert(node->OperIs(GT_XADD, GT_XCHG)); + assert(node->OperIs(GT_XADD, GT_XCHG, GT_XORR, GT_XAND)); GenTree* addr = node->gtGetOp1(); GenTree* data = node->gtGetOp2(); @@ -4366,6 +4363,56 @@ void CodeGen::genLockedInstructions(GenTreeOp* node) genConsumeOperands(node); + if (node->OperIs(GT_XORR, GT_XAND)) + { + const instruction ins = node->OperIs(GT_XORR) ? INS_or : INS_and; + + if (node->IsUnusedValue()) + { + // If value is not used we can emit a short form: + // + // lock + // or/and dword ptr [addrReg], val + // + instGen(INS_lock); + GetEmitter()->emitIns_AR_R(ins, size, data->GetRegNum(), addr->GetRegNum(), 0); + } + else + { + // When value is used (it's the original value of the memory location) + // we fallback to cmpxchg-loop idiom. + + // for cmpxchg we need to keep the original value in RAX + assert(node->GetRegNum() == REG_RAX); + + // mov RAX, dword ptr [addrReg] + //.LOOP: + // mov tmp, RAX + // or/and tmp, val + // lock + // cmpxchg dword ptr [addrReg], tmp + // jne .LOOP + // ret + + // Extend liveness of addr + gcInfo.gcMarkRegPtrVal(addr->GetRegNum(), addr->TypeGet()); + + const regNumber tmpReg = node->GetSingleTempReg(); + GetEmitter()->emitIns_R_AR(INS_mov, size, REG_RAX, addr->GetRegNum(), 0); + BasicBlock* loop = genCreateTempLabel(); + genDefineTempLabel(loop); + GetEmitter()->emitIns_Mov(INS_mov, size, tmpReg, REG_RAX, false); + GetEmitter()->emitIns_R_R(ins, size, tmpReg, data->GetRegNum()); + instGen(INS_lock); + GetEmitter()->emitIns_AR_R(INS_cmpxchg, size, tmpReg, addr->GetRegNum(), 0); + inst_JMP(EJ_jne, loop); + + gcInfo.gcMarkRegSetNpt(genRegMask(addr->GetRegNum())); + genProduceReg(node); + } + return; + } + // If the destination register is different from the data register then we need // to first move the data to the target register. Make sure we don't overwrite // the address, the register allocator should have taken care of this. diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index b5eae4a04312eb..43896c1d4249d2 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -3239,13 +3239,13 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, break; } -#if defined(TARGET_ARM64) || defined(TARGET_RISCV64) - // Intrinsify Interlocked.Or and Interlocked.And only for arm64-v8.1 (and newer) and for RV64A - // TODO-CQ: Implement for XArch (https://github.com/dotnet/runtime/issues/32239). +#if defined(TARGET_ARM64) || defined(TARGET_RISCV64) || defined(TARGET_XARCH) case NI_System_Threading_Interlocked_Or: case NI_System_Threading_Interlocked_And: { - ARM64_ONLY(if (compOpportunisticallyDependsOn(InstructionSet_Atomics))) +#if defined(TARGET_ARM64) + if (compOpportunisticallyDependsOn(InstructionSet_Atomics)) +#endif { assert(sig->numArgs == 2); GenTree* op2 = impPopStack().val; diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 3263d8c4ba308a..831f897fe06507 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -640,8 +640,6 @@ GenTree* Lowering::LowerNode(GenTree* node) CheckImmedAndMakeContained(node, node->AsOp()->gtOp2); break; #elif defined(TARGET_XARCH) - case GT_XORR: - case GT_XAND: case GT_XADD: if (node->IsUnusedValue()) { diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 322b6e4ef02c33..362ceb9bc4f5cf 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -436,6 +436,17 @@ int LinearScan::BuildNode(GenTree* tree) case GT_XORR: case GT_XAND: + if (!tree->IsUnusedValue()) + { + // if value is unused, we'll emit a cmpxchg-loop idiom (requires RAX) + BuildUse(tree->gtGetOp1(), availableIntRegs & ~RBM_RAX); + BuildUse(tree->gtGetOp2(), availableIntRegs & ~RBM_RAX); + buildInternalIntRegisterDefForNode(tree, availableIntRegs & ~RBM_RAX); + BuildDef(tree, RBM_RAX); + srcCount = 2; + assert(dstCount == 1); + } + FALLTHROUGH; case GT_XADD: case GT_XCHG: { From 988c8b7958b0f037b29f51bf3373f124103c955a Mon Sep 17 00:00:00 2001 From: EgorBo Date: Thu, 4 Jan 2024 23:04:33 +0100 Subject: [PATCH 2/4] fix 32bit --- src/coreclr/jit/importercalls.cpp | 6 ++++++ src/coreclr/jit/lsraxarch.cpp | 1 + 2 files changed, 7 insertions(+) diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index 43896c1d4249d2..715c00cd9ba2a3 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -3247,6 +3247,12 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, if (compOpportunisticallyDependsOn(InstructionSet_Atomics)) #endif { +#if defined(TARGET_X86) + if (genActualType(callType) == TYP_LONG) + { + break; + } +#endif assert(sig->numArgs == 2); GenTree* op2 = impPopStack().val; GenTree* op1 = impPopStack().val; diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 362ceb9bc4f5cf..d4e786361276e8 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -445,6 +445,7 @@ int LinearScan::BuildNode(GenTree* tree) BuildDef(tree, RBM_RAX); srcCount = 2; assert(dstCount == 1); + break; } FALLTHROUGH; case GT_XADD: From bcffc5d4e9a3b6f40d26497ec3136f0baf13409c Mon Sep 17 00:00:00 2001 From: EgorBo Date: Fri, 5 Jan 2024 00:15:59 +0100 Subject: [PATCH 3/4] forgot to call buildInternalRegisterUses --- src/coreclr/jit/lsraxarch.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index d4e786361276e8..faa0b8e8d4009c 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -439,10 +439,11 @@ int LinearScan::BuildNode(GenTree* tree) if (!tree->IsUnusedValue()) { // if value is unused, we'll emit a cmpxchg-loop idiom (requires RAX) + buildInternalIntRegisterDefForNode(tree, availableIntRegs & ~RBM_RAX); BuildUse(tree->gtGetOp1(), availableIntRegs & ~RBM_RAX); BuildUse(tree->gtGetOp2(), availableIntRegs & ~RBM_RAX); - buildInternalIntRegisterDefForNode(tree, availableIntRegs & ~RBM_RAX); BuildDef(tree, RBM_RAX); + buildInternalRegisterUses(); srcCount = 2; assert(dstCount == 1); break; From 000286f7b0da4fb1d1ca66f6f2fe4543e8c959e3 Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Fri, 5 Jan 2024 16:49:46 +0100 Subject: [PATCH 4/4] Update lsraxarch.cpp --- src/coreclr/jit/lsraxarch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index faa0b8e8d4009c..ecdb69ed971901 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -438,7 +438,7 @@ int LinearScan::BuildNode(GenTree* tree) case GT_XAND: if (!tree->IsUnusedValue()) { - // if value is unused, we'll emit a cmpxchg-loop idiom (requires RAX) + // if tree's value is used, we'll emit a cmpxchg-loop idiom (requires RAX) buildInternalIntRegisterDefForNode(tree, availableIntRegs & ~RBM_RAX); BuildUse(tree->gtGetOp1(), availableIntRegs & ~RBM_RAX); BuildUse(tree->gtGetOp2(), availableIntRegs & ~RBM_RAX);