diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 43553c42018482..43d603dfcce075 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -2047,12 +2047,9 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) case GT_XCHG: case GT_XADD: - genLockedInstructions(treeNode->AsOp()); - break; - case GT_XORR: case GT_XAND: - NYI("Interlocked.Or and Interlocked.And aren't implemented for x86 yet."); + genLockedInstructions(treeNode->AsOp()); break; case GT_MEMORYBARRIER: @@ -4354,7 +4351,7 @@ void CodeGen::genCodeForLockAdd(GenTreeOp* node) // void CodeGen::genLockedInstructions(GenTreeOp* node) { - assert(node->OperIs(GT_XADD, GT_XCHG)); + assert(node->OperIs(GT_XADD, GT_XCHG, GT_XORR, GT_XAND)); GenTree* addr = node->gtGetOp1(); GenTree* data = node->gtGetOp2(); @@ -4366,6 +4363,56 @@ void CodeGen::genLockedInstructions(GenTreeOp* node) genConsumeOperands(node); + if (node->OperIs(GT_XORR, GT_XAND)) + { + const instruction ins = node->OperIs(GT_XORR) ? INS_or : INS_and; + + if (node->IsUnusedValue()) + { + // If value is not used we can emit a short form: + // + // lock + // or/and dword ptr [addrReg], val + // + instGen(INS_lock); + GetEmitter()->emitIns_AR_R(ins, size, data->GetRegNum(), addr->GetRegNum(), 0); + } + else + { + // When value is used (it's the original value of the memory location) + // we fallback to cmpxchg-loop idiom. + + // for cmpxchg we need to keep the original value in RAX + assert(node->GetRegNum() == REG_RAX); + + // mov RAX, dword ptr [addrReg] + //.LOOP: + // mov tmp, RAX + // or/and tmp, val + // lock + // cmpxchg dword ptr [addrReg], tmp + // jne .LOOP + // ret + + // Extend liveness of addr + gcInfo.gcMarkRegPtrVal(addr->GetRegNum(), addr->TypeGet()); + + const regNumber tmpReg = node->GetSingleTempReg(); + GetEmitter()->emitIns_R_AR(INS_mov, size, REG_RAX, addr->GetRegNum(), 0); + BasicBlock* loop = genCreateTempLabel(); + genDefineTempLabel(loop); + GetEmitter()->emitIns_Mov(INS_mov, size, tmpReg, REG_RAX, false); + GetEmitter()->emitIns_R_R(ins, size, tmpReg, data->GetRegNum()); + instGen(INS_lock); + GetEmitter()->emitIns_AR_R(INS_cmpxchg, size, tmpReg, addr->GetRegNum(), 0); + inst_JMP(EJ_jne, loop); + + gcInfo.gcMarkRegSetNpt(genRegMask(addr->GetRegNum())); + genProduceReg(node); + } + return; + } + // If the destination register is different from the data register then we need // to first move the data to the target register. Make sure we don't overwrite // the address, the register allocator should have taken care of this. diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index b5eae4a04312eb..715c00cd9ba2a3 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -3239,14 +3239,20 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, break; } -#if defined(TARGET_ARM64) || defined(TARGET_RISCV64) - // Intrinsify Interlocked.Or and Interlocked.And only for arm64-v8.1 (and newer) and for RV64A - // TODO-CQ: Implement for XArch (https://github.com/dotnet/runtime/issues/32239). +#if defined(TARGET_ARM64) || defined(TARGET_RISCV64) || defined(TARGET_XARCH) case NI_System_Threading_Interlocked_Or: case NI_System_Threading_Interlocked_And: { - ARM64_ONLY(if (compOpportunisticallyDependsOn(InstructionSet_Atomics))) +#if defined(TARGET_ARM64) + if (compOpportunisticallyDependsOn(InstructionSet_Atomics)) +#endif { +#if defined(TARGET_X86) + if (genActualType(callType) == TYP_LONG) + { + break; + } +#endif assert(sig->numArgs == 2); GenTree* op2 = impPopStack().val; GenTree* op1 = impPopStack().val; diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 3263d8c4ba308a..831f897fe06507 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -640,8 +640,6 @@ GenTree* Lowering::LowerNode(GenTree* node) CheckImmedAndMakeContained(node, node->AsOp()->gtOp2); break; #elif defined(TARGET_XARCH) - case GT_XORR: - case GT_XAND: case GT_XADD: if (node->IsUnusedValue()) { diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 322b6e4ef02c33..ecdb69ed971901 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -436,6 +436,19 @@ int LinearScan::BuildNode(GenTree* tree) case GT_XORR: case GT_XAND: + if (!tree->IsUnusedValue()) + { + // if tree's value is used, we'll emit a cmpxchg-loop idiom (requires RAX) + buildInternalIntRegisterDefForNode(tree, availableIntRegs & ~RBM_RAX); + BuildUse(tree->gtGetOp1(), availableIntRegs & ~RBM_RAX); + BuildUse(tree->gtGetOp2(), availableIntRegs & ~RBM_RAX); + BuildDef(tree, RBM_RAX); + buildInternalRegisterUses(); + srcCount = 2; + assert(dstCount == 1); + break; + } + FALLTHROUGH; case GT_XADD: case GT_XCHG: {