From e0f142574e2d103546f4b8267d897f04486d2606 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 17 Feb 2025 22:06:54 +0100 Subject: [PATCH 1/3] JIT: Clean up call arg lowering * Simplify the logic to be based purely off of new ABI info * Consistently insert bitcast nodes for register file mismatches when creating `PUTARG_REG` nodes and for `PUTARG_SPLIT` * Add a lowering optimization that removes `BITCAST` by changing the operand (by changing constants to other constants or the type of indirections) --- src/coreclr/jit/compiler.h | 4 +- src/coreclr/jit/compiler.hpp | 4 +- src/coreclr/jit/gentree.cpp | 12 +- src/coreclr/jit/lower.cpp | 549 ++++++++++++++-------------------- src/coreclr/jit/lower.h | 14 +- src/coreclr/jit/morph.cpp | 17 +- src/coreclr/jit/promotion.cpp | 6 - 7 files changed, 240 insertions(+), 366 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index ae1b0da5d21f34..95d530262122dd 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -3118,7 +3118,7 @@ class Compiler Statement* gtNewStmt(GenTree* expr, const DebugInfo& di); // For unary opers. - GenTree* gtNewOperNode(genTreeOps oper, var_types type, GenTree* op1); + GenTreeUnOp* gtNewOperNode(genTreeOps oper, var_types type, GenTree* op1); // For binary opers. GenTreeOp* gtNewOperNode(genTreeOps oper, var_types type, GenTree* op1, GenTree* op2); @@ -3207,7 +3207,7 @@ class Compiler GenTree* gtNewPutArgReg(var_types type, GenTree* arg, regNumber argReg); - GenTree* gtNewBitCastNode(var_types type, GenTree* arg); + GenTreeUnOp* gtNewBitCastNode(var_types type, GenTree* arg); public: GenTreeCall* gtNewCallNode(gtCallTypes callType, diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index 1738810e149b4b..a7d6af44e7a4a2 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -1411,14 +1411,14 @@ inline Statement* Compiler::gtNewStmt(GenTree* expr, const DebugInfo& di) return stmt; } -inline GenTree* Compiler::gtNewOperNode(genTreeOps oper, var_types type, GenTree* op1) +inline GenTreeUnOp* Compiler::gtNewOperNode(genTreeOps oper, var_types type, GenTree* op1) { assert((GenTree::OperKind(oper) & (GTK_UNOP | GTK_BINOP)) != 0); assert((GenTree::OperKind(oper) & GTK_EXOP) == 0); // Can't use this to construct any types that extend unary/binary // operator. assert(op1 != nullptr || oper == GT_RETFILT || (oper == GT_RETURN && type == TYP_VOID)); - GenTree* node = new (this, oper) GenTreeOp(oper, type, op1, nullptr); + GenTreeUnOp* node = new (this, oper) GenTreeOp(oper, type, op1, nullptr); return node; } diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index c90f4ee43ed9d5..f054015a105c3c 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -9148,20 +9148,12 @@ GenTree* Compiler::gtNewPutArgReg(var_types type, GenTree* arg, regNumber argReg // Notes: // The node is generated as GenTreeMultiRegOp on RyuJIT/arm, as GenTreeOp on all the other archs. // -GenTree* Compiler::gtNewBitCastNode(var_types type, GenTree* arg) +GenTreeUnOp* Compiler::gtNewBitCastNode(var_types type, GenTree* arg) { assert(arg != nullptr); assert(type != TYP_STRUCT); - GenTree* node = nullptr; -#if defined(TARGET_ARM) - // A BITCAST could be a MultiRegOp on arm since we could move a double register to two int registers. - node = new (this, GT_BITCAST) GenTreeMultiRegOp(GT_BITCAST, type, arg, nullptr); -#else - node = gtNewOperNode(GT_BITCAST, type, arg); -#endif - - return node; + return gtNewOperNode(GT_BITCAST, type, arg); } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 5f8bdd9f27b3f6..77cfc63c95cc7c 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -549,8 +549,14 @@ GenTree* Lowering::LowerNode(GenTree* node) break; case GT_BITCAST: - ContainCheckBitCast(node); - break; + { + GenTree* next = node->gtNext; + if (!TryRemoveBitCast(node->AsUnOp())) + { + ContainCheckBitCast(node->AsUnOp()); + } + return next; + } #if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) case GT_BOUNDS_CHECK: @@ -1533,62 +1539,74 @@ bool Lowering::TryLowerSwitchToBitTest(FlowEdge* jumpTable[], } //------------------------------------------------------------------------ -// ReplaceArgWithPutArgOrBitcast: Insert a PUTARG_* node in the right location -// and replace the call operand with that node. +// LowerArg: +// Lower one argument of a call. This entails inserting putarg nodes between +// the call and the argument. This is the point at which the source is +// consumed and the value transitions from control of the register allocator +// to the calling convention. // // Arguments: -// argSlot - slot in call of argument -// putArgOrBitcast - the node that is being inserted +// call - The call node +// callArg - Call argument // -void Lowering::ReplaceArgWithPutArgOrBitcast(GenTree** argSlot, GenTree* putArgOrBitcast) +void Lowering::LowerArg(GenTreeCall* call, CallArg* callArg) { - assert(argSlot != nullptr); - assert(*argSlot != nullptr); - assert(putArgOrBitcast->OperIsPutArg() || putArgOrBitcast->OperIs(GT_BITCAST)); + GenTree** ppArg = &callArg->NodeRef(); + GenTree* arg = *ppArg; + assert(arg != nullptr); - GenTree* arg = *argSlot; + JITDUMP("Lowering arg: "); + DISPTREERANGE(BlockRange(), arg); + assert(arg->IsValue()); - // Replace the argument with the putarg/copy - *argSlot = putArgOrBitcast; - putArgOrBitcast->AsOp()->gtOp1 = arg; + // If we hit this we are probably double-lowering. + assert(!arg->OperIsPutArg()); - // Insert the putarg/copy into the block - BlockRange().InsertAfter(arg, putArgOrBitcast); -} + const ABIPassingInformation& abiInfo = callArg->NewAbiInfo; + JITDUMP("Passed in "); + DBEXEC(comp->verbose, abiInfo.Dump()); -//------------------------------------------------------------------------ -// NewPutArg: rewrites the tree to put an arg in a register or on the stack. -// -// Arguments: -// call - the call whose arg is being rewritten. -// arg - the arg being rewritten. -// callArg - the CallArg for the argument. -// type - the type of the argument. -// -// Return Value: -// The new tree that was created to put the arg in the right place -// or the incoming arg if the arg tree was not rewritten. -// -// Assumptions: -// call, arg, and info must be non-null. -// -// Notes: -// For System V systems with native struct passing (i.e. UNIX_AMD64_ABI defined) -// this method allocates a single GT_PUTARG_REG for 1 eightbyte structs and a GT_FIELD_LIST of two GT_PUTARG_REGs -// for two eightbyte structs. For STK passed structs the method generates GT_PUTARG_STK tree. -// -GenTree* Lowering::NewPutArg(GenTreeCall* call, GenTree* arg, CallArg* callArg, var_types type) -{ - assert(call != nullptr); - assert(arg != nullptr); - assert(callArg != nullptr); +#if !defined(TARGET_64BIT) + if (comp->opts.compUseSoftFP && arg->TypeIs(TYP_DOUBLE)) + { + // Unlike TYP_LONG we do no decomposition for doubles, yet we maintain + // it as a primitive type until lowering. So we need to get it into the + // right form here. - GenTree* putArg = nullptr; - const ABIPassingInformation& abiInfo = callArg->NewAbiInfo; + unsigned argLclNum = comp->lvaGrabTemp(false DEBUGARG("double arg on softFP")); + GenTree* store = comp->gtNewTempStore(argLclNum, arg); + GenTree* low = comp->gtNewLclFldNode(argLclNum, TYP_INT, 0); + GenTree* high = comp->gtNewLclFldNode(argLclNum, TYP_INT, 4); + GenTree* longNode = new (comp, GT_LONG) GenTreeOp(GT_LONG, TYP_LONG, low, high); + BlockRange().InsertAfter(arg, store, low, high, longNode); + + *ppArg = arg = longNode; + + comp->lvaSetVarDoNotEnregister(argLclNum DEBUGARG(DoNotEnregisterReason::LocalField)); + + JITDUMP("Transformed double-typed arg on softFP to LONG node\n"); + } + + if (varTypeIsLong(arg)) + { + assert(callArg->NewAbiInfo.CountRegsAndStackSlots() == 2); + + noway_assert(arg->OperIs(GT_LONG)); + GenTreeFieldList* fieldList = new (comp, GT_FIELD_LIST) GenTreeFieldList(); + fieldList->AddFieldLIR(comp, arg->gtGetOp1(), 0, TYP_INT); + fieldList->AddFieldLIR(comp, arg->gtGetOp2(), 4, TYP_INT); + BlockRange().InsertBefore(arg, fieldList); + + BlockRange().Remove(arg); + *ppArg = arg = fieldList; + + JITDUMP("Transformed long arg on 32-bit to FIELD_LIST node\n"); + } +#endif #if FEATURE_ARG_SPLIT - // Struct can be split into register(s) and stack on ARM - if (compFeatureArgSplit() && callArg->NewAbiInfo.IsSplitAcrossRegistersAndStack()) + // Structs can be split into register(s) and stack on some targets + if (compFeatureArgSplit() && abiInfo.IsSplitAcrossRegistersAndStack()) { assert(arg->OperIs(GT_BLK, GT_FIELD_LIST) || arg->OperIsLocalRead()); assert(!call->IsFastTailCall()); @@ -1603,7 +1621,8 @@ GenTree* Lowering::NewPutArg(GenTreeCall* call, GenTree* arg, CallArg* callArg, unsigned numRegs = abiInfo.NumSegments - 1; const ABIPassingSegment& stackSeg = abiInfo.Segment(abiInfo.NumSegments - 1); - putArg = new (comp, GT_PUTARG_SPLIT) + assert(!call->IsFastTailCall()); + GenTree* putArg = new (comp, GT_PUTARG_SPLIT) GenTreePutArgSplit(arg, stackSeg.GetStackOffset(), stackSeg.GetStackSize(), abiInfo.NumSegments - 1, call, /* putInIncomingArgArea */ false); @@ -1622,13 +1641,9 @@ GenTree* Lowering::NewPutArg(GenTreeCall* call, GenTree* arg, CallArg* callArg, { break; } - var_types regType = use.GetNode()->TypeGet(); - // Account for the possibility that float fields may be passed in integer registers. - if (varTypeIsFloating(regType) && !genIsValidFloatReg(argSplit->GetRegNumByIdx(regIndex))) - { - regType = (regType == TYP_FLOAT) ? TYP_INT : TYP_LONG; - } - argSplit->m_regType[regIndex] = regType; + + InsertBitCastIfNecessary(&use.NodeRef(), abiInfo.Segment(regIndex)); + argSplit->m_regType[regIndex] = use.GetNode()->TypeGet(); regIndex++; } @@ -1639,12 +1654,14 @@ GenTree* Lowering::NewPutArg(GenTreeCall* call, GenTree* arg, CallArg* callArg, { ClassLayout* layout = arg->GetLayout(comp); - // Set type of registers for (unsigned index = 0; index < numRegs; index++) { argSplit->m_regType[index] = layout->GetGCPtrType(index); } } + + BlockRange().InsertAfter(arg, argSplit); + *ppArg = arg = argSplit; } else #endif // FEATURE_ARG_SPLIT @@ -1657,323 +1674,123 @@ GenTree* Lowering::NewPutArg(GenTreeCall* call, GenTree* arg, CallArg* callArg, unsigned int regIndex = 0; for (GenTreeFieldList::Use& use : arg->AsFieldList()->Uses()) { - regNumber argReg = abiInfo.Segment(regIndex).GetRegister(); - GenTree* curOp = use.GetNode(); - var_types curTyp = curOp->TypeGet(); - - // Create a new GT_PUTARG_REG node with op1 - GenTree* newOper = comp->gtNewPutArgReg(curTyp, curOp, argReg); + const ABIPassingSegment& segment = abiInfo.Segment(regIndex); + InsertPutArgReg(&use.NodeRef(), segment); - // Splice in the new GT_PUTARG_REG node in the GT_FIELD_LIST - ReplaceArgWithPutArgOrBitcast(&use.NodeRef(), newOper); regIndex++; } - - // Just return arg. The GT_FIELD_LIST is not replaced. - // Nothing more to do. - return arg; } else #endif // FEATURE_MULTIREG_ARGS { assert(abiInfo.HasExactlyOneRegisterSegment()); - putArg = comp->gtNewPutArgReg(type, arg, abiInfo.Segment(0).GetRegister()); + InsertPutArgReg(ppArg, abiInfo.Segment(0)); + arg = *ppArg; } } else { -#ifdef FEATURE_SIMD - assert(arg->OperIsFieldList() || (genActualType(arg) == type) || - (arg->TypeIs(TYP_SIMD16) && (type == TYP_SIMD12))); -#else - assert(arg->OperIsFieldList() || (genActualType(arg) == type)); -#endif assert(abiInfo.NumSegments == 1); const ABIPassingSegment& stackSeg = abiInfo.Segment(0); const bool putInIncomingArgArea = call->IsFastTailCall(); - putArg = new (comp, GT_PUTARG_STK) GenTreePutArgStk(GT_PUTARG_STK, TYP_VOID, arg, stackSeg.GetStackOffset(), - stackSeg.GetStackSize(), call, putInIncomingArgArea); + GenTree* putArg = + new (comp, GT_PUTARG_STK) GenTreePutArgStk(GT_PUTARG_STK, TYP_VOID, arg, stackSeg.GetStackOffset(), + stackSeg.GetStackSize(), + call, putInIncomingArgArea); + + BlockRange().InsertAfter(arg, putArg); + *ppArg = arg = putArg; } } - JITDUMP("new node is : "); - DISPNODE(putArg); - JITDUMP("\n"); + if (arg->OperIsPutArgStk() || arg->OperIsPutArgSplit()) + { + LowerPutArgStkOrSplit(arg->AsPutArgStk()); + } - return putArg; + DISPTREERANGE(BlockRange(), arg); } //------------------------------------------------------------------------ -// LowerArg: Lower one argument of a call. This entails splicing a "putarg" node between -// the argument evaluation and the call. This is the point at which the source is -// consumed and the value transitions from control of the register allocator to the calling -// convention. +// InsertBitCastIfNecessary: +// Insert a bitcast if a primitive argument being passed in a register is not +// evaluated in the right type of register. // // Arguments: -// call - The call node -// callArg - Call argument -// late - Whether it is the late arg that is being lowered. -// -// Return Value: -// None. +// argNode - Edge for the argument +// registerSegment - Register that the argument is going into // -void Lowering::LowerArg(GenTreeCall* call, CallArg* callArg, bool late) +void Lowering::InsertBitCastIfNecessary(GenTree** argNode, const ABIPassingSegment& registerSegment) { - GenTree** ppArg = late ? &callArg->LateNodeRef() : &callArg->EarlyNodeRef(); - GenTree* arg = *ppArg; - assert(arg != nullptr); - - JITDUMP("lowering arg : "); - DISPNODE(arg); - assert(arg->IsValue()); - - var_types type = genActualType(arg); - -#if defined(FEATURE_SIMD) -#if defined(TARGET_X86) - // Non-param TYP_SIMD12 local var nodes are massaged in Lower to TYP_SIMD16 to match their - // allocated size (see lvSize()). However, when passing the variables as arguments, and - // storing the variables to the outgoing argument area on the stack, we must use their - // actual TYP_SIMD12 type, so exactly 12 bytes is allocated and written. - if (type == TYP_SIMD16) - { - if ((arg->OperGet() == GT_LCL_VAR) || (arg->OperGet() == GT_STORE_LCL_VAR)) - { - const LclVarDsc* varDsc = comp->lvaGetDesc(arg->AsLclVarCommon()); - type = varDsc->lvType; - } - else if (arg->OperIs(GT_HWINTRINSIC)) - { - GenTreeHWIntrinsic* hwintrinsic = arg->AsHWIntrinsic(); - - // For HWIntrinsic, there are some intrinsics like ExtractVector128 which have - // a gtType of TYP_SIMD16 but a SimdSize of 32, so we can't necessarily assert - // the simd size - - if (hwintrinsic->GetSimdSize() == 12) - { - if (hwintrinsic->GetHWIntrinsicId() != NI_Vector128_AsVector128Unsafe) - { - // Most nodes that have a simdSize of 12 are actually producing a TYP_SIMD12 - // and have been massaged to TYP_SIMD16 to match the actual product size. This - // is not the case for NI_Vector128_AsVector128Unsafe which is explicitly taking - // a TYP_SIMD12 and producing a TYP_SIMD16. - - type = TYP_SIMD12; - } - } - } - } -#elif defined(TARGET_AMD64) - // TYP_SIMD8 parameters that are passed as longs - if (type == TYP_SIMD8 && callArg->NewAbiInfo.HasExactlyOneRegisterSegment() && - genIsValidIntReg(callArg->NewAbiInfo.Segment(0).GetRegister())) + if (varTypeUsesIntReg(*argNode) == genIsValidIntReg(registerSegment.GetRegister())) { - GenTree* bitcast = comp->gtNewBitCastNode(TYP_LONG, arg); - BlockRange().InsertAfter(arg, bitcast); - - *ppArg = arg = bitcast; - type = TYP_LONG; + return; } -#endif // defined(TARGET_X86) -#endif // defined(FEATURE_SIMD) - // If we hit this we are probably double-lowering. - assert(!arg->OperIsPutArg()); + JITDUMP("Argument node [%06u] needs to be passed in %s; inserting bitcast\n", Compiler::dspTreeID(*argNode), + getRegName(registerSegment.GetRegister())); -#if !defined(TARGET_64BIT) - if (comp->opts.compUseSoftFP && (type == TYP_DOUBLE)) + // Due to padding the node may be smaller than the register segment. In + // such cases we cut off the end of the segment to get an appropriate + // register type for the bitcast. + ABIPassingSegment cutRegisterSegment = registerSegment; + unsigned argNodeSize = genTypeSize(genActualType(*argNode)); + if (registerSegment.Size > argNodeSize) { - // Unlike TYP_LONG we do no decomposition for doubles, yet we maintain - // it as a primitive type until lowering. So we need to get it into the - // right form here. - - unsigned argLclNum = comp->lvaGrabTemp(false DEBUGARG("double arg on softFP")); - GenTree* store = comp->gtNewTempStore(argLclNum, arg); - GenTree* low = comp->gtNewLclFldNode(argLclNum, TYP_INT, 0); - GenTree* high = comp->gtNewLclFldNode(argLclNum, TYP_INT, 4); - GenTree* longNode = new (comp, GT_LONG) GenTreeOp(GT_LONG, TYP_LONG, low, high); - BlockRange().InsertAfter(arg, store, low, high, longNode); - - *ppArg = arg = longNode; - type = TYP_LONG; - - comp->lvaSetVarDoNotEnregister(argLclNum DEBUGARG(DoNotEnregisterReason::LocalField)); - - JITDUMP("Created new nodes for double-typed arg on softFP:\n"); - DISPRANGE(LIR::ReadOnlyRange(store, longNode)); + cutRegisterSegment = ABIPassingSegment::InRegister(registerSegment.GetRegister(), registerSegment.Offset, argNodeSize); } - if (varTypeIsLong(type)) - { - noway_assert(arg->OperIs(GT_LONG)); - GenTreeFieldList* fieldList = new (comp, GT_FIELD_LIST) GenTreeFieldList(); - fieldList->AddFieldLIR(comp, arg->AsOp()->gtGetOp1(), 0, TYP_INT); - fieldList->AddFieldLIR(comp, arg->AsOp()->gtGetOp2(), 4, TYP_INT); - GenTree* newArg = NewPutArg(call, fieldList, callArg, type); + var_types bitCastType = cutRegisterSegment.GetRegisterType(); - if (callArg->AbiInfo.GetRegNum() != REG_STK) - { - assert(callArg->AbiInfo.NumRegs == 2); - // In the register argument case, NewPutArg replaces the original field list args with new - // GT_PUTARG_REG nodes, inserts them in linear order and returns the field list. So the - // only thing left to do is to insert the field list itself in linear order. - assert(newArg == fieldList); - BlockRange().InsertBefore(arg, newArg); - } - else - { - // For longs, we will replace the GT_LONG with a GT_FIELD_LIST, and put that under a PUTARG_STK. - // Although the hi argument needs to be pushed first, that will be handled by the general case, - // in which the fields will be reversed. - assert(callArg->AbiInfo.GetStackSlotsNumber() == 2); - newArg->SetRegNum(REG_STK); - BlockRange().InsertBefore(arg, fieldList, newArg); - } + GenTreeUnOp* bitCast = comp->gtNewBitCastNode(bitCastType, *argNode); + BlockRange().InsertAfter(*argNode, bitCast); - *ppArg = newArg; - BlockRange().Remove(arg); - } - else -#endif // !defined(TARGET_64BIT) + *argNode = bitCast; + if (!TryRemoveBitCast(bitCast)) { - -#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - if (call->IsVarargs() || comp->opts.compUseSoftFP || callArg->AbiInfo.IsMismatchedArgType()) - { - // Insert copies as needed to move float value to integer register - // if the ABI requires it. - GenTree* newNode = LowerFloatArg(ppArg, callArg); - if (newNode != nullptr) - { - type = newNode->TypeGet(); - } - } -#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 || TARGET_RISCV64 - - GenTree* putArg = NewPutArg(call, arg, callArg, type); - - // In the case of register passable struct (in one or two registers) - // the NewPutArg returns a new node (GT_PUTARG_REG or a GT_FIELD_LIST with two GT_PUTARG_REGs.) - // If an extra node is returned, splice it in the right place in the tree. - if (arg != putArg) - { - ReplaceArgWithPutArgOrBitcast(ppArg, putArg); - } - } - - arg = *ppArg; - - if (arg->OperIsPutArgStk() || arg->OperIsPutArgSplit()) - { - LowerPutArgStkOrSplit(arg->AsPutArgStk()); + ContainCheckBitCast(bitCast); } } -#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) //------------------------------------------------------------------------ -// LowerFloatArg: Lower float call arguments on the arm/LoongArch64/RiscV64 platform. +// InsertPutArgReg: +// Insert a PUTARG_REG node for the specified edge. If the argument node does +// not fit the register type, then also insert a bitcast. // // Arguments: -// arg - The arg node -// callArg - call argument info +// argNode - Edge for the argument +// registerSegment - Register that the argument is going into // -// Return Value: -// Return nullptr, if no transformation was done; -// return arg if there was in place transformation; -// return a new tree if the root was changed. -// -// Notes: -// This must handle scalar float arguments as well as GT_FIELD_LISTs -// with floating point fields. -// -GenTree* Lowering::LowerFloatArg(GenTree** pArg, CallArg* callArg) +void Lowering::InsertPutArgReg(GenTree** argNode, const ABIPassingSegment& registerSegment) { - GenTree* arg = *pArg; - if (callArg->AbiInfo.GetRegNum() != REG_STK) - { - if (arg->OperIs(GT_FIELD_LIST)) - { - // Transform fields that are passed as registers in place. - regNumber currRegNumber = callArg->AbiInfo.GetRegNum(); - unsigned regIndex = 0; - for (GenTreeFieldList::Use& use : arg->AsFieldList()->Uses()) - { - if (regIndex >= callArg->AbiInfo.NumRegs) - { - break; - } - GenTree* node = use.GetNode(); - if (varTypeUsesFloatReg(node)) - { - GenTree* intNode = LowerFloatArgReg(node, currRegNumber); - assert(intNode != nullptr); - - ReplaceArgWithPutArgOrBitcast(&use.NodeRef(), intNode); - } + assert(registerSegment.IsPassedInRegister()); - if (node->TypeGet() == TYP_DOUBLE) - { - currRegNumber = REG_NEXT(REG_NEXT(currRegNumber)); - regIndex += 2; - } - else - { - currRegNumber = REG_NEXT(currRegNumber); - regIndex += 1; - } - } - // List fields were replaced in place. - return arg; - } - else if (varTypeUsesFloatReg(arg)) - { - GenTree* intNode = LowerFloatArgReg(arg, callArg->AbiInfo.GetRegNum()); - assert(intNode != nullptr); - ReplaceArgWithPutArgOrBitcast(pArg, intNode); - return *pArg; - } - } - return nullptr; + InsertBitCastIfNecessary(argNode, registerSegment); + GenTree* putArg = comp->gtNewPutArgReg(genActualType(*argNode), *argNode, registerSegment.GetRegister()); + BlockRange().InsertAfter(*argNode, putArg); + *argNode = putArg; } //------------------------------------------------------------------------ -// LowerFloatArgReg: Lower the float call argument node that is passed via register. +// LowerArgsForCall: +// Lower the arguments of a call node. // // Arguments: -// arg - The arg node -// regNum - register number +// call - Call node // -// Return Value: -// Return new bitcast node, that moves float to int register. -// -GenTree* Lowering::LowerFloatArgReg(GenTree* arg, regNumber regNum) -{ - assert(varTypeUsesFloatReg(arg)); - - var_types floatType = arg->TypeGet(); - var_types intType = (floatType == TYP_FLOAT) ? TYP_INT : TYP_LONG; - GenTree* intArg = comp->gtNewBitCastNode(intType, arg); - intArg->SetRegNum(regNum); - - return intArg; -} -#endif - -// do lowering steps for each arg of a call void Lowering::LowerArgsForCall(GenTreeCall* call) { - JITDUMP("args:\n======\n"); + JITDUMP("Args:\n======\n"); for (CallArg& arg : call->gtArgs.EarlyArgs()) { - LowerArg(call, &arg, false); + LowerArg(call, &arg); } - JITDUMP("\nlate:\n======\n"); + JITDUMP("\nLate args:\n======\n"); for (CallArg& arg : call->gtArgs.LateArgs()) { - LowerArg(call, &arg, true); + LowerArg(call, &arg); } #if defined(TARGET_X86) && defined(FEATURE_IJW) @@ -3724,7 +3541,7 @@ void Lowering::LowerCFGCall(GenTreeCall* call) targetArg->AbiInfo.ByteSize = TARGET_POINTER_SIZE; // Lower the newly added args now that call is updated - LowerArg(call, targetArg, true /* late */); + LowerArg(call, targetArg); // Finally update the call to be a helper call call->gtCallType = CT_HELPER; @@ -4850,7 +4667,7 @@ void Lowering::LowerRet(GenTreeOp* ret) assert(!varTypeIsStruct(ret) && !varTypeIsStruct(retVal)); #endif - GenTree* bitcast = comp->gtNewBitCastNode(ret->TypeGet(), retVal); + GenTreeUnOp* bitcast = comp->gtNewBitCastNode(ret->TypeGet(), retVal); ret->SetReturnValue(bitcast); BlockRange().InsertBefore(ret, bitcast); ContainCheckBitCast(bitcast); @@ -5108,7 +4925,7 @@ GenTree* Lowering::LowerStoreLocCommon(GenTreeLclVarCommon* lclStore) assert(lclStore->OperIsLocalStore()); assert(lclRegType != TYP_UNDEF); - GenTree* bitcast = comp->gtNewBitCastNode(lclRegType, src); + GenTreeUnOp* bitcast = comp->gtNewBitCastNode(lclRegType, src); lclStore->gtOp1 = bitcast; src = lclStore->gtGetOp1(); BlockRange().InsertBefore(lclStore, bitcast); @@ -5225,7 +5042,7 @@ void Lowering::LowerRetStruct(GenTreeUnOp* ret) assert(varTypeIsEnregisterable(retVal)); if (!varTypeUsesSameRegType(ret, retVal)) { - GenTree* bitcast = comp->gtNewBitCastNode(ret->TypeGet(), retVal); + GenTreeUnOp* bitcast = comp->gtNewBitCastNode(ret->TypeGet(), retVal); ret->gtOp1 = bitcast; BlockRange().InsertBefore(ret, bitcast); ContainCheckBitCast(bitcast); @@ -5297,7 +5114,7 @@ void Lowering::LowerRetSingleRegStructLclVar(GenTreeUnOp* ret) if (!varTypeUsesSameRegType(ret, lclVarType)) { - GenTree* bitcast = comp->gtNewBitCastNode(ret->TypeGet(), ret->gtOp1); + GenTreeUnOp* bitcast = comp->gtNewBitCastNode(ret->TypeGet(), ret->gtOp1); ret->AsOp()->SetReturnValue(bitcast); BlockRange().InsertBefore(ret, bitcast); ContainCheckBitCast(bitcast); @@ -5395,7 +5212,7 @@ void Lowering::LowerCallStruct(GenTreeCall* call) { if (!varTypeUsesSameRegType(returnType, origType)) { - GenTree* bitCast = comp->gtNewBitCastNode(origType, call); + GenTreeUnOp* bitCast = comp->gtNewBitCastNode(origType, call); BlockRange().InsertAfter(call, bitCast); callUse.ReplaceWith(bitCast); ContainCheckBitCast(bitCast); @@ -8755,7 +8572,7 @@ void Lowering::ContainCheckNode(GenTree* node) ContainCheckCast(node->AsCast()); break; case GT_BITCAST: - ContainCheckBitCast(node); + ContainCheckBitCast(node->AsUnOp()); break; case GT_LCLHEAP: ContainCheckLclHeap(node->AsOp()); @@ -8871,15 +8688,103 @@ void Lowering::ContainCheckRet(GenTreeUnOp* ret) #endif // FEATURE_MULTIREG_RET } +//------------------------------------------------------------------------ +// TryRemoveBitCast: +// Try to remove a bitcast node if it is a no-op, or by changing its operand. +// +// Arguments: +// node - Bitcast node +// +// Returns: +// True if the bitcast was removed. +// +bool Lowering::TryRemoveBitCast(GenTreeUnOp* node) +{ + if (comp->opts.OptimizationDisabled()) + { + return false; + } + + GenTree* op = node->gtGetOp1(); + assert(genTypeSize(node) == genTypeSize(genActualType(op))); + + bool changed = false; +#ifdef FEATURE_SIMD + bool isConst = op->OperIs(GT_CNS_INT, GT_CNS_DBL, GT_CNS_VEC); +#else + bool isConst = op->OperIs(GT_CNS_INT, GT_CNS_DBL); +#endif + + if (isConst) + { + uint8_t bits[sizeof(simd_t)]; + if (op->OperIs(GT_CNS_INT)) + { + ssize_t cns = op->AsIntCon()->IconValue(); + assert(sizeof(ssize_t) >= genTypeSize(genActualType(op)) && (sizeof(bits) >= genTypeSize(genActualType(op)))); + memcpy(bits, &cns, genTypeSize(genActualType(op))); + } +#ifdef FEATURE_SIMD + else if (op->OperIs(GT_CNS_VEC)) + { + assert(sizeof(bits) >= genTypeSize(op)); + memcpy(bits, &op->AsVecCon()->gtSimdVal, genTypeSize(op)); + } +#endif + else + { + if (op->TypeIs(TYP_FLOAT)) + { + float floatVal = FloatingPointUtils::convertToSingle(op->AsDblCon()->DconValue()); + assert(sizeof(bits) >= sizeof(float)); + memcpy(bits, &floatVal, sizeof(float)); + } + else + { + double doubleVal = op->AsDblCon()->DconValue(); + assert(sizeof(bits) >= sizeof(double)); + memcpy(bits, &doubleVal, sizeof(double)); + } + } + + GenTree* newCon = comp->gtNewGenericCon(node->TypeGet(), bits); + BlockRange().InsertAfter(op, newCon); + BlockRange().Remove(op); + + node->gtOp1 = op = newCon; + + changed = true; + } + else if (op->OperIs(GT_LCL_FLD, GT_IND)) + { + op->ChangeType(node->TypeGet()); + changed = true; + } + + if (!changed) + { + return false; + } + + LIR::Use use; + if (BlockRange().TryGetUse(node, &use)) + { + use.ReplaceWith(op); + } + + BlockRange().Remove(node); + return true; +} + //------------------------------------------------------------------------ // ContainCheckBitCast: determine whether the source of a BITCAST should be contained. // // Arguments: // node - pointer to the node // -void Lowering::ContainCheckBitCast(GenTree* node) +void Lowering::ContainCheckBitCast(GenTreeUnOp* node) { - GenTree* const op1 = node->AsOp()->gtOp1; + GenTree* const op1 = node->gtGetOp1(); if (op1->OperIs(GT_LCL_VAR) && (genTypeSize(op1) == genTypeSize(node))) { if (IsContainableMemoryOp(op1) && IsSafeToContainMem(node, op1)) diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index 86b336c517f3c1..a4b3573d8c1fa6 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -102,7 +102,7 @@ class Lowering final : public Phase bool TryContainingCselOp(GenTreeHWIntrinsic* parentNode, GenTreeHWIntrinsic* childNode); #endif void ContainCheckSelect(GenTreeOp* select); - void ContainCheckBitCast(GenTree* node); + void ContainCheckBitCast(GenTreeUnOp* node); void ContainCheckCallOperands(GenTreeCall* call); void ContainCheckIndir(GenTreeIndir* indirNode); void ContainCheckStoreIndir(GenTreeStoreInd* indirNode); @@ -190,17 +190,13 @@ class Lowering final : public Phase GenTree* LowerVirtualVtableCall(GenTreeCall* call); GenTree* LowerVirtualStubCall(GenTreeCall* call); void LowerArgsForCall(GenTreeCall* call); - void ReplaceArgWithPutArgOrBitcast(GenTree** ppChild, GenTree* newNode); #if defined(TARGET_X86) && defined(FEATURE_IJW) void LowerSpecialCopyArgs(GenTreeCall* call); void InsertSpecialCopyArg(GenTreePutArgStk* putArgStk, CORINFO_CLASS_HANDLE argType, unsigned lclNum); #endif // defined(TARGET_X86) && defined(FEATURE_IJW) - GenTree* NewPutArg(GenTreeCall* call, GenTree* arg, CallArg* callArg, var_types type); - void LowerArg(GenTreeCall* call, CallArg* callArg, bool late); -#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - GenTree* LowerFloatArg(GenTree** pArg, CallArg* callArg); - GenTree* LowerFloatArgReg(GenTree* arg, regNumber regNum); -#endif + void LowerArg(GenTreeCall* call, CallArg* callArg); + void InsertBitCastIfNecessary(GenTree** argNode, const ABIPassingSegment& registerSegment); + void InsertPutArgReg(GenTree** node, const ABIPassingSegment& registerSegment); void LegalizeArgPlacement(GenTreeCall* call); void InsertPInvokeCallProlog(GenTreeCall* call); @@ -388,6 +384,8 @@ class Lowering final : public Phase void LowerPutArgStkOrSplit(GenTreePutArgStk* putArgNode); GenTree* LowerArrLength(GenTreeArrCommon* node); + bool TryRemoveBitCast(GenTreeUnOp* node); + #ifdef TARGET_XARCH void LowerPutArgStk(GenTreePutArgStk* putArgStk); GenTree* TryLowerMulWithConstant(GenTreeOp* node); diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 24baa3519e073e..a9d4d0e36d89da 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -2545,20 +2545,6 @@ bool Compiler::fgTryMorphStructArg(CallArg* arg) fieldsMatch = false; break; } - - var_types fieldType = lvaGetDesc(fieldLclNum)->TypeGet(); - var_types regType = genActualType(seg.GetRegisterType()); - - if (!varTypeUsesSameRegType(fieldType, regType)) - { - // TODO-CQ: We should be able to tolerate mismatches by inserting GT_BITCAST in lowering. - // - JITDUMP("Struct V%02u will be passed using GT_LCL_FLD because of type mismatch: " - "register type is %s, field local V%02u's type is %s\n", - lclNum, varTypeName(regType), fieldLclNum, varTypeName(fieldType)); - fieldsMatch = false; - break; - } } else { @@ -2708,8 +2694,7 @@ bool Compiler::fgTryMorphStructArg(CallArg* arg) // We sometimes end up with struct reinterpretations where the // retyping into a primitive allows us to replace by a scalar // local here, so make sure we do that if possible. - if ((lclVar->GetLclOffs() == 0) && (offset == 0) && (genTypeSize(type) == genTypeSize(dsc)) && - varTypeUsesSameRegType(type, dsc)) + if ((lclVar->GetLclOffs() == 0) && (offset == 0) && (genTypeSize(type) == genTypeSize(dsc))) { result = gtNewLclVarNode(lclVar->GetLclNum()); } diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index 55f6c88b2b7d23..63cfc6396c07f9 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -2348,12 +2348,6 @@ bool ReplaceVisitor::CanReplaceCallArgWithFieldListOfReplacements(GenTreeCall* return false; } - // Finally, the backend requires the register types to match. - if (!varTypeUsesSameRegType(rep.AccessType, seg.GetRegisterType())) - { - return false; - } - return true; }; From 143d5b888475ba4e96272fcd43a52d20121a18bc Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 18 Feb 2025 00:15:15 +0100 Subject: [PATCH 2/3] Run jit-format, fix function header, simplify assert --- src/coreclr/jit/lower.cpp | 22 ++++++++++------------ src/coreclr/jit/lower.h | 2 +- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 77cfc63c95cc7c..c5d7c48a47277b 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -1696,8 +1696,7 @@ void Lowering::LowerArg(GenTreeCall* call, CallArg* callArg) GenTree* putArg = new (comp, GT_PUTARG_STK) GenTreePutArgStk(GT_PUTARG_STK, TYP_VOID, arg, stackSeg.GetStackOffset(), - stackSeg.GetStackSize(), - call, putInIncomingArgArea); + stackSeg.GetStackSize(), call, putInIncomingArgArea); BlockRange().InsertAfter(arg, putArg); *ppArg = arg = putArg; @@ -1735,10 +1734,11 @@ void Lowering::InsertBitCastIfNecessary(GenTree** argNode, const ABIPassingSegme // such cases we cut off the end of the segment to get an appropriate // register type for the bitcast. ABIPassingSegment cutRegisterSegment = registerSegment; - unsigned argNodeSize = genTypeSize(genActualType(*argNode)); + unsigned argNodeSize = genTypeSize(genActualType(*argNode)); if (registerSegment.Size > argNodeSize) { - cutRegisterSegment = ABIPassingSegment::InRegister(registerSegment.GetRegister(), registerSegment.Offset, argNodeSize); + cutRegisterSegment = + ABIPassingSegment::InRegister(registerSegment.GetRegister(), registerSegment.Offset, argNodeSize); } var_types bitCastType = cutRegisterSegment.GetRegisterType(); @@ -4926,8 +4926,8 @@ GenTree* Lowering::LowerStoreLocCommon(GenTreeLclVarCommon* lclStore) assert(lclRegType != TYP_UNDEF); GenTreeUnOp* bitcast = comp->gtNewBitCastNode(lclRegType, src); - lclStore->gtOp1 = bitcast; - src = lclStore->gtGetOp1(); + lclStore->gtOp1 = bitcast; + src = lclStore->gtGetOp1(); BlockRange().InsertBefore(lclStore, bitcast); ContainCheckBitCast(bitcast); } @@ -5043,7 +5043,7 @@ void Lowering::LowerRetStruct(GenTreeUnOp* ret) if (!varTypeUsesSameRegType(ret, retVal)) { GenTreeUnOp* bitcast = comp->gtNewBitCastNode(ret->TypeGet(), retVal); - ret->gtOp1 = bitcast; + ret->gtOp1 = bitcast; BlockRange().InsertBefore(ret, bitcast); ContainCheckBitCast(bitcast); } @@ -8690,7 +8690,7 @@ void Lowering::ContainCheckRet(GenTreeUnOp* ret) //------------------------------------------------------------------------ // TryRemoveBitCast: -// Try to remove a bitcast node if it is a no-op, or by changing its operand. +// Try to remove a bitcast node by changing its operand. // // Arguments: // node - Bitcast node @@ -8718,16 +8718,16 @@ bool Lowering::TryRemoveBitCast(GenTreeUnOp* node) if (isConst) { uint8_t bits[sizeof(simd_t)]; + assert(sizeof(bits) >= genTypeSize(genActualType(op))); if (op->OperIs(GT_CNS_INT)) { ssize_t cns = op->AsIntCon()->IconValue(); - assert(sizeof(ssize_t) >= genTypeSize(genActualType(op)) && (sizeof(bits) >= genTypeSize(genActualType(op)))); + assert(sizeof(ssize_t) >= genTypeSize(genActualType(op))); memcpy(bits, &cns, genTypeSize(genActualType(op))); } #ifdef FEATURE_SIMD else if (op->OperIs(GT_CNS_VEC)) { - assert(sizeof(bits) >= genTypeSize(op)); memcpy(bits, &op->AsVecCon()->gtSimdVal, genTypeSize(op)); } #endif @@ -8736,13 +8736,11 @@ bool Lowering::TryRemoveBitCast(GenTreeUnOp* node) if (op->TypeIs(TYP_FLOAT)) { float floatVal = FloatingPointUtils::convertToSingle(op->AsDblCon()->DconValue()); - assert(sizeof(bits) >= sizeof(float)); memcpy(bits, &floatVal, sizeof(float)); } else { double doubleVal = op->AsDblCon()->DconValue(); - assert(sizeof(bits) >= sizeof(double)); memcpy(bits, &doubleVal, sizeof(double)); } } diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index a4b3573d8c1fa6..43a9b761aa5693 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -384,7 +384,7 @@ class Lowering final : public Phase void LowerPutArgStkOrSplit(GenTreePutArgStk* putArgNode); GenTree* LowerArrLength(GenTreeArrCommon* node); - bool TryRemoveBitCast(GenTreeUnOp* node); + bool TryRemoveBitCast(GenTreeUnOp* node); #ifdef TARGET_XARCH void LowerPutArgStk(GenTreePutArgStk* putArgStk); From 1fc7f81b0c2e709e9a0f6c3537803a7cc1e165c4 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 18 Feb 2025 17:41:02 +0100 Subject: [PATCH 3/3] Update src/coreclr/jit/lower.cpp --- src/coreclr/jit/lower.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index c5d7c48a47277b..534810980587a6 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -8769,6 +8769,10 @@ bool Lowering::TryRemoveBitCast(GenTreeUnOp* node) { use.ReplaceWith(op); } + else + { + op->SetUnusedValue(); + } BlockRange().Remove(node); return true;