diff --git a/src/jit/codegenarmarch.cpp b/src/jit/codegenarmarch.cpp index 530919cdd985..8435b786d5ca 100644 --- a/src/jit/codegenarmarch.cpp +++ b/src/jit/codegenarmarch.cpp @@ -1949,6 +1949,13 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node) } else { + // TODO-CQ: If the local frame offset is too large to be encoded, the emitter automatically + // loads the offset into a reserved register (see CodeGen::rsGetRsvdReg()). If we generate + // multiple store instructions we'll also generate multiple offset loading instructions. + // We could try to detect such cases, compute the base destination address in this reserved + // register and use it in all store instructions we generate. Basically this would undo the effect + // of local address containment done by lowering. + assert(dstAddr->OperIsLocalAddr()); dstLclNum = dstAddr->AsLclVarCommon()->GetLclNum(); @@ -2078,6 +2085,15 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* node) } else { + // TODO-CQ: If the local frame offset is too large to be encoded, the emitter automatically + // loads the offset into a reserved register (see CodeGen::rsGetRsvdReg()). If we generate + // multiple store instructions we'll also generate multiple offset loading instructions. + // We could try to detect such cases, compute the base destination address in this reserved + // register and use it in all store instructions we generate. Basically this would undo the effect + // of local address containment done by lowering. + // + // Same for the local source address case below. 
+ assert(dstAddr->OperIsLocalAddr()); dstLclNum = dstAddr->AsLclVarCommon()->GetLclNum(); diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp index 7b74f02c4929..cb6168772895 100644 --- a/src/jit/codegenxarch.cpp +++ b/src/jit/codegenxarch.cpp @@ -3083,6 +3083,8 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node) emit->emitIns_R_R(INS_punpckldq, EA_16BYTE, srcXmmReg, srcXmmReg); #ifdef _TARGET_X86_ // For x86, we need one more to convert it from 8 bytes to 16 bytes. + // TODO-CQ: Instead of 2 punpckldq we could use a single pshufd. This can also be useful + // on x64 as it may avoid the need for an 8 byte immediate. emit->emitIns_R_R(INS_punpckldq, EA_16BYTE, srcXmmReg, srcXmmReg); #endif }