Skip to content

Commit

Permalink
[LoongArch] Merge base and offset for large offsets (#113277)
Browse files Browse the repository at this point in the history
This PR merges large offsets into the base address loading.
  • Loading branch information
heiher authored Oct 23, 2024
1 parent 401d123 commit b225b15
Show file tree
Hide file tree
Showing 2 changed files with 119 additions and 105 deletions.
167 changes: 99 additions & 68 deletions llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,6 @@ void LoongArchMergeBaseOffsetOpt::foldOffset(
MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20,
MachineInstr *&Hi12, MachineInstr *&Last, MachineInstr &Tail,
int64_t Offset) {
assert(isInt<32>(Offset) && "Unexpected offset");
// Put the offset back into the Hi20 and Lo12 instructions.
Hi20.getOperand(1).setOffset(Offset);
Lo12.getOperand(2).setOffset(Offset);
Expand All @@ -209,22 +208,35 @@ void LoongArchMergeBaseOffsetOpt::foldOffset(
// instructions and deletes TailAdd and the instructions that produced the
// offset.
//
// Base address lowering is of the form:
// Hi20: pcalau12i vreg1, %pc_hi20(s)
// Lo12: addi.d vreg2, vreg1, %pc_lo12(s)
// / \
// / \
// / \
// / The large offset can be of two forms: \
// 1) Offset that has non zero bits in lower 2) Offset that has non zero
// 12 bits and upper 20 bits bits in upper 20 bits only
// OffsetHi: lu12i.w vreg3, 4
// OffsetLo: ori voff, vreg3, 188 OffsetHi: lu12i.w voff, 128
// \ /
// \ /
// \ /
// \ /
// TailAdd: add.d vreg4, vreg2, voff
// (The instructions marked with "!" are not necessarily present)
//
// Base address lowering is of the form:
// Hi20: pcalau12i vreg1, %pc_hi20(s)
// +- Lo12: addi.d vreg2, vreg1, %pc_lo12(s)
// | Lo20: lu32i.d vreg2, %pc64_lo20(s) !
// +- Hi12: lu52i.d vreg2, vreg2, %pc64_hi12(s) !
// |
// | The large offset can be one of the forms:
// |
// +-> 1) Offset that has non zero bits in Hi20 and Lo12 bits:
// | OffsetHi20: lu12i.w vreg3, 4
// | OffsetLo12: ori voff, vreg3, 188 ------------------+
// | |
// +-> 2) Offset that has non zero bits in Hi20 bits only: |
// | OffsetHi20: lu12i.w voff, 128 ------------------+
// | |
// +-> 3) Offset that has non zero bits in Lo20 bits: |
// | OffsetHi20: lu12i.w vreg3, 121 ! |
// | OffsetLo12: ori voff, vreg3, 122 ! |
// | OffsetLo20: lu32i.d voff, 123 ------------------+
// +-> 4) Offset that has non zero bits in Hi12 bits: |
// OffsetHi20: lu12i.w vreg3, 121 ! |
// OffsetLo12: ori voff, vreg3, 122 ! |
// OffsetLo20: lu32i.d vreg3, 123 ! |
// OffsetHi12: lu52i.d voff, vreg3, 124 ------------------+
// |
// TailAdd: add.d vreg4, vreg2, voff <------------------+
//
bool LoongArchMergeBaseOffsetOpt::foldLargeOffset(
MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20,
MachineInstr *&Hi12, MachineInstr *&Last, MachineInstr &TailAdd,
Expand All @@ -235,55 +247,81 @@ bool LoongArchMergeBaseOffsetOpt::foldLargeOffset(
Register Rs = TailAdd.getOperand(1).getReg();
Register Rt = TailAdd.getOperand(2).getReg();
Register Reg = Rs == GAReg ? Rt : Rs;
SmallVector<MachineInstr *, 4> Instrs;
int64_t Offset = 0;
int64_t Mask = -1;

// This can point to one of [ORI, LU12I.W, LU32I.D, LU52I.D]:
for (int i = 0; i < 4; i++) {
// Stop walking the def chain once Reg is R0.
if (Reg == LoongArch::R0)
break;

// Can't fold if the register has more than one use.
if (!Reg.isVirtual() || !MRI->hasOneUse(Reg))
return false;
// This can point to an ORI or a LU12I.W:
MachineInstr &OffsetTail = *MRI->getVRegDef(Reg);
if (OffsetTail.getOpcode() == LoongArch::ORI) {
// The offset value has non zero bits in both %hi and %lo parts.
// Detect an ORI that feeds from a LU12I.W instruction.
MachineOperand &OriImmOp = OffsetTail.getOperand(2);
if (OriImmOp.getTargetFlags() != LoongArchII::MO_None)
// Can't fold if the register has more than one use.
if (!Reg.isVirtual() || !MRI->hasOneUse(Reg))
return false;
Register OriReg = OffsetTail.getOperand(1).getReg();
int64_t OffLo = OriImmOp.getImm();

// Handle rs1 of ORI is R0.
if (OriReg == LoongArch::R0) {
LLVM_DEBUG(dbgs() << " Offset Instrs: " << OffsetTail);
foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailAdd, OffLo);
OffsetTail.eraseFromParent();
return true;
}

MachineInstr &OffsetLu12i = *MRI->getVRegDef(OriReg);
MachineOperand &Lu12iImmOp = OffsetLu12i.getOperand(1);
if (OffsetLu12i.getOpcode() != LoongArch::LU12I_W ||
Lu12iImmOp.getTargetFlags() != LoongArchII::MO_None ||
!MRI->hasOneUse(OffsetLu12i.getOperand(0).getReg()))
MachineInstr *Curr = MRI->getVRegDef(Reg);
if (!Curr)
break;

switch (Curr->getOpcode()) {
default:
// Can't fold if the instruction opcode is unexpected.
return false;
int64_t Offset = SignExtend64<32>(Lu12iImmOp.getImm() << 12);
Offset += OffLo;
// LU12I.W+ORI sign extends the result.
Offset = SignExtend64<32>(Offset);
LLVM_DEBUG(dbgs() << " Offset Instrs: " << OffsetTail
<< " " << OffsetLu12i);
foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailAdd, Offset);
OffsetTail.eraseFromParent();
OffsetLu12i.eraseFromParent();
return true;
} else if (OffsetTail.getOpcode() == LoongArch::LU12I_W) {
// The offset value has all zero bits in the lower 12 bits. Only LU12I.W
// exists.
LLVM_DEBUG(dbgs() << " Offset Instr: " << OffsetTail);
int64_t Offset = SignExtend64<32>(OffsetTail.getOperand(1).getImm() << 12);
foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailAdd, Offset);
OffsetTail.eraseFromParent();
return true;
case LoongArch::ORI: {
MachineOperand ImmOp = Curr->getOperand(2);
if (ImmOp.getTargetFlags() != LoongArchII::MO_None)
return false;
Offset += ImmOp.getImm();
Reg = Curr->getOperand(1).getReg();
Instrs.push_back(Curr);
break;
}
case LoongArch::LU12I_W: {
MachineOperand ImmOp = Curr->getOperand(1);
if (ImmOp.getTargetFlags() != LoongArchII::MO_None)
return false;
Offset += SignExtend64<32>(ImmOp.getImm() << 12) & Mask;
Reg = LoongArch::R0;
Instrs.push_back(Curr);
break;
}
case LoongArch::LU32I_D: {
MachineOperand ImmOp = Curr->getOperand(2);
if (ImmOp.getTargetFlags() != LoongArchII::MO_None || !Lo20)
return false;
Offset += SignExtend64<52>(ImmOp.getImm() << 32) & Mask;
Mask ^= 0x000FFFFF00000000ULL;
Reg = Curr->getOperand(1).getReg();
Instrs.push_back(Curr);
break;
}
case LoongArch::LU52I_D: {
MachineOperand ImmOp = Curr->getOperand(2);
if (ImmOp.getTargetFlags() != LoongArchII::MO_None || !Hi12)
return false;
Offset += ImmOp.getImm() << 52;
Mask ^= 0xFFF0000000000000ULL;
Reg = Curr->getOperand(1).getReg();
Instrs.push_back(Curr);
break;
}
}
}
return false;

// Can't fold if the offset is not extracted.
if (!Offset)
return false;

foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailAdd, Offset);
LLVM_DEBUG(dbgs() << " Offset Instrs:\n");
for (auto I : Instrs) {
LLVM_DEBUG(dbgs() << " " << *I);
I->eraseFromParent();
}

return true;
}

bool LoongArchMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &Hi20,
Expand Down Expand Up @@ -344,13 +382,6 @@ bool LoongArchMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &Hi20,
[[fallthrough]];
case LoongArch::ADD_D:
// The offset is too large to fit in the immediate field of ADDI.
// This can be in two forms:
// 1) LU12I.W hi_offset followed by:
// ORI lo_offset
// This happens in case the offset has non zero bits in
// both hi 20 and lo 12 bits.
// 2) LU12I.W (offset20)
// This happens in case the lower 12 bits of the offset are zeros.
return foldLargeOffset(Hi20, Lo12, Lo20, Hi12, Last, Tail, DestReg);
break;
}
Expand Down
57 changes: 20 additions & 37 deletions llvm/test/CodeGen/LoongArch/merge-base-offset.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1100,14 +1100,11 @@ define dso_local ptr @load_addr_offset_281474439839744() nounwind {
;
; LA64-LARGE-LABEL: load_addr_offset_281474439839744:
; LA64-LARGE: # %bb.0: # %entry
; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64)
; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64)
; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64)
; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64)
; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64+2251795518717952)
; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64+2251795518717952)
; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64+2251795518717952)
; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64+2251795518717952)
; LA64-LARGE-NEXT: add.d $a0, $a1, $a0
; LA64-LARGE-NEXT: ori $a1, $zero, 0
; LA64-LARGE-NEXT: lu32i.d $a1, 524287
; LA64-LARGE-NEXT: add.d $a0, $a0, $a1
; LA64-LARGE-NEXT: ret
entry:
ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 281474439839744)
Expand All @@ -1131,14 +1128,11 @@ define dso_local ptr @load_addr_offset_248792680471040() nounwind {
;
; LA64-LARGE-LABEL: load_addr_offset_248792680471040:
; LA64-LARGE: # %bb.0: # %entry
; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64)
; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64)
; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64)
; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64)
; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64+1990341443768320)
; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64+1990341443768320)
; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64+1990341443768320)
; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64+1990341443768320)
; LA64-LARGE-NEXT: add.d $a0, $a1, $a0
; LA64-LARGE-NEXT: lu12i.w $a1, 502733
; LA64-LARGE-NEXT: lu32i.d $a1, 463412
; LA64-LARGE-NEXT: add.d $a0, $a0, $a1
; LA64-LARGE-NEXT: ret
entry:
ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 248792680471040)
Expand All @@ -1163,15 +1157,11 @@ define dso_local ptr @load_addr_offset_9380351707272() nounwind {
;
; LA64-LARGE-LABEL: load_addr_offset_9380351707272:
; LA64-LARGE: # %bb.0: # %entry
; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64)
; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64)
; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64)
; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64)
; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64+75042813658176)
; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64+75042813658176)
; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64+75042813658176)
; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64+75042813658176)
; LA64-LARGE-NEXT: add.d $a0, $a1, $a0
; LA64-LARGE-NEXT: lu12i.w $a1, 279556
; LA64-LARGE-NEXT: ori $a1, $a1, 1088
; LA64-LARGE-NEXT: lu32i.d $a1, 17472
; LA64-LARGE-NEXT: add.d $a0, $a0, $a1
; LA64-LARGE-NEXT: ret
entry:
ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 9380351707272)
Expand All @@ -1194,13 +1184,11 @@ define dso_local ptr @load_addr_offset_562949953421312() nounwind {
;
; LA64-LARGE-LABEL: load_addr_offset_562949953421312:
; LA64-LARGE: # %bb.0: # %entry
; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64)
; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64)
; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64)
; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64)
; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64+4503599627370496)
; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64+4503599627370496)
; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64+4503599627370496)
; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64+4503599627370496)
; LA64-LARGE-NEXT: add.d $a0, $a1, $a0
; LA64-LARGE-NEXT: lu52i.d $a1, $zero, 1
; LA64-LARGE-NEXT: add.d $a0, $a0, $a1
; LA64-LARGE-NEXT: ret
entry:
ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 562949953421312)
Expand All @@ -1226,16 +1214,11 @@ define dso_local ptr @load_addr_offset_614749556925924693() nounwind {
;
; LA64-LARGE-LABEL: load_addr_offset_614749556925924693:
; LA64-LARGE: # %bb.0: # %entry
; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64)
; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64)
; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64)
; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64)
; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64+4917996455407397544)
; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64+4917996455407397544)
; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64+4917996455407397544)
; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64+4917996455407397544)
; LA64-LARGE-NEXT: add.d $a0, $a1, $a0
; LA64-LARGE-NEXT: lu12i.w $a1, 209666
; LA64-LARGE-NEXT: ori $a1, $a1, 2728
; LA64-LARGE-NEXT: lu32i.d $a1, 15288
; LA64-LARGE-NEXT: lu52i.d $a1, $a1, 1092
; LA64-LARGE-NEXT: add.d $a0, $a0, $a1
; LA64-LARGE-NEXT: ret
entry:
ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 614749556925924693)
Expand Down

0 comments on commit b225b15

Please sign in to comment.