[ARM] VBIT/VBIF support added.
Vector bitwise selects are matched by the pseudo VBSP instruction
and expanded to VBSL/VBIT/VBIF after register allocation,
depending on the operand registers, to minimize extra copies.
ilinpv committed Jul 16, 2020
1 parent 15d058f commit b9a6fb6
Showing 19 changed files with 553 additions and 451 deletions.
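
For context, the NEON bitwise-select instructions differ only in which source
operand is tied to the destination register. The scalar model below is an
illustrative sketch (not part of the patch; names and values are made up) of
the documented bit-level semantics, showing why the same select can be encoded
as VBSL, VBIT, or VBIF depending on which input already lives in the
destination register:

#include <cassert>
#include <cstdint>

// vbsp/vbsl semantics: result = (TrueVal & Mask) | (FalseVal & ~Mask)
static uint32_t vbsp(uint32_t Mask, uint32_t TrueVal, uint32_t FalseVal) {
  return (TrueVal & Mask) | (FalseVal & ~Mask);
}

// VBSL Vd, Vn, Vm : Vd holds the mask on input.
static uint32_t vbsl(uint32_t Vd, uint32_t Vn, uint32_t Vm) {
  return (Vn & Vd) | (Vm & ~Vd);
}

// VBIT Vd, Vn, Vm : insert bits of Vn into Vd where Vm is set.
static uint32_t vbit(uint32_t Vd, uint32_t Vn, uint32_t Vm) {
  return (Vn & Vm) | (Vd & ~Vm);
}

// VBIF Vd, Vn, Vm : insert bits of Vn into Vd where Vm is clear.
static uint32_t vbif(uint32_t Vd, uint32_t Vn, uint32_t Vm) {
  return (Vd & Vm) | (Vn & ~Vm);
}

int main() {
  uint32_t Mask = 0x0f0f0f0f, T = 0x12345678, F = 0x9abcdef0;
  uint32_t Expected = vbsp(Mask, T, F);
  assert(vbsl(Mask, T, F) == Expected); // destination tied to the mask
  assert(vbit(F, T, Mask) == Expected); // destination tied to the "false" input
  assert(vbif(T, F, Mask) == Expected); // destination tied to the "true" input
  return 0;
}

When the destination register is not tied to any of the three inputs, no single
instruction fits and the expansion falls back to a register copy followed by a
VBSL, as in the ARMExpandPseudoInsts.cpp change below.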
60 changes: 60 additions & 0 deletions llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -1860,6 +1860,66 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
  default:
    return false;

  case ARM::VBSPd:
  case ARM::VBSPq: {
    Register DstReg = MI.getOperand(0).getReg();
    if (DstReg == MI.getOperand(3).getReg()) {
      // Expand to VBIT
      unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBITd : ARM::VBITq;
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc))
          .add(MI.getOperand(0))
          .add(MI.getOperand(3))
          .add(MI.getOperand(2))
          .add(MI.getOperand(1))
          .addImm(MI.getOperand(4).getImm())
          .add(MI.getOperand(5));
    } else if (DstReg == MI.getOperand(2).getReg()) {
      // Expand to VBIF
      unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBIFd : ARM::VBIFq;
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc))
          .add(MI.getOperand(0))
          .add(MI.getOperand(2))
          .add(MI.getOperand(3))
          .add(MI.getOperand(1))
          .addImm(MI.getOperand(4).getImm())
          .add(MI.getOperand(5));
    } else {
      // Expand to VBSL
      unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBSLd : ARM::VBSLq;
      if (DstReg == MI.getOperand(1).getReg()) {
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc))
            .add(MI.getOperand(0))
            .add(MI.getOperand(1))
            .add(MI.getOperand(2))
            .add(MI.getOperand(3))
            .addImm(MI.getOperand(4).getImm())
            .add(MI.getOperand(5));
      } else {
        // Use move to satisfy constraints
        unsigned MoveOpc = Opcode == ARM::VBSPd ? ARM::VORRd : ARM::VORRq;
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MoveOpc))
            .addReg(DstReg,
                    RegState::Define |
                        getRenamableRegState(MI.getOperand(0).isRenamable()))
            .add(MI.getOperand(1))
            .add(MI.getOperand(1))
            .addImm(MI.getOperand(4).getImm())
            .add(MI.getOperand(5));
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc))
            .add(MI.getOperand(0))
            .addReg(DstReg,
                    RegState::Kill |
                        getRenamableRegState(MI.getOperand(0).isRenamable()))
            .add(MI.getOperand(2))
            .add(MI.getOperand(3))
            .addImm(MI.getOperand(4).getImm())
            .add(MI.getOperand(5));
      }
    }
    MI.eraseFromParent();
    return true;
  }

  case ARM::TCRETURNdi:
  case ARM::TCRETURNri: {
    MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
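One note on the fallback branch in the VBSP expansion above: with both source
operands equal, VORR acts as a plain register-to-register move, so the mask is
first copied into the destination and a VBSL is then applied. A minimal scalar
sketch of that path (illustrative only, not part of the patch):

#include <cstdint>

// Fallback when Dst aliases none of the VBSP inputs:
//   VORR Dst, Mask, Mask   ; a | a == a, the NEON register-move idiom
//   VBSL Dst, T, F         ; Dst = (T & Dst) | (F & ~Dst)
static uint32_t expandFallback(uint32_t Mask, uint32_t T, uint32_t F) {
  uint32_t Dst = Mask | Mask;   // the VORR copy of the mask
  Dst = (T & Dst) | (F & ~Dst); // the VBSL select
  return Dst;                   // same value as (T & Mask) | (F & ~Mask)
}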
4 changes: 2 additions & 2 deletions llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1755,7 +1755,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::BFI: return "ARMISD::BFI";
case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
case ARMISD::VBSL: return "ARMISD::VBSL";
case ARMISD::VBSP: return "ARMISD::VBSP";
case ARMISD::MEMCPY: return "ARMISD::MEMCPY";
case ARMISD::VLD1DUP: return "ARMISD::VLD1DUP";
case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
@@ -13153,7 +13153,7 @@ static SDValue PerformORCombine(SDNode *N,
// Canonicalize the vector type to make instruction selection
// simpler.
EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT,
SDValue Result = DAG.getNode(ARMISD::VBSP, dl, CanonicalVT,
N0->getOperand(1),
N0->getOperand(0),
N1->getOperand(0));
4 changes: 2 additions & 2 deletions llvm/lib/Target/ARM/ARMISelLowering.h
@@ -271,8 +271,8 @@ class VectorType;
// Vector AND with NOT of immediate
VBICIMM,

// Vector bitwise select
VBSL,
// Pseudo vector bitwise select
VBSP,

// Pseudo-instruction representing a memory copy using ldm/stm
// instructions.
91 changes: 49 additions & 42 deletions llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -509,7 +509,7 @@ def NEONvqrshrnsuImm : SDNode<"ARMISD::VQRSHRNsuIMM", SDTARMVSHXIMM>;
def NEONvsliImm : SDNode<"ARMISD::VSLIIMM", SDTARMVSHINSIMM>;
def NEONvsriImm : SDNode<"ARMISD::VSRIIMM", SDTARMVSHINSIMM>;

def NEONvbsl : SDNode<"ARMISD::VBSL",
def NEONvbsp : SDNode<"ARMISD::VBSP",
SDTypeProfile<1, 3, [SDTCisVec<0>,
SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>,
@@ -4526,9 +4526,9 @@ let Predicates = [HasNEON, HasV8_1a] in {
(SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (saddsat
(v4i32 QPR:$src1),
(v4i32 (int_arm_neon_vqrdmulh
(v4i32 (int_arm_neon_vqrdmulh
(v4i32 QPR:$src2),
(v4i32 (ARMvduplane (v4i32 QPR:$src3),
(v4i32 (ARMvduplane (v4i32 QPR:$src3),
imm:$lane)))))),
(v4i32 (VQRDMLAHslv4i32 (v4i32 QPR:$src1),
(v4i32 QPR:$src2),
@@ -4579,17 +4579,17 @@ let Predicates = [HasNEON, HasV8_1a] in {
(v2i32 DPR:$Vn),
(v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
imm:$lane)))))),
(v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
(v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
imm:$lane))>;
def : Pat<(v8i16 (ssubsat
(v8i16 QPR:$src1),
(v8i16 (int_arm_neon_vqrdmulh
(v8i16 QPR:$src2),
(v8i16 (ARMvduplane (v8i16 QPR:$src3),
(v8i16 (ARMvduplane (v8i16 QPR:$src3),
imm:$lane)))))),
(v8i16 (VQRDMLSHslv8i16 (v8i16 QPR:$src1),
(v8i16 QPR:$src2),
(v4i16 (EXTRACT_SUBREG
(v4i16 (EXTRACT_SUBREG
QPR:$src3,
(DSubReg_i16_reg imm:$lane))),
(SubReg_i16_lane imm:$lane)))>;
@@ -4601,7 +4601,7 @@ let Predicates = [HasNEON, HasV8_1a] in {
imm:$lane)))))),
(v4i32 (VQRDMLSHslv4i32 (v4i32 QPR:$src1),
(v4i32 QPR:$src2),
(v2i32 (EXTRACT_SUBREG
(v2i32 (EXTRACT_SUBREG
QPR:$src3,
(DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
@@ -5442,74 +5442,86 @@ def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;
}

// VBSL : Vector Bitwise Select
def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
(ins DPR:$src1, DPR:$Vn, DPR:$Vm),
N3RegFrm, IIC_VCNTiD,
"vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
[(set DPR:$Vd,
(v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
// The TwoAddress pass will not go looking for equivalent operations
// with different register constraints; it just inserts copies.
// That is why the pseudo VBSP is implemented. It is expanded later into
// VBIT/VBIF/VBSL, taking register constraints into account to avoid copies.
def VBSPd
: PseudoNeonI<(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
IIC_VBINiD, "",
[(set DPR:$Vd,
(v2i32 (NEONvbsp DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
let Predicates = [HasNEON] in {
def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1),
(v8i8 DPR:$Vn), (v8i8 DPR:$Vm))),
(VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
(VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1),
(v4i16 DPR:$Vn), (v4i16 DPR:$Vm))),
(VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
(VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1),
(v2i32 DPR:$Vn), (v2i32 DPR:$Vm))),
(VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
(VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1),
(v2f32 DPR:$Vn), (v2f32 DPR:$Vm))),
(VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
(VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1),
(v1i64 DPR:$Vn), (v1i64 DPR:$Vm))),
(VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
(VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;

def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
(and DPR:$Vm, (vnotd DPR:$Vd)))),
(VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
(VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;

def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd),
(and DPR:$Vm, (vnotd DPR:$Vd)))),
(VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
(VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
}

def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
(ins QPR:$src1, QPR:$Vn, QPR:$Vm),
N3RegFrm, IIC_VCNTiQ,
"vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
[(set QPR:$Vd,
(v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;

def VBSPq
: PseudoNeonI<(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
IIC_VBINiQ, "",
[(set QPR:$Vd,
(v4i32 (NEONvbsp QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;
let Predicates = [HasNEON] in {
def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1),
(v16i8 QPR:$Vn), (v16i8 QPR:$Vm))),
(VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
(VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1),
(v8i16 QPR:$Vn), (v8i16 QPR:$Vm))),
(VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
(VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1),
(v4i32 QPR:$Vn), (v4i32 QPR:$Vm))),
(VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
(VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1),
(v4f32 QPR:$Vn), (v4f32 QPR:$Vm))),
(VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
(VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1),
(v2i64 QPR:$Vn), (v2i64 QPR:$Vm))),
(VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
(VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;

def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
(and QPR:$Vm, (vnotq QPR:$Vd)))),
(VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
(VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd),
(and QPR:$Vm, (vnotq QPR:$Vd)))),
(VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
(VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
}

// VBSL : Vector Bitwise Select
def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
(ins DPR:$src1, DPR:$Vn, DPR:$Vm),
N3RegFrm, IIC_VBINiD,
"vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
[]>;

def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
(ins QPR:$src1, QPR:$Vn, QPR:$Vm),
N3RegFrm, IIC_VBINiQ,
"vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
[]>;

// VBIF : Vector Bitwise Insert if False
// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
// FIXME: This instruction's encoding MAY NOT BE correct.
def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1,
(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
N3RegFrm, IIC_VBINiD,
@@ -5523,7 +5535,6 @@ def VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1,

// VBIT : Vector Bitwise Insert if True
// like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
// FIXME: This instruction's encoding MAY NOT BE correct.
def VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1,
(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
N3RegFrm, IIC_VBINiD,
@@ -5535,10 +5546,6 @@ def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1,
"vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
[]>;

// VBIT/VBIF are not yet implemented. The TwoAddress pass will not go looking
// for equivalent operations with different register constraints; it just
// inserts copies.

// Vector Absolute Differences.

// VABD : Vector Absolute Difference
@@ -7953,7 +7960,7 @@ let Predicates = [HasNEON,IsLE] in {
(VLD1LNd16 addrmode6:$addr,
(f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
}
// The following patterns are basically a copy of the patterns above,
// The following patterns are basically a copy of the patterns above,
// however with an additional VREV16d instruction to convert data
// loaded by VLD1LN into proper vector format in big endian mode.
let Predicates = [HasNEON,IsBE] in {
2 changes: 1 addition & 1 deletion llvm/lib/Target/ARM/ARMScheduleA57.td
@@ -1201,7 +1201,7 @@ def : InstRW<[A57Write_5cyc_1V], (instregex
// --- 3.16 ASIMD Miscellaneous Instructions ---

// ASIMD bitwise insert
def : InstRW<[A57Write_3cyc_1V], (instregex "VBIF", "VBIT", "VBSL")>;
def : InstRW<[A57Write_3cyc_1V], (instregex "VBIF", "VBIT", "VBSL", "VBSP")>;

// ASIMD count
def : InstRW<[A57Write_3cyc_1V], (instregex "VCLS", "VCLZ", "VCNT")>;
4 changes: 2 additions & 2 deletions llvm/lib/Target/ARM/ARMScheduleR52.td
@@ -787,8 +787,8 @@ def : InstRW<[R52Write2FPALU_F3, R52Read_F2, R52Read_F2], (instregex "(VAND|VBIC
def : InstRW<[R52WriteFPALU_F3, R52Read_F2], (instregex "VBICi(v4i16|v2i32)")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F2], (instregex "VBICi(v8i16|v4i32)")>;

def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL)d")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL)q")>;
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL|VBSP)d")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL|VBSP)q")>;

def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
(instregex "(VCEQ|VCGE|VCGT|VCLE|VCLT|VCLZ|VCMP|VCMPE|VCNT)")>;
4 changes: 2 additions & 2 deletions llvm/lib/Target/ARM/ARMScheduleSwift.td
@@ -558,8 +558,8 @@ let SchedModel = SwiftModel in {
(instregex "VADDv", "VSUBv", "VNEG(s|f|v)", "VADDL", "VSUBL",
"VADDW", "VSUBW", "VHADD", "VHSUB", "VRHADD", "VPADDi",
"VPADDL", "VAND", "VBIC", "VEOR", "VORN", "VORR", "VTST",
"VSHL", "VSHR(s|u)", "VSHLL", "VQSHL(s|u)", "VBIF",
"VBIT", "VBSL", "VSLI", "VSRI", "VCLS", "VCLZ", "VCNT")>;
"VSHL", "VSHR(s|u)", "VSHLL", "VQSHL(s|u)", "VBIF", "VBIT",
"VBSL", "VBSP", "VSLI", "VSRI", "VCLS", "VCLZ", "VCNT")>;

def : InstRW<[SwiftWriteP1TwoCycle],
(instregex "VEXT", "VREV16", "VREV32", "VREV64")>;
28 changes: 14 additions & 14 deletions llvm/test/CodeGen/ARM/fcopysign.ll
@@ -12,11 +12,11 @@ define float @test1(float %x, float %y) nounwind {
;
; HARD-LABEL: test1:
; HARD: @ %bb.0: @ %entry
; HARD-NEXT: vmov.f32 s4, s1
; HARD-NEXT: vmov.f32 s2, s1
; HARD-NEXT: @ kill: def $s0 killed $s0 def $d0
; HARD-NEXT: vmov.i32 d1, #0x80000000
; HARD-NEXT: vbsl d1, d2, d0
; HARD-NEXT: vmov.f32 s0, s2
; HARD-NEXT: vmov.i32 d16, #0x80000000
; HARD-NEXT: vbit d0, d1, d16
; HARD-NEXT: @ kill: def $s0 killed $s0 killed $d0
; HARD-NEXT: bx lr
entry:

Expand All @@ -35,8 +35,7 @@ define double @test2(double %x, double %y) nounwind {
; HARD: @ %bb.0: @ %entry
; HARD-NEXT: vmov.i32 d16, #0x80000000
; HARD-NEXT: vshl.i64 d16, d16, #32
; HARD-NEXT: vbsl d16, d1, d0
; HARD-NEXT: vorr d0, d16, d16
; HARD-NEXT: vbit d0, d1, d16
; HARD-NEXT: bx lr
entry:

Expand All @@ -53,15 +52,16 @@ define double @test3(double %x, double %y, double %z) nounwind {
; SOFT-NEXT: vmov.i32 d17, #0x80000000
; SOFT-NEXT: vshl.i64 d17, d17, #32
; SOFT-NEXT: vldr d18, [sp]
; SOFT-NEXT: vbsl d17, d18, d16
; SOFT-NEXT: vmov r0, r1, d17
; SOFT-NEXT: vbit d16, d18, d17
; SOFT-NEXT: vmov r0, r1, d16
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test3:
; HARD: @ %bb.0: @ %entry
; HARD-NEXT: vmul.f64 d16, d0, d1
; HARD-NEXT: vmov.i32 d17, #0x80000000
; HARD-NEXT: vshl.i64 d0, d17, #32
; HARD-NEXT: vshl.i64 d17, d17, #32
; HARD-NEXT: vorr d0, d17, d17
; HARD-NEXT: vbsl d0, d2, d16
; HARD-NEXT: bx lr
entry:
Expand All @@ -81,8 +81,8 @@ define float @test4() nounwind {
; SOFT-NEXT: vmov.i32 d17, #0x80000000
; SOFT-NEXT: vshr.u64 d16, d16, #32
; SOFT-NEXT: vmov.f32 d18, #5.000000e-01
; SOFT-NEXT: vbsl d17, d16, d18
; SOFT-NEXT: vadd.f32 d0, d0, d17
; SOFT-NEXT: vbif d16, d18, d17
; SOFT-NEXT: vadd.f32 d0, d0, d16
; SOFT-NEXT: vmov r0, s0
; SOFT-NEXT: pop {lr}
;
@@ -93,10 +93,10 @@ define float @test4() nounwind {
; HARD-NEXT: bl bar
; HARD-NEXT: vmov d16, r0, r1
; HARD-NEXT: vcvt.f32.f64 s0, d16
; HARD-NEXT: vmov.i32 d1, #0x80000000
; HARD-NEXT: vmov.i32 d17, #0x80000000
; HARD-NEXT: vshr.u64 d16, d16, #32
; HARD-NEXT: vmov.f32 s4, #5.000000e-01
; HARD-NEXT: vbsl d1, d16, d2
; HARD-NEXT: vmov.f32 s2, #5.000000e-01
; HARD-NEXT: vbit d1, d16, d17
; HARD-NEXT: vadd.f32 s0, s0, s2
; HARD-NEXT: pop {r11, pc}
entry:
(Diff for the remaining 11 of the 19 changed files not shown.)
