From ca98a3d9bbc254cbb7f028866a7d2077b7994ee8 Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Thu, 19 Dec 2024 13:16:31 +0000 Subject: [PATCH] [AArch64][SVE] Use SVE for scalar FP converts in streaming[-compatible] functions (1/n) (#118505) In streaming[-compatible] functions, use SVE for scalar FP conversions to/from integer types. This can help avoid moves between FPRs and GPRs, which could be costly. This patch also updates definitions of SCVTF_ZPmZ_StoD and UCVTF_ZPmZ_StoD to disallow lowering to them from ISD nodes, as doing so requires creating a [U|S]INT_TO_FP_MERGE_PASSTHRU node with inconsistent types. Follow up to #112213. Note: This PR does not include support for f64 <-> i32 conversions (like #112564), which needs a bit more work to support. --- .../Target/AArch64/AArch64ISelLowering.cpp | 60 ++- .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 4 +- .../sve-streaming-mode-cvt-fp-int-fp.ll | 93 ++++- .../sve-streaming-mode-cvt-fp-to-int.ll | 252 +++++++++++++ .../sve-streaming-mode-cvt-int-to-fp.ll | 252 +++++++++++++ ...e-streaming-mode-fixed-length-fp-to-int.ll | 356 ++++++++---------- ...e-streaming-mode-fixed-length-int-to-fp.ll | 94 +++-- 7 files changed, 857 insertions(+), 254 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-to-int.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-int-to-fp.ll diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 494506def33a3..8a9ee08869cd3 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -19115,13 +19115,67 @@ static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N, return SDValue(); } +/// Tries to replace scalar FP <-> INT conversions with SVE in streaming +/// functions, this can help to reduce the number of fmovs to/from GPRs. 
+static SDValue +tryToReplaceScalarFPConversionWithSVE(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const AArch64Subtarget *Subtarget) { + if (N->isStrictFPOpcode()) + return SDValue(); + + if (DCI.isBeforeLegalizeOps()) + return SDValue(); + + if (!Subtarget->isSVEorStreamingSVEAvailable() || + (!Subtarget->isStreaming() && !Subtarget->isStreamingCompatible())) + return SDValue(); + + auto isSupportedType = [](EVT VT) { + return !VT.isVector() && VT != MVT::bf16 && VT != MVT::f128; + }; + + SDValue SrcVal = N->getOperand(0); + EVT SrcTy = SrcVal.getValueType(); + EVT DestTy = N->getValueType(0); + + if (!isSupportedType(SrcTy) || !isSupportedType(DestTy)) + return SDValue(); + + EVT SrcVecTy; + EVT DestVecTy; + if (DestTy.bitsGT(SrcTy)) { + DestVecTy = getPackedSVEVectorVT(DestTy); + SrcVecTy = DestVecTy.changeVectorElementType(SrcTy); + } else { + SrcVecTy = getPackedSVEVectorVT(SrcTy); + DestVecTy = SrcVecTy.changeVectorElementType(DestTy); + } + + // Ensure the resulting src/dest vector type is legal. + if (SrcVecTy == MVT::nxv2i32 || DestVecTy == MVT::nxv2i32) + return SDValue(); + + SDLoc DL(N); + SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL); + SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, SrcVecTy, + DAG.getUNDEF(SrcVecTy), SrcVal, ZeroIdx); + SDValue Convert = DAG.getNode(N->getOpcode(), DL, DestVecTy, Vec); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, DestTy, Convert, ZeroIdx); +} + static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget) { // First try to optimize away the conversion when it's conditionally from // a constant. Vectors only. 
if (SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG)) return Res; + if (SDValue Res = + tryToReplaceScalarFPConversionWithSVE(N, DAG, DCI, Subtarget)) + return Res; + EVT VT = N->getValueType(0); if (VT != MVT::f32 && VT != MVT::f64) return SDValue(); @@ -19160,6 +19214,10 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG, static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget) { + if (SDValue Res = + tryToReplaceScalarFPConversionWithSVE(N, DAG, DCI, Subtarget)) + return Res; + if (!Subtarget->isNeonAvailable()) return SDValue(); @@ -26240,7 +26298,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, return performMulCombine(N, DAG, DCI, Subtarget); case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: - return performIntToFpCombine(N, DAG, Subtarget); + return performIntToFpCombine(N, DAG, DCI, Subtarget); case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::FP_TO_SINT_SAT: diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index dfdc78e00f2a2..c8892de647437 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -2348,8 +2348,8 @@ let Predicates = [HasSVEorSME] in { defm FCVT_ZPmZ_HtoD : sve_fp_2op_p_zd< 0b1101001, "fcvt", ZPR16, ZPR64, int_aarch64_sve_fcvt_f64f16, AArch64fcvte_mt, nxv2f64, nxv2i1, nxv2f16, ElementSizeD>; defm FCVT_ZPmZ_DtoS : sve_fp_2op_p_zdr<0b1101010, "fcvt", ZPR64, ZPR32, int_aarch64_sve_fcvt_f32f64, AArch64fcvtr_mt, nxv2f32, nxv2i1, nxv2f64, ElementSizeD>; defm FCVT_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1101011, "fcvt", ZPR32, ZPR64, int_aarch64_sve_fcvt_f64f32, AArch64fcvte_mt, nxv2f64, nxv2i1, nxv2f32, ElementSizeD>; - defm SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1110000, "scvtf", ZPR32, ZPR64, int_aarch64_sve_scvtf_f64i32, AArch64scvtf_mt, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>; - defm UCVTF_ZPmZ_StoD : 
sve_fp_2op_p_zd< 0b1110001, "ucvtf", ZPR32, ZPR64, int_aarch64_sve_ucvtf_f64i32, AArch64ucvtf_mt, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>; + defm SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1110000, "scvtf", ZPR32, ZPR64, int_aarch64_sve_scvtf_f64i32, null_frag, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>; + defm UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1110001, "ucvtf", ZPR32, ZPR64, int_aarch64_sve_ucvtf_f64i32, null_frag, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>; defm UCVTF_ZPmZ_StoH : sve_fp_2op_p_zd< 0b0110101, "ucvtf", ZPR32, ZPR16, int_aarch64_sve_ucvtf_f16i32, AArch64ucvtf_mt, nxv4f16, nxv4i1, nxv4i32, ElementSizeS>; defm SCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1110100, "scvtf", ZPR64, ZPR32, int_aarch64_sve_scvtf_f32i64, AArch64scvtf_mt, nxv2f32, nxv2i1, nxv2i64, ElementSizeD>; defm SCVTF_ZPmZ_StoH : sve_fp_2op_p_zd< 0b0110100, "scvtf", ZPR32, ZPR16, int_aarch64_sve_scvtf_f16i32, AArch64scvtf_mt, nxv4f16, nxv4i1, nxv4i32, ElementSizeS>; diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll index 0d291e0bf0798..f4ae66a3b2259 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll @@ -1,15 +1,19 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -force-streaming-compatible < %s | FileCheck %s -; RUN: llc -force-streaming-compatible -mattr=+sme2p2 < %s | FileCheck %s --check-prefix=USE-NEON-NO-GPRS -; RUN: llc < %s | FileCheck %s --check-prefix=USE-NEON-NO-GPRS +; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s +; RUN: llc -mattr=+sme2p2 -force-streaming-compatible < %s | FileCheck %s --check-prefix=USE-NEON-NO-GPRS +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" define double @t1(double %x) { ; 
CHECK-LABEL: t1: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvtzs x8, d0 -; CHECK-NEXT: scvtf d0, x8 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d +; CHECK-NEXT: scvtf z0.d, p0/m, z0.d +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret ; ; USE-NEON-NO-GPRS-LABEL: t1: @@ -17,6 +21,12 @@ define double @t1(double %x) { ; USE-NEON-NO-GPRS-NEXT: fcvtzs d0, d0 ; USE-NEON-NO-GPRS-NEXT: scvtf d0, d0 ; USE-NEON-NO-GPRS-NEXT: ret +; +; NONEON-NOSVE-LABEL: t1: +; NONEON-NOSVE: // %bb.0: // %entry +; NONEON-NOSVE-NEXT: fcvtzs x8, d0 +; NONEON-NOSVE-NEXT: scvtf d0, x8 +; NONEON-NOSVE-NEXT: ret entry: %conv = fptosi double %x to i64 %conv1 = sitofp i64 %conv to double @@ -26,8 +36,11 @@ entry: define float @t2(float %x) { ; CHECK-LABEL: t2: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvtzs w8, s0 -; CHECK-NEXT: scvtf s0, w8 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0 +; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s +; CHECK-NEXT: scvtf z0.s, p0/m, z0.s +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret ; ; USE-NEON-NO-GPRS-LABEL: t2: @@ -35,6 +48,12 @@ define float @t2(float %x) { ; USE-NEON-NO-GPRS-NEXT: fcvtzs s0, s0 ; USE-NEON-NO-GPRS-NEXT: scvtf s0, s0 ; USE-NEON-NO-GPRS-NEXT: ret +; +; NONEON-NOSVE-LABEL: t2: +; NONEON-NOSVE: // %bb.0: // %entry +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ret entry: %conv = fptosi float %x to i32 %conv1 = sitofp i32 %conv to float @@ -44,11 +63,20 @@ entry: define half @t3(half %x) { ; CHECK-LABEL: t3: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: fcvtzs w8, s0 -; CHECK-NEXT: scvtf s0, w8 -; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 +; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h +; CHECK-NEXT: scvtf z0.h, p0/m, z0.s +; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: 
ret +; +; NONEON-NOSVE-LABEL: t3: +; NONEON-NOSVE: // %bb.0: // %entry +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: ret entry: %conv = fptosi half %x to i32 %conv1 = sitofp i32 %conv to half @@ -58,8 +86,11 @@ entry: define double @t4(double %x) { ; CHECK-LABEL: t4: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvtzu x8, d0 -; CHECK-NEXT: ucvtf d0, x8 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d +; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret ; ; USE-NEON-NO-GPRS-LABEL: t4: @@ -67,6 +98,12 @@ define double @t4(double %x) { ; USE-NEON-NO-GPRS-NEXT: fcvtzu d0, d0 ; USE-NEON-NO-GPRS-NEXT: ucvtf d0, d0 ; USE-NEON-NO-GPRS-NEXT: ret +; +; NONEON-NOSVE-LABEL: t4: +; NONEON-NOSVE: // %bb.0: // %entry +; NONEON-NOSVE-NEXT: fcvtzu x8, d0 +; NONEON-NOSVE-NEXT: ucvtf d0, x8 +; NONEON-NOSVE-NEXT: ret entry: %conv = fptoui double %x to i64 %conv1 = uitofp i64 %conv to double @@ -76,8 +113,11 @@ entry: define float @t5(float %x) { ; CHECK-LABEL: t5: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvtzu w8, s0 -; CHECK-NEXT: ucvtf s0, w8 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0 +; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s +; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret ; ; USE-NEON-NO-GPRS-LABEL: t5: @@ -85,6 +125,12 @@ define float @t5(float %x) { ; USE-NEON-NO-GPRS-NEXT: fcvtzu s0, s0 ; USE-NEON-NO-GPRS-NEXT: ucvtf s0, s0 ; USE-NEON-NO-GPRS-NEXT: ret +; +; NONEON-NOSVE-LABEL: t5: +; NONEON-NOSVE: // %bb.0: // %entry +; NONEON-NOSVE-NEXT: fcvtzu w8, s0 +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ret entry: %conv = fptoui float %x to i32 %conv1 = uitofp i32 %conv to float @@ -94,11 +140,20 @@ entry: define half @t6(half %x) { ; 
CHECK-LABEL: t6: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: fcvtzu w8, s0 -; CHECK-NEXT: ucvtf s0, w8 -; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 +; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.h +; CHECK-NEXT: ucvtf z0.h, p0/m, z0.s +; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: t6: +; NONEON-NOSVE: // %bb.0: // %entry +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzu w8, s0 +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: ret entry: %conv = fptoui half %x to i32 %conv1 = uitofp i32 %conv to half diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-to-int.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-to-int.ll new file mode 100644 index 0000000000000..3ae0089d409d0 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-to-int.ll @@ -0,0 +1,252 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE + +target triple = "aarch64-unknown-linux-gnu" + +define i32 @f16_to_s32(half %x) { +; CHECK-LABEL: f16_to_s32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 +; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: f16_to_s32: +; NONEON-NOSVE: // %bb.0: // %entry +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs w0, s0 +; NONEON-NOSVE-NEXT: ret + entry: + %cvt = fptosi half %x to i32 + ret i32 %cvt +} + +define i64 @f16_to_s64(half %x) { +; CHECK-LABEL: f16_to_s64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 +; 
CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: f16_to_s64: +; NONEON-NOSVE: // %bb.0: // %entry +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs x0, s0 +; NONEON-NOSVE-NEXT: ret + entry: + %cvt = fptosi half %x to i64 + ret i64 %cvt +} + +define i32 @f32_to_s32(float %x) { +; CHECK-LABEL: f32_to_s32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0 +; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: f32_to_s32: +; NONEON-NOSVE: // %bb.0: // %entry +; NONEON-NOSVE-NEXT: fcvtzs w0, s0 +; NONEON-NOSVE-NEXT: ret + entry: + %cvt = fptosi float %x to i32 + ret i32 %cvt +} + +define i64 @f32_to_s64(float %x) { +; CHECK-LABEL: f32_to_s64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0 +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: f32_to_s64: +; NONEON-NOSVE: // %bb.0: // %entry +; NONEON-NOSVE-NEXT: fcvtzs x0, s0 +; NONEON-NOSVE-NEXT: ret + entry: + %cvt = fptosi float %x to i64 + ret i64 %cvt +} + +define i32 @f64_to_s32(double %x) { +; CHECK-LABEL: f64_to_s32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs w0, d0 +; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: f64_to_s32: +; NONEON-NOSVE: // %bb.0: // %entry +; NONEON-NOSVE-NEXT: fcvtzs w0, d0 +; NONEON-NOSVE-NEXT: ret + entry: + %cvt = fptosi double %x to i32 + ret i32 %cvt +} + +define i64 @f64_to_s64(double %x) { +; CHECK-LABEL: f64_to_s64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: f64_to_s64: +; NONEON-NOSVE: // %bb.0: // %entry +; NONEON-NOSVE-NEXT: fcvtzs x0, d0 +; NONEON-NOSVE-NEXT: ret + entry: + %cvt = fptosi double %x to i64 
+ ret i64 %cvt +} + +define i32 @f16_to_u32(half %x) { +; CHECK-LABEL: f16_to_u32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 +; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.h +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: f16_to_u32: +; NONEON-NOSVE: // %bb.0: // %entry +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzu w0, s0 +; NONEON-NOSVE-NEXT: ret + entry: + %cvt = fptoui half %x to i32 + ret i32 %cvt +} + +define i64 @f16_to_u64(half %x) { +; CHECK-LABEL: f16_to_u64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 +; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.h +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: f16_to_u64: +; NONEON-NOSVE: // %bb.0: // %entry +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzu x0, s0 +; NONEON-NOSVE-NEXT: ret + entry: + %cvt = fptoui half %x to i64 + ret i64 %cvt +} + +define i32 @f32_to_u32(float %x) { +; CHECK-LABEL: f32_to_u32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0 +; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: f32_to_u32: +; NONEON-NOSVE: // %bb.0: // %entry +; NONEON-NOSVE-NEXT: fcvtzu w0, s0 +; NONEON-NOSVE-NEXT: ret + entry: + %cvt = fptoui float %x to i32 + ret i32 %cvt +} + +define i64 @f32_to_u64(float %x) { +; CHECK-LABEL: f32_to_u64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0 +; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.s +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: f32_to_u64: +; NONEON-NOSVE: // %bb.0: // %entry +; NONEON-NOSVE-NEXT: fcvtzu x0, s0 +; NONEON-NOSVE-NEXT: ret + entry: + %cvt = fptoui float %x to i64 + ret i64 %cvt +} + +define i32 @f64_to_u32(double %x) { +; CHECK-LABEL: f64_to_u32: +; CHECK: // %bb.0: // %entry +; 
CHECK-NEXT: fcvtzu w0, d0 +; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: f64_to_u32: +; NONEON-NOSVE: // %bb.0: // %entry +; NONEON-NOSVE-NEXT: fcvtzu w0, d0 +; NONEON-NOSVE-NEXT: ret + entry: + %cvt = fptoui double %x to i32 + ret i32 %cvt +} + +define i64 @f64_to_u64(double %x) { +; CHECK-LABEL: f64_to_u64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: f64_to_u64: +; NONEON-NOSVE: // %bb.0: // %entry +; NONEON-NOSVE-NEXT: fcvtzu x0, d0 +; NONEON-NOSVE-NEXT: ret + entry: + %cvt = fptoui double %x to i64 + ret i64 %cvt +} + +define i32 @strict_convert_signed(double %x) { +; CHECK-LABEL: strict_convert_signed: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs w0, d0 +; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: strict_convert_signed: +; NONEON-NOSVE: // %bb.0: // %entry +; NONEON-NOSVE-NEXT: fcvtzs w0, d0 +; NONEON-NOSVE-NEXT: ret + entry: + %cvt = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %x, metadata !"fpexcept.strict") #0 + ret i32 %cvt +} + +define i32 @strict_convert_unsigned(float %x) { +; CHECK-LABEL: strict_convert_unsigned: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzu w0, s0 +; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: strict_convert_unsigned: +; NONEON-NOSVE: // %bb.0: // %entry +; NONEON-NOSVE-NEXT: fcvtzu w0, s0 +; NONEON-NOSVE-NEXT: ret + entry: + %cvt = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %x, metadata !"fpexcept.strict") #0 + ret i32 %cvt +} + +attributes #0 = { strictfp } diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-int-to-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-int-to-fp.ll new file mode 100644 index 0000000000000..d4221dab4fcff --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-int-to-fp.ll @@ -0,0 +1,252 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py +; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE + +target triple = "aarch64-unknown-linux-gnu" + +define half @s32_to_f16(i32 %x) { +; CHECK-LABEL: s32_to_f16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: scvtf z0.h, p0/m, z0.s +; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 +; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: s32_to_f16: +; NONEON-NOSVE: // %bb.0: // %entry +; NONEON-NOSVE-NEXT: scvtf s0, w0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: ret +entry: + %cvt = sitofp i32 %x to half + ret half %cvt +} + +define float @s32_to_f32(i32 %x) { +; CHECK-LABEL: s32_to_f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: scvtf z0.s, p0/m, z0.s +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 +; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: s32_to_f32: +; NONEON-NOSVE: // %bb.0: // %entry +; NONEON-NOSVE-NEXT: scvtf s0, w0 +; NONEON-NOSVE-NEXT: ret +entry: + %cvt = sitofp i32 %x to float + ret float %cvt +} + +define double @s32_to_f64(i32 %x) { +; CHECK-LABEL: s32_to_f64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: scvtf d0, w0 +; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: s32_to_f64: +; NONEON-NOSVE: // %bb.0: // %entry +; NONEON-NOSVE-NEXT: scvtf d0, w0 +; NONEON-NOSVE-NEXT: ret +entry: + %cvt = sitofp i32 %x to double + ret double %cvt +} + +define half @u32_to_f16(i32 %x) { +; CHECK-LABEL: u32_to_f16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ucvtf z0.h, p0/m, z0.s +; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 +; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: u32_to_f16: +; NONEON-NOSVE: // %bb.0: // %entry +; NONEON-NOSVE-NEXT: ucvtf s0, w0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; 
NONEON-NOSVE-NEXT: ret +entry: + %cvt = uitofp i32 %x to half + ret half %cvt +} + +define float @u32_to_f32(i32 %x) { +; CHECK-LABEL: u32_to_f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 +; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: u32_to_f32: +; NONEON-NOSVE: // %bb.0: // %entry +; NONEON-NOSVE-NEXT: ucvtf s0, w0 +; NONEON-NOSVE-NEXT: ret +entry: + %cvt = uitofp i32 %x to float + ret float %cvt +} + +define double @u32_to_f64(i32 %x) { +; CHECK-LABEL: u32_to_f64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ucvtf d0, w0 +; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: u32_to_f64: +; NONEON-NOSVE: // %bb.0: // %entry +; NONEON-NOSVE-NEXT: ucvtf d0, w0 +; NONEON-NOSVE-NEXT: ret +entry: + %cvt = uitofp i32 %x to double + ret double %cvt +} + +define half @s64_to_f16(i64 %x) { +; CHECK-LABEL: s64_to_f16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: scvtf z0.h, p0/m, z0.d +; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 +; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: s64_to_f16: +; NONEON-NOSVE: // %bb.0: // %entry +; NONEON-NOSVE-NEXT: scvtf s0, x0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: ret +entry: + %cvt = sitofp i64 %x to half + ret half %cvt +} + +define float @s64_to_f32(i64 %x) { +; CHECK-LABEL: s64_to_f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: scvtf z0.s, p0/m, z0.d +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 +; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: s64_to_f32: +; NONEON-NOSVE: // %bb.0: // %entry +; NONEON-NOSVE-NEXT: scvtf s0, x0 +; NONEON-NOSVE-NEXT: ret +entry: + %cvt = sitofp i64 %x to float + ret float %cvt +} + +define double @s64_to_f64(i64 %x) { +; CHECK-LABEL: s64_to_f64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: scvtf z0.d, 
p0/m, z0.d +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: s64_to_f64: +; NONEON-NOSVE: // %bb.0: // %entry +; NONEON-NOSVE-NEXT: scvtf d0, x0 +; NONEON-NOSVE-NEXT: ret +entry: + %cvt = sitofp i64 %x to double + ret double %cvt +} + +define half @u64_to_f16(i64 %x) { +; CHECK-LABEL: u64_to_f16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ucvtf z0.h, p0/m, z0.d +; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 +; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: u64_to_f16: +; NONEON-NOSVE: // %bb.0: // %entry +; NONEON-NOSVE-NEXT: ucvtf s0, x0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: ret +entry: + %cvt = uitofp i64 %x to half + ret half %cvt +} + +define float @u64_to_f32(i64 %x) { +; CHECK-LABEL: u64_to_f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ucvtf z0.s, p0/m, z0.d +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 +; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: u64_to_f32: +; NONEON-NOSVE: // %bb.0: // %entry +; NONEON-NOSVE-NEXT: ucvtf s0, x0 +; NONEON-NOSVE-NEXT: ret +entry: + %cvt = uitofp i64 %x to float + ret float %cvt +} + +define double @u64_to_f64(i64 %x) { +; CHECK-LABEL: u64_to_f64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: u64_to_f64: +; NONEON-NOSVE: // %bb.0: // %entry +; NONEON-NOSVE-NEXT: ucvtf d0, x0 +; NONEON-NOSVE-NEXT: ret +entry: + %cvt = uitofp i64 %x to double + ret double %cvt +} + +define float @strict_convert_signed(i32 %x) { +; CHECK-LABEL: strict_convert_signed: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: scvtf s0, w0 +; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: strict_convert_signed: +; NONEON-NOSVE: // %bb.0: // %entry +; NONEON-NOSVE-NEXT: scvtf s0, w0 +; NONEON-NOSVE-NEXT: ret 
+entry: + %cvt = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %cvt +} + +define float @strict_convert_unsigned(i64 %x) { +; CHECK-LABEL: strict_convert_unsigned: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ucvtf s0, x0 +; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: strict_convert_unsigned: +; NONEON-NOSVE: // %bb.0: // %entry +; NONEON-NOSVE-NEXT: ucvtf s0, x0 +; NONEON-NOSVE-NEXT: ret +entry: + %cvt = call float @llvm.experimental.constrained.uitofp.f32.i64(i64 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %cvt +} + +attributes #0 = { strictfp } diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll index 11fee267660c0..b61c30af37994 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll @@ -418,8 +418,10 @@ define void @fcvtzu_v16f16_v16i32(ptr %a, ptr %b) { define <1 x i64> @fcvtzu_v1f16_v1i64(<1 x half> %op1) { ; CHECK-LABEL: fcvtzu_v1f16_v1i64: ; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzu x8, h0 -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 +; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.h +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fcvtzu_v1f16_v1i64: @@ -441,10 +443,9 @@ define <2 x i64> @fcvtzu_v2f16_v2i64(<2 x half> %op1) { ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: mov z1.h, z0.h[1] -; CHECK-NEXT: fcvtzu x8, h0 -; CHECK-NEXT: fcvtzu x9, h1 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.h +; CHECK-NEXT: fcvtzu z1.d, p0/m, z1.h ; CHECK-NEXT: zip1 z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: 
ret @@ -472,20 +473,17 @@ define void @fcvtzu_v4f16_v4i64(ptr %a, ptr %b) { ; CHECK-LABEL: fcvtzu_v4f16_v4i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov z1.h, z0.h[3] ; CHECK-NEXT: mov z2.h, z0.h[2] ; CHECK-NEXT: mov z3.h, z0.h[1] -; CHECK-NEXT: fcvtzu x10, h0 -; CHECK-NEXT: fcvtzu x8, h1 -; CHECK-NEXT: fcvtzu x9, h2 -; CHECK-NEXT: fcvtzu x11, h3 -; CHECK-NEXT: fmov d2, x10 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fmov d1, x9 -; CHECK-NEXT: zip1 z0.d, z1.d, z0.d -; CHECK-NEXT: fmov d1, x11 +; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.h +; CHECK-NEXT: fcvtzu z1.d, p0/m, z1.h +; CHECK-NEXT: fcvtzu z2.d, p0/m, z2.h +; CHECK-NEXT: fcvtzu z3.d, p0/m, z3.h ; CHECK-NEXT: zip1 z1.d, z2.d, z1.d -; CHECK-NEXT: stp q1, q0, [x1] +; CHECK-NEXT: zip1 z0.d, z0.d, z3.d +; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fcvtzu_v4f16_v4i64: @@ -522,36 +520,29 @@ define void @fcvtzu_v8f16_v8i64(ptr %a, ptr %b) { ; CHECK-LABEL: fcvtzu_v8f16_v8i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov z1.d, z0.d ; CHECK-NEXT: mov z2.h, z0.h[3] ; CHECK-NEXT: mov z3.h, z0.h[2] ; CHECK-NEXT: mov z4.h, z0.h[1] -; CHECK-NEXT: fcvtzu x10, h0 ; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8 -; CHECK-NEXT: fcvtzu x8, h2 -; CHECK-NEXT: fcvtzu x9, h3 -; CHECK-NEXT: fcvtzu x11, h4 +; CHECK-NEXT: fcvtzu z2.d, p0/m, z2.h +; CHECK-NEXT: fcvtzu z3.d, p0/m, z3.h +; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.h +; CHECK-NEXT: fcvtzu z4.d, p0/m, z4.h ; CHECK-NEXT: mov z5.h, z1.h[3] ; CHECK-NEXT: mov z6.h, z1.h[2] -; CHECK-NEXT: mov z2.h, z1.h[1] -; CHECK-NEXT: fcvtzu x14, h1 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fmov d1, x9 -; CHECK-NEXT: fmov d3, x11 -; CHECK-NEXT: fcvtzu x12, h5 -; CHECK-NEXT: fcvtzu x13, h6 -; CHECK-NEXT: fcvtzu x15, h2 -; CHECK-NEXT: fmov d2, x10 -; CHECK-NEXT: zip1 z0.d, z1.d, z0.d -; CHECK-NEXT: fmov d1, x12 -; CHECK-NEXT: fmov d4, x13 -; CHECK-NEXT: zip1 z2.d, z2.d, z3.d -; CHECK-NEXT: fmov d3, 
x14 -; CHECK-NEXT: zip1 z1.d, z4.d, z1.d -; CHECK-NEXT: fmov d4, x15 -; CHECK-NEXT: stp q2, q0, [x1] -; CHECK-NEXT: zip1 z3.d, z3.d, z4.d -; CHECK-NEXT: stp q3, q1, [x1, #32] +; CHECK-NEXT: mov z7.h, z1.h[1] +; CHECK-NEXT: fcvtzu z1.d, p0/m, z1.h +; CHECK-NEXT: zip1 z2.d, z3.d, z2.d +; CHECK-NEXT: zip1 z0.d, z0.d, z4.d +; CHECK-NEXT: fcvtzu z5.d, p0/m, z5.h +; CHECK-NEXT: fcvtzu z6.d, p0/m, z6.h +; CHECK-NEXT: fcvtzu z7.d, p0/m, z7.h +; CHECK-NEXT: stp q0, q2, [x1] +; CHECK-NEXT: zip1 z3.d, z6.d, z5.d +; CHECK-NEXT: zip1 z1.d, z1.d, z7.d +; CHECK-NEXT: stp q1, q3, [x1, #32] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fcvtzu_v8f16_v8i64: @@ -604,67 +595,54 @@ define void @fcvtzu_v8f16_v8i64(ptr %a, ptr %b) { define void @fcvtzu_v16f16_v16i64(ptr %a, ptr %b) { ; CHECK-LABEL: fcvtzu_v16f16_v16i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: mov z3.d, z0.d -; CHECK-NEXT: mov z5.d, z1.d -; CHECK-NEXT: mov z2.h, z0.h[3] -; CHECK-NEXT: mov z4.h, z1.h[1] -; CHECK-NEXT: mov z6.h, z1.h[3] -; CHECK-NEXT: fcvtzu x9, h1 -; CHECK-NEXT: fcvtzu x8, h0 -; CHECK-NEXT: mov z7.h, z0.h[1] -; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8 -; CHECK-NEXT: ext z5.b, z5.b, z1.b, #8 -; CHECK-NEXT: fcvtzu x10, h2 -; CHECK-NEXT: fcvtzu x11, h4 -; CHECK-NEXT: fcvtzu x12, h6 -; CHECK-NEXT: mov z1.h, z1.h[2] -; CHECK-NEXT: mov z0.h, z0.h[2] -; CHECK-NEXT: fmov d16, x9 -; CHECK-NEXT: mov z2.h, z3.h[3] -; CHECK-NEXT: mov z4.h, z5.h[3] -; CHECK-NEXT: fcvtzu x14, h3 -; CHECK-NEXT: fcvtzu x13, h1 -; CHECK-NEXT: fcvtzu x15, h5 -; CHECK-NEXT: mov z1.h, z3.h[1] -; CHECK-NEXT: mov z6.h, z5.h[1] -; CHECK-NEXT: mov z5.h, z5.h[2] -; CHECK-NEXT: mov z3.h, z3.h[2] -; CHECK-NEXT: fcvtzu x9, h2 -; CHECK-NEXT: fmov d2, x10 -; CHECK-NEXT: fcvtzu x10, h4 -; CHECK-NEXT: fmov d4, x11 -; CHECK-NEXT: fcvtzu x11, h7 -; CHECK-NEXT: fmov d7, x12 -; CHECK-NEXT: fcvtzu x12, h0 -; CHECK-NEXT: fmov d0, x13 -; CHECK-NEXT: fcvtzu x13, h1 -; CHECK-NEXT: fmov d1, x14 -; CHECK-NEXT: fcvtzu x14, h6 -; CHECK-NEXT: fmov d6, 
x15 -; CHECK-NEXT: fcvtzu x15, h5 -; CHECK-NEXT: fmov d5, x9 -; CHECK-NEXT: fcvtzu x9, h3 -; CHECK-NEXT: zip1 z4.d, z16.d, z4.d -; CHECK-NEXT: fmov d16, x8 -; CHECK-NEXT: zip1 z0.d, z0.d, z7.d -; CHECK-NEXT: fmov d3, x12 -; CHECK-NEXT: fmov d7, x10 -; CHECK-NEXT: stp q4, q0, [x1, #64] -; CHECK-NEXT: fmov d0, x14 -; CHECK-NEXT: fmov d4, x9 -; CHECK-NEXT: zip1 z2.d, z3.d, z2.d -; CHECK-NEXT: fmov d3, x11 -; CHECK-NEXT: zip1 z0.d, z6.d, z0.d -; CHECK-NEXT: zip1 z4.d, z4.d, z5.d -; CHECK-NEXT: zip1 z3.d, z16.d, z3.d -; CHECK-NEXT: fmov d16, x15 -; CHECK-NEXT: stp q3, q2, [x1] -; CHECK-NEXT: fmov d2, x13 -; CHECK-NEXT: zip1 z7.d, z16.d, z7.d -; CHECK-NEXT: zip1 z1.d, z1.d, z2.d -; CHECK-NEXT: stp q0, q7, [x1, #96] -; CHECK-NEXT: stp q1, q4, [x1, #32] +; CHECK-NEXT: ldp q1, q0, [x0] +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z3.h, z1.h[1] +; CHECK-NEXT: mov z5.h, z0.h[3] +; CHECK-NEXT: mov z6.h, z0.h[2] +; CHECK-NEXT: mov z16.d, z0.d +; CHECK-NEXT: movprfx z2, z1 +; CHECK-NEXT: fcvtzu z2.d, p0/m, z1.h +; CHECK-NEXT: mov z4.h, z1.h[3] +; CHECK-NEXT: mov z7.h, z1.h[2] +; CHECK-NEXT: mov z17.h, z0.h[1] +; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 +; CHECK-NEXT: fcvtzu z3.d, p0/m, z3.h +; CHECK-NEXT: fcvtzu z5.d, p0/m, z5.h +; CHECK-NEXT: fcvtzu z6.d, p0/m, z6.h +; CHECK-NEXT: ext z16.b, z16.b, z0.b, #8 +; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.h +; CHECK-NEXT: fcvtzu z4.d, p0/m, z4.h +; CHECK-NEXT: fcvtzu z17.d, p0/m, z17.h +; CHECK-NEXT: fcvtzu z7.d, p0/m, z7.h +; CHECK-NEXT: mov z20.h, z1.h[3] +; CHECK-NEXT: mov z18.h, z16.h[3] +; CHECK-NEXT: mov z19.h, z16.h[2] +; CHECK-NEXT: mov z21.h, z16.h[1] +; CHECK-NEXT: zip1 z2.d, z2.d, z3.d +; CHECK-NEXT: mov z3.h, z1.h[2] +; CHECK-NEXT: zip1 z5.d, z6.d, z5.d +; CHECK-NEXT: mov z6.h, z1.h[1] +; CHECK-NEXT: zip1 z0.d, z0.d, z17.d +; CHECK-NEXT: fcvtzu z16.d, p0/m, z16.h +; CHECK-NEXT: fcvtzu z18.d, p0/m, z18.h +; CHECK-NEXT: movprfx z17, z21 +; CHECK-NEXT: fcvtzu z17.d, p0/m, z21.h +; CHECK-NEXT: fcvtzu z19.d, p0/m, z19.h +; CHECK-NEXT: 
zip1 z4.d, z7.d, z4.d +; CHECK-NEXT: movprfx z7, z20 +; CHECK-NEXT: fcvtzu z7.d, p0/m, z20.h +; CHECK-NEXT: fcvtzu z3.d, p0/m, z3.h +; CHECK-NEXT: fcvtzu z1.d, p0/m, z1.h +; CHECK-NEXT: stp q0, q5, [x1, #64] +; CHECK-NEXT: fcvtzu z6.d, p0/m, z6.h +; CHECK-NEXT: zip1 z0.d, z19.d, z18.d +; CHECK-NEXT: zip1 z5.d, z16.d, z17.d +; CHECK-NEXT: stp q2, q4, [x1] +; CHECK-NEXT: zip1 z2.d, z3.d, z7.d +; CHECK-NEXT: zip1 z1.d, z1.d, z6.d +; CHECK-NEXT: stp q5, q0, [x1, #96] +; CHECK-NEXT: stp q1, q2, [x1, #32] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fcvtzu_v16f16_v16i64: @@ -2135,8 +2113,10 @@ define void @fcvtzs_v16f16_v16i32(ptr %a, ptr %b) { define <1 x i64> @fcvtzs_v1f16_v1i64(<1 x half> %op1) { ; CHECK-LABEL: fcvtzs_v1f16_v1i64: ; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzs x8, h0 -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fcvtzs_v1f16_v1i64: @@ -2159,10 +2139,9 @@ define <2 x i64> @fcvtzs_v2f16_v2i64(<2 x half> %op1) { ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: mov z1.h, z0.h[1] -; CHECK-NEXT: fcvtzs x8, h0 -; CHECK-NEXT: fcvtzs x9, h1 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h +; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.h ; CHECK-NEXT: zip1 z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -2190,20 +2169,17 @@ define void @fcvtzs_v4f16_v4i64(ptr %a, ptr %b) { ; CHECK-LABEL: fcvtzs_v4f16_v4i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov z1.h, z0.h[3] ; CHECK-NEXT: mov z2.h, z0.h[2] ; CHECK-NEXT: mov z3.h, z0.h[1] -; CHECK-NEXT: fcvtzs x10, h0 -; CHECK-NEXT: fcvtzs x8, h1 -; CHECK-NEXT: fcvtzs x9, h2 -; CHECK-NEXT: fcvtzs x11, h3 -; CHECK-NEXT: fmov d2, x10 -; CHECK-NEXT: fmov d0, x8 -; 
CHECK-NEXT: fmov d1, x9 -; CHECK-NEXT: zip1 z0.d, z1.d, z0.d -; CHECK-NEXT: fmov d1, x11 +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h +; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.h +; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.h +; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.h ; CHECK-NEXT: zip1 z1.d, z2.d, z1.d -; CHECK-NEXT: stp q1, q0, [x1] +; CHECK-NEXT: zip1 z0.d, z0.d, z3.d +; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fcvtzs_v4f16_v4i64: @@ -2240,36 +2216,29 @@ define void @fcvtzs_v8f16_v8i64(ptr %a, ptr %b) { ; CHECK-LABEL: fcvtzs_v8f16_v8i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov z1.d, z0.d ; CHECK-NEXT: mov z2.h, z0.h[3] ; CHECK-NEXT: mov z3.h, z0.h[2] ; CHECK-NEXT: mov z4.h, z0.h[1] -; CHECK-NEXT: fcvtzs x10, h0 ; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8 -; CHECK-NEXT: fcvtzs x8, h2 -; CHECK-NEXT: fcvtzs x9, h3 -; CHECK-NEXT: fcvtzs x11, h4 +; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.h +; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.h +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h +; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.h ; CHECK-NEXT: mov z5.h, z1.h[3] ; CHECK-NEXT: mov z6.h, z1.h[2] -; CHECK-NEXT: mov z2.h, z1.h[1] -; CHECK-NEXT: fcvtzs x14, h1 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fmov d1, x9 -; CHECK-NEXT: fmov d3, x11 -; CHECK-NEXT: fcvtzs x12, h5 -; CHECK-NEXT: fcvtzs x13, h6 -; CHECK-NEXT: fcvtzs x15, h2 -; CHECK-NEXT: fmov d2, x10 -; CHECK-NEXT: zip1 z0.d, z1.d, z0.d -; CHECK-NEXT: fmov d1, x12 -; CHECK-NEXT: fmov d4, x13 -; CHECK-NEXT: zip1 z2.d, z2.d, z3.d -; CHECK-NEXT: fmov d3, x14 -; CHECK-NEXT: zip1 z1.d, z4.d, z1.d -; CHECK-NEXT: fmov d4, x15 -; CHECK-NEXT: stp q2, q0, [x1] -; CHECK-NEXT: zip1 z3.d, z3.d, z4.d -; CHECK-NEXT: stp q3, q1, [x1, #32] +; CHECK-NEXT: mov z7.h, z1.h[1] +; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.h +; CHECK-NEXT: zip1 z2.d, z3.d, z2.d +; CHECK-NEXT: zip1 z0.d, z0.d, z4.d +; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.h +; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.h +; CHECK-NEXT: fcvtzs z7.d, p0/m, z7.h +; CHECK-NEXT: 
stp q0, q2, [x1] +; CHECK-NEXT: zip1 z3.d, z6.d, z5.d +; CHECK-NEXT: zip1 z1.d, z1.d, z7.d +; CHECK-NEXT: stp q1, q3, [x1, #32] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fcvtzs_v8f16_v8i64: @@ -2322,67 +2291,54 @@ define void @fcvtzs_v8f16_v8i64(ptr %a, ptr %b) { define void @fcvtzs_v16f16_v16i64(ptr %a, ptr %b) { ; CHECK-LABEL: fcvtzs_v16f16_v16i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: mov z3.d, z0.d -; CHECK-NEXT: mov z5.d, z1.d -; CHECK-NEXT: mov z2.h, z0.h[3] -; CHECK-NEXT: mov z4.h, z1.h[1] -; CHECK-NEXT: mov z6.h, z1.h[3] -; CHECK-NEXT: fcvtzs x9, h1 -; CHECK-NEXT: fcvtzs x8, h0 -; CHECK-NEXT: mov z7.h, z0.h[1] -; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8 -; CHECK-NEXT: ext z5.b, z5.b, z1.b, #8 -; CHECK-NEXT: fcvtzs x10, h2 -; CHECK-NEXT: fcvtzs x11, h4 -; CHECK-NEXT: fcvtzs x12, h6 -; CHECK-NEXT: mov z1.h, z1.h[2] -; CHECK-NEXT: mov z0.h, z0.h[2] -; CHECK-NEXT: fmov d16, x9 -; CHECK-NEXT: mov z2.h, z3.h[3] -; CHECK-NEXT: mov z4.h, z5.h[3] -; CHECK-NEXT: fcvtzs x14, h3 -; CHECK-NEXT: fcvtzs x13, h1 -; CHECK-NEXT: fcvtzs x15, h5 -; CHECK-NEXT: mov z1.h, z3.h[1] -; CHECK-NEXT: mov z6.h, z5.h[1] -; CHECK-NEXT: mov z5.h, z5.h[2] -; CHECK-NEXT: mov z3.h, z3.h[2] -; CHECK-NEXT: fcvtzs x9, h2 -; CHECK-NEXT: fmov d2, x10 -; CHECK-NEXT: fcvtzs x10, h4 -; CHECK-NEXT: fmov d4, x11 -; CHECK-NEXT: fcvtzs x11, h7 -; CHECK-NEXT: fmov d7, x12 -; CHECK-NEXT: fcvtzs x12, h0 -; CHECK-NEXT: fmov d0, x13 -; CHECK-NEXT: fcvtzs x13, h1 -; CHECK-NEXT: fmov d1, x14 -; CHECK-NEXT: fcvtzs x14, h6 -; CHECK-NEXT: fmov d6, x15 -; CHECK-NEXT: fcvtzs x15, h5 -; CHECK-NEXT: fmov d5, x9 -; CHECK-NEXT: fcvtzs x9, h3 -; CHECK-NEXT: zip1 z4.d, z16.d, z4.d -; CHECK-NEXT: fmov d16, x8 -; CHECK-NEXT: zip1 z0.d, z0.d, z7.d -; CHECK-NEXT: fmov d3, x12 -; CHECK-NEXT: fmov d7, x10 -; CHECK-NEXT: stp q4, q0, [x1, #64] -; CHECK-NEXT: fmov d0, x14 -; CHECK-NEXT: fmov d4, x9 -; CHECK-NEXT: zip1 z2.d, z3.d, z2.d -; CHECK-NEXT: fmov d3, x11 -; CHECK-NEXT: zip1 z0.d, z6.d, z0.d -; 
CHECK-NEXT: zip1 z4.d, z4.d, z5.d -; CHECK-NEXT: zip1 z3.d, z16.d, z3.d -; CHECK-NEXT: fmov d16, x15 -; CHECK-NEXT: stp q3, q2, [x1] -; CHECK-NEXT: fmov d2, x13 -; CHECK-NEXT: zip1 z7.d, z16.d, z7.d -; CHECK-NEXT: zip1 z1.d, z1.d, z2.d -; CHECK-NEXT: stp q0, q7, [x1, #96] -; CHECK-NEXT: stp q1, q4, [x1, #32] +; CHECK-NEXT: ldp q1, q0, [x0] +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z3.h, z1.h[1] +; CHECK-NEXT: mov z5.h, z0.h[3] +; CHECK-NEXT: mov z6.h, z0.h[2] +; CHECK-NEXT: mov z16.d, z0.d +; CHECK-NEXT: movprfx z2, z1 +; CHECK-NEXT: fcvtzs z2.d, p0/m, z1.h +; CHECK-NEXT: mov z4.h, z1.h[3] +; CHECK-NEXT: mov z7.h, z1.h[2] +; CHECK-NEXT: mov z17.h, z0.h[1] +; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 +; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.h +; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.h +; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.h +; CHECK-NEXT: ext z16.b, z16.b, z0.b, #8 +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h +; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.h +; CHECK-NEXT: fcvtzs z17.d, p0/m, z17.h +; CHECK-NEXT: fcvtzs z7.d, p0/m, z7.h +; CHECK-NEXT: mov z20.h, z1.h[3] +; CHECK-NEXT: mov z18.h, z16.h[3] +; CHECK-NEXT: mov z19.h, z16.h[2] +; CHECK-NEXT: mov z21.h, z16.h[1] +; CHECK-NEXT: zip1 z2.d, z2.d, z3.d +; CHECK-NEXT: mov z3.h, z1.h[2] +; CHECK-NEXT: zip1 z5.d, z6.d, z5.d +; CHECK-NEXT: mov z6.h, z1.h[1] +; CHECK-NEXT: zip1 z0.d, z0.d, z17.d +; CHECK-NEXT: fcvtzs z16.d, p0/m, z16.h +; CHECK-NEXT: fcvtzs z18.d, p0/m, z18.h +; CHECK-NEXT: movprfx z17, z21 +; CHECK-NEXT: fcvtzs z17.d, p0/m, z21.h +; CHECK-NEXT: fcvtzs z19.d, p0/m, z19.h +; CHECK-NEXT: zip1 z4.d, z7.d, z4.d +; CHECK-NEXT: movprfx z7, z20 +; CHECK-NEXT: fcvtzs z7.d, p0/m, z20.h +; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.h +; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.h +; CHECK-NEXT: stp q0, q5, [x1, #64] +; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.h +; CHECK-NEXT: zip1 z0.d, z19.d, z18.d +; CHECK-NEXT: zip1 z5.d, z16.d, z17.d +; CHECK-NEXT: stp q2, q4, [x1] +; CHECK-NEXT: zip1 z2.d, z3.d, z7.d +; CHECK-NEXT: zip1 z1.d, z1.d, z6.d +; CHECK-NEXT: 
stp q5, q0, [x1, #96] +; CHECK-NEXT: stp q1, q2, [x1, #32] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fcvtzs_v16f16_v16i64: diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll index e595686cb4975..d61f92b406294 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll @@ -1142,10 +1142,9 @@ define <2 x half> @ucvtf_v2i64_v2f16(<2 x i64> %op1) { ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: mov z1.d, z0.d[1] -; CHECK-NEXT: fmov x8, d0 -; CHECK-NEXT: fmov x9, d1 -; CHECK-NEXT: ucvtf h0, x8 -; CHECK-NEXT: ucvtf h1, x9 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ucvtf z0.h, p0/m, z0.d +; CHECK-NEXT: ucvtf z1.h, p0/m, z1.d ; CHECK-NEXT: zip1 z0.h, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -2596,10 +2595,9 @@ define <2 x half> @scvtf_v2i64_v2f16(<2 x i64> %op1) { ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: mov z1.d, z0.d[1] -; CHECK-NEXT: fmov x8, d0 -; CHECK-NEXT: fmov x9, d1 -; CHECK-NEXT: scvtf h0, x8 -; CHECK-NEXT: scvtf h1, x9 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: scvtf z0.h, p0/m, z0.d +; CHECK-NEXT: scvtf z1.h, p0/m, z1.d ; CHECK-NEXT: zip1 z0.h, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -2795,7 +2793,10 @@ define half @scvtf_i16_f16(ptr %0) { ; CHECK-LABEL: scvtf_i16_f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ldrsh w8, [x0] -; CHECK-NEXT: scvtf h0, w8 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: scvtf z0.h, p0/m, z0.s +; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: scvtf_i16_f16: @@ -2813,7 +2814,10 @@ define float @scvtf_i16_f32(ptr %0) { ; CHECK-LABEL: scvtf_i16_f32: ; CHECK: // %bb.0: ; CHECK-NEXT: ldrsh w8, [x0] -; CHECK-NEXT: scvtf 
s0, w8 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: scvtf z0.s, p0/m, z0.s +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: scvtf_i16_f32: @@ -2846,8 +2850,10 @@ define double @scvtf_i16_f64(ptr %0) { define half @scvtf_i32_f16(ptr %0) { ; CHECK-LABEL: scvtf_i32_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: scvtf h0, w8 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ldr s0, [x0] +; CHECK-NEXT: scvtf z0.h, p0/m, z0.s +; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: scvtf_i32_f16: @@ -2864,8 +2870,10 @@ define half @scvtf_i32_f16(ptr %0) { define float @scvtf_i32_f32(ptr %0) { ; CHECK-LABEL: scvtf_i32_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: scvtf s0, w8 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ldr s0, [x0] +; CHECK-NEXT: scvtf z0.s, p0/m, z0.s +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: scvtf_i32_f32: @@ -2898,8 +2906,10 @@ define double @scvtf_i32_f64(ptr %0) { define half @scvtf_i64_f16(ptr %0) { ; CHECK-LABEL: scvtf_i64_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: scvtf h0, x8 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: scvtf z0.h, p0/m, z0.d +; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: scvtf_i64_f16: @@ -2916,8 +2926,10 @@ define half @scvtf_i64_f16(ptr %0) { define float @scvtf_i64_f32(ptr %0) { ; CHECK-LABEL: scvtf_i64_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: scvtf s0, x8 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: scvtf z0.s, p0/m, z0.d +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: scvtf_i64_f32: @@ -2933,8 +2945,10 @@ define float @scvtf_i64_f32(ptr %0) { define double @scvtf_i64_f64(ptr %0) { ; CHECK-LABEL: scvtf_i64_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: 
ldr x8, [x0] -; CHECK-NEXT: scvtf d0, x8 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: scvtf z0.d, p0/m, z0.d +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: scvtf_i64_f64: @@ -2951,7 +2965,10 @@ define half @ucvtf_i16_f16(ptr %0) { ; CHECK-LABEL: ucvtf_i16_f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: ucvtf h0, w8 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: ucvtf z0.h, p0/m, z0.s +; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: ucvtf_i16_f16: @@ -2969,7 +2986,10 @@ define float @ucvtf_i16_f32(ptr %0) { ; CHECK-LABEL: ucvtf_i16_f32: ; CHECK: // %bb.0: ; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: ucvtf s0, w8 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: ucvtf_i16_f32: @@ -3002,8 +3022,10 @@ define double @ucvtf_i16_f64(ptr %0) { define half @ucvtf_i32_f16(ptr %0) { ; CHECK-LABEL: ucvtf_i32_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: ucvtf h0, w8 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ldr s0, [x0] +; CHECK-NEXT: ucvtf z0.h, p0/m, z0.s +; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: ucvtf_i32_f16: @@ -3020,8 +3042,10 @@ define half @ucvtf_i32_f16(ptr %0) { define float @ucvtf_i32_f32(ptr %0) { ; CHECK-LABEL: ucvtf_i32_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: ucvtf s0, w8 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ldr s0, [x0] +; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: ucvtf_i32_f32: @@ -3054,8 +3078,10 @@ define double @ucvtf_i32_f64(ptr %0) { define half @ucvtf_i64_f16(ptr %0) { ; CHECK-LABEL: ucvtf_i64_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ucvtf h0, x8 +; 
CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ucvtf z0.h, p0/m, z0.d +; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: ucvtf_i64_f16: @@ -3072,8 +3098,10 @@ define half @ucvtf_i64_f16(ptr %0) { define float @ucvtf_i64_f32(ptr %0) { ; CHECK-LABEL: ucvtf_i64_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ucvtf s0, x8 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ucvtf z0.s, p0/m, z0.d +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: ucvtf_i64_f32: @@ -3089,8 +3117,10 @@ define float @ucvtf_i64_f32(ptr %0) { define double @ucvtf_i64_f64(ptr %0) { ; CHECK-LABEL: ucvtf_i64_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ucvtf d0, x8 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: ucvtf_i64_f64: