From 476c0cc7cdbaed16e5454828085f675babfe6f02 Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Wed, 16 Oct 2024 14:45:43 +0000 Subject: [PATCH] [AArch64][SVE] Avoid transfer to GPRs for fp -> int -> fp conversions When Neon is not available use SVE variants of FCVTZS, FCVTZU, UCVTF, and SCVTF for fp -> int -> fp conversions to avoid moving values to/from GPRs which may be expensive. Note: With +sme2p2 the single-element vector Neon variants of these instructions could be used instead (but that feature is not implemented yet). Follow up to #112213. --- .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 35 ++++++++ .../sve-streaming-mode-cvt-fp-int-fp.ll | 89 +++++++++++++++---- 2 files changed, 107 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 2a857234c7d74..19dc2016f9fcf 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -2421,6 +2421,41 @@ let Predicates = [HasSVEorSME] in { defm FSQRT_ZPmZ : sve_fp_2op_p_zd_HSD<0b01101, "fsqrt", AArch64fsqrt_mt>; } // End HasSVEorSME +// Helper for creating fp -> int -> fp conversions using SVE. +class sve_fp_int_fp_cvt + : OutPatFrag<(ops node: $Rn), + (EXTRACT_SUBREG + (FROM_INT (IMPLICIT_DEF), (PTRUE 1), + (TO_INT (IMPLICIT_DEF), (PTRUE 1), + (INSERT_SUBREG (IMPLICIT_DEF), $Rn, sub))), sub)>; + +// Some float -> int -> float conversion patterns where we want to keep the int +// values in FP registers using the SVE instructions to avoid costly GPR <-> FPR +// register transfers. Only used when NEON is not available (e.g. in streaming +// functions). +// TODO: When +sme2p2 is available single-element vectors should be preferred. +def HasNoNEON : Predicate<"!Subtarget->isNeonAvailable()">; +let Predicates = [HasSVEorSME, HasNoNEON] in { +def : Pat< + (f64 (sint_to_fp (i64 (fp_to_sint f64:$Rn)))), + (sve_fp_int_fp_cvt $Rn)>; +def : Pat< + (f64 (uint_to_fp (i64 (fp_to_uint f64:$Rn)))), + (sve_fp_int_fp_cvt $Rn)>; +def : Pat< + (f32 (sint_to_fp (i32 (fp_to_sint f32:$Rn)))), + (sve_fp_int_fp_cvt $Rn)>; +def : Pat< + (f32 (uint_to_fp (i32 (fp_to_uint f32:$Rn)))), + (sve_fp_int_fp_cvt $Rn)>; +def : Pat< + (f16 (sint_to_fp (i32 (fp_to_sint f16:$Rn)))), + (sve_fp_int_fp_cvt $Rn)>; +def : Pat< + (f16 (uint_to_fp (i32 (fp_to_uint f16:$Rn)))), + (sve_fp_int_fp_cvt $Rn)>; +} // End HasSVEorSME, HasNoNEON + let Predicates = [HasBF16, HasSVEorSME] in { defm BFDOT_ZZZ : sve_float_dot<0b1, 0b0, ZPR32, ZPR16, "bfdot", nxv8bf16, int_aarch64_sve_bfdot>; defm BFDOT_ZZI : sve_float_dot_indexed<0b1, 0b00, ZPR16, ZPR3b16, "bfdot", nxv8bf16, int_aarch64_sve_bfdot_lane_v2>; diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll index 9aadf3133ba19..fbbe2cc64ad24 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE ; RUN: llc < %s | FileCheck %s --check-prefix=NON-STREAMING target triple = "aarch64-unknown-linux-gnu" @@ -7,10 +8,19 @@ target triple = "aarch64-unknown-linux-gnu" define double @t1(double %x) { ; CHECK-LABEL: t1: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvtzs x8, d0 -; CHECK-NEXT: scvtf d0, x8 +; CHECK-NEXT: ptrue p0.d, vl1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d +; CHECK-NEXT: scvtf z0.d, p0/m, z0.d +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret ; +; NONEON-NOSVE-LABEL: t1: +; NONEON-NOSVE: // %bb.0: // %entry +; NONEON-NOSVE-NEXT: fcvtzs x8, d0 +; NONEON-NOSVE-NEXT: scvtf d0, x8 +; NONEON-NOSVE-NEXT: ret +; ; NON-STREAMING-LABEL: t1: ; NON-STREAMING: // %bb.0: // %entry ; NON-STREAMING-NEXT: fcvtzs d0, d0 @@ -25,10 +35,19 @@ entry: define float @t2(float %x) { ; CHECK-LABEL: t2: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvtzs w8, s0 -; CHECK-NEXT: scvtf s0, w8 +; CHECK-NEXT: ptrue p0.s, vl1 +; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0 +; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s +; CHECK-NEXT: scvtf z0.s, p0/m, z0.s +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret ; +; NONEON-NOSVE-LABEL: t2: +; NONEON-NOSVE: // %bb.0: // %entry +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ret +; ; NON-STREAMING-LABEL: t2: ; NON-STREAMING: // %bb.0: // %entry ; NON-STREAMING-NEXT: fcvtzs s0, s0 @@ -43,12 +62,21 @@ entry: define half @t3(half %x) { ; CHECK-LABEL: t3: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: fcvtzs w8, s0 -; CHECK-NEXT: scvtf s0, w8 -; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: ptrue p0.h, vl1 +; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 +; CHECK-NEXT: fcvtzs z0.h, p0/m, z0.h +; CHECK-NEXT: scvtf z0.h, p0/m, z0.h +; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret ; +; NONEON-NOSVE-LABEL: t3: +; NONEON-NOSVE: // %bb.0: // %entry +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: ret +; ; NON-STREAMING-LABEL: t3: ; NON-STREAMING: // %bb.0: // %entry ; NON-STREAMING-NEXT: fcvt s0, h0 @@ -65,10 +93,19 @@ entry: define double @t4(double %x) { ; CHECK-LABEL: t4: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvtzu x8, d0 -; CHECK-NEXT: ucvtf d0, x8 +; CHECK-NEXT: ptrue p0.d, vl1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d +; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret ; +; NONEON-NOSVE-LABEL: t4: +; NONEON-NOSVE: // %bb.0: // %entry +; NONEON-NOSVE-NEXT: fcvtzu x8, d0 +; NONEON-NOSVE-NEXT: ucvtf d0, x8 +; NONEON-NOSVE-NEXT: ret +; ; NON-STREAMING-LABEL: t4: ; NON-STREAMING: // %bb.0: // %entry ; NON-STREAMING-NEXT: fcvtzu d0, d0 @@ -83,10 +120,19 @@ entry: define float @t5(float %x) { ; CHECK-LABEL: t5: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvtzu w8, s0 -; CHECK-NEXT: ucvtf s0, w8 +; CHECK-NEXT: ptrue p0.s, vl1 +; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0 +; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s +; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret ; +; NONEON-NOSVE-LABEL: t5: +; NONEON-NOSVE: // %bb.0: // %entry +; NONEON-NOSVE-NEXT: fcvtzu w8, s0 +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ret +; ; NON-STREAMING-LABEL: t5: ; NON-STREAMING: // %bb.0: // %entry ; NON-STREAMING-NEXT: fcvtzu s0, s0 @@ -101,12 +147,21 @@ entry: define half @t6(half %x) { ; CHECK-LABEL: t6: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: fcvtzu w8, s0 -; CHECK-NEXT: ucvtf s0, w8 -; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: ptrue p0.h, vl1 +; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 +; CHECK-NEXT: fcvtzu z0.h, p0/m, z0.h +; CHECK-NEXT: ucvtf z0.h, p0/m, z0.h +; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret ; +; NONEON-NOSVE-LABEL: t6: +; NONEON-NOSVE: // %bb.0: // %entry +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzu w8, s0 +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: ret +; ; NON-STREAMING-LABEL: t6: ; NON-STREAMING: // %bb.0: // %entry ; NON-STREAMING-NEXT: fcvt s0, h0