Skip to content

Commit

Permalink
[AArch64][SVE] Avoid transfer to GPRs for fp -> int -> fp conversions
Browse files Browse the repository at this point in the history
When Neon is not available use SVE variants of FCVTZS, FCVTZU, UCVTF,
and  SCVTF for fp -> int -> fp conversions to avoid moving values
to/from GPRs which may be expensive.

Note: With +sme2p2 the single-element vector Neon variants of these
instructions could be used instead (but that feature is not implemented
yet).

Follow up to llvm#112213.
  • Loading branch information
MacDue committed Oct 16, 2024
1 parent df05512 commit 476c0cc
Show file tree
Hide file tree
Showing 2 changed files with 107 additions and 17 deletions.
35 changes: 35 additions & 0 deletions llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -2421,6 +2421,41 @@ let Predicates = [HasSVEorSME] in {
defm FSQRT_ZPmZ : sve_fp_2op_p_zd_HSD<0b01101, "fsqrt", AArch64fsqrt_mt>;
} // End HasSVEorSME

// Helper for creating fp -> int -> fp conversions using SVE.
class sve_fp_int_fp_cvt<Instruction PTRUE, Instruction FROM_INT, Instruction TO_INT, SubRegIndex sub>
: OutPatFrag<(ops node: $Rn),
(EXTRACT_SUBREG
(FROM_INT (IMPLICIT_DEF), (PTRUE 1),
(TO_INT (IMPLICIT_DEF), (PTRUE 1),
(INSERT_SUBREG (IMPLICIT_DEF), $Rn, sub))), sub)>;

// Some float -> int -> float conversion patterns where we want to keep the int
// values in FP registers using the SVE instructions to avoid costly GPR <-> FPR
// register transfers. Only used when NEON is not available (e.g. in streaming
// functions).
// TODO: When +sme2p2 is available single-element vectors should be preferred.
def HasNoNEON : Predicate<"!Subtarget->isNeonAvailable()">;
let Predicates = [HasSVEorSME, HasNoNEON] in {
def : Pat<
(f64 (sint_to_fp (i64 (fp_to_sint f64:$Rn)))),
(sve_fp_int_fp_cvt<PTRUE_D, SCVTF_ZPmZ_DtoD, FCVTZS_ZPmZ_DtoD, dsub> $Rn)>;
def : Pat<
(f64 (uint_to_fp (i64 (fp_to_uint f64:$Rn)))),
(sve_fp_int_fp_cvt<PTRUE_D, UCVTF_ZPmZ_DtoD, FCVTZU_ZPmZ_DtoD, dsub> $Rn)>;
def : Pat<
(f32 (sint_to_fp (i32 (fp_to_sint f32:$Rn)))),
(sve_fp_int_fp_cvt<PTRUE_S, SCVTF_ZPmZ_StoS, FCVTZS_ZPmZ_StoS, ssub> $Rn)>;
def : Pat<
(f32 (uint_to_fp (i32 (fp_to_uint f32:$Rn)))),
(sve_fp_int_fp_cvt<PTRUE_S, UCVTF_ZPmZ_StoS, FCVTZU_ZPmZ_StoS, ssub> $Rn)>;
def : Pat<
(f16 (sint_to_fp (i32 (fp_to_sint f16:$Rn)))),
(sve_fp_int_fp_cvt<PTRUE_H, SCVTF_ZPmZ_HtoH, FCVTZS_ZPmZ_HtoH, hsub> $Rn)>;
def : Pat<
(f16 (uint_to_fp (i32 (fp_to_uint f16:$Rn)))),
(sve_fp_int_fp_cvt<PTRUE_H, UCVTF_ZPmZ_HtoH, FCVTZU_ZPmZ_HtoH, hsub> $Rn)>;
} // End HasSVEorSME, HasNoNEON

let Predicates = [HasBF16, HasSVEorSME] in {
defm BFDOT_ZZZ : sve_float_dot<0b1, 0b0, ZPR32, ZPR16, "bfdot", nxv8bf16, int_aarch64_sve_bfdot>;
defm BFDOT_ZZI : sve_float_dot_indexed<0b1, 0b00, ZPR16, ZPR3b16, "bfdot", nxv8bf16, int_aarch64_sve_bfdot_lane_v2>;
Expand Down
89 changes: 72 additions & 17 deletions llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll
Original file line number Diff line number Diff line change
@@ -1,16 +1,26 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
; RUN: llc < %s | FileCheck %s --check-prefix=NON-STREAMING

target triple = "aarch64-unknown-linux-gnu"

define double @t1(double %x) {
; CHECK-LABEL: t1:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvtzs x8, d0
; CHECK-NEXT: scvtf d0, x8
; CHECK-NEXT: ptrue p0.d, vl1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: t1:
; NONEON-NOSVE: // %bb.0: // %entry
; NONEON-NOSVE-NEXT: fcvtzs x8, d0
; NONEON-NOSVE-NEXT: scvtf d0, x8
; NONEON-NOSVE-NEXT: ret
;
; NON-STREAMING-LABEL: t1:
; NON-STREAMING: // %bb.0: // %entry
; NON-STREAMING-NEXT: fcvtzs d0, d0
Expand All @@ -25,10 +35,19 @@ entry:
define float @t2(float %x) {
; CHECK-LABEL: t2:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvtzs w8, s0
; CHECK-NEXT: scvtf s0, w8
; CHECK-NEXT: ptrue p0.s, vl1
; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
; CHECK-NEXT: scvtf z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: t2:
; NONEON-NOSVE: // %bb.0: // %entry
; NONEON-NOSVE-NEXT: fcvtzs w8, s0
; NONEON-NOSVE-NEXT: scvtf s0, w8
; NONEON-NOSVE-NEXT: ret
;
; NON-STREAMING-LABEL: t2:
; NON-STREAMING: // %bb.0: // %entry
; NON-STREAMING-NEXT: fcvtzs s0, s0
Expand All @@ -43,12 +62,21 @@ entry:
define half @t3(half %x) {
; CHECK-LABEL: t3:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvt s0, h0
; CHECK-NEXT: fcvtzs w8, s0
; CHECK-NEXT: scvtf s0, w8
; CHECK-NEXT: fcvt h0, s0
; CHECK-NEXT: ptrue p0.h, vl1
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT: fcvtzs z0.h, p0/m, z0.h
; CHECK-NEXT: scvtf z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: t3:
; NONEON-NOSVE: // %bb.0: // %entry
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fcvtzs w8, s0
; NONEON-NOSVE-NEXT: scvtf s0, w8
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: ret
;
; NON-STREAMING-LABEL: t3:
; NON-STREAMING: // %bb.0: // %entry
; NON-STREAMING-NEXT: fcvt s0, h0
Expand All @@ -65,10 +93,19 @@ entry:
define double @t4(double %x) {
; CHECK-LABEL: t4:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvtzu x8, d0
; CHECK-NEXT: ucvtf d0, x8
; CHECK-NEXT: ptrue p0.d, vl1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: t4:
; NONEON-NOSVE: // %bb.0: // %entry
; NONEON-NOSVE-NEXT: fcvtzu x8, d0
; NONEON-NOSVE-NEXT: ucvtf d0, x8
; NONEON-NOSVE-NEXT: ret
;
; NON-STREAMING-LABEL: t4:
; NON-STREAMING: // %bb.0: // %entry
; NON-STREAMING-NEXT: fcvtzu d0, d0
Expand All @@ -83,10 +120,19 @@ entry:
define float @t5(float %x) {
; CHECK-LABEL: t5:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvtzu w8, s0
; CHECK-NEXT: ucvtf s0, w8
; CHECK-NEXT: ptrue p0.s, vl1
; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: t5:
; NONEON-NOSVE: // %bb.0: // %entry
; NONEON-NOSVE-NEXT: fcvtzu w8, s0
; NONEON-NOSVE-NEXT: ucvtf s0, w8
; NONEON-NOSVE-NEXT: ret
;
; NON-STREAMING-LABEL: t5:
; NON-STREAMING: // %bb.0: // %entry
; NON-STREAMING-NEXT: fcvtzu s0, s0
Expand All @@ -101,12 +147,21 @@ entry:
define half @t6(half %x) {
; CHECK-LABEL: t6:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvt s0, h0
; CHECK-NEXT: fcvtzu w8, s0
; CHECK-NEXT: ucvtf s0, w8
; CHECK-NEXT: fcvt h0, s0
; CHECK-NEXT: ptrue p0.h, vl1
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT: fcvtzu z0.h, p0/m, z0.h
; CHECK-NEXT: ucvtf z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: t6:
; NONEON-NOSVE: // %bb.0: // %entry
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fcvtzu w8, s0
; NONEON-NOSVE-NEXT: ucvtf s0, w8
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: ret
;
; NON-STREAMING-LABEL: t6:
; NON-STREAMING: // %bb.0: // %entry
; NON-STREAMING-NEXT: fcvt s0, h0
Expand Down

0 comments on commit 476c0cc

Please sign in to comment.