[LoongArch] Use LSX for scalar FP rounding with explicit rounding mode #114766
Conversation
The LoongArch base FP ISA only has the frint.{s/d} instructions, which read the global rounding mode. Use LSX to implement scalar ceil/floor/trunc/roundeven calls with an explicit rounding mode when -mlsx is enabled. This is faster than calling the libm library functions.
@llvm/pr-subscribers-backend-loongarch Author: ZhaoQi (zhaoqi5). Full diff: https://github.com/llvm/llvm-project/pull/114766.diff 3 Files Affected:
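For illustration, a minimal C sketch of the kind of call this patch affects (the function name and the build setup here are illustrative, not from the patch). Built with -mlsx, the ceilf call below can be selected to the inline vreplvei.w + vfrintrp.s sequence instead of a libm call:

#include <math.h>

// Illustrative only: with -mlsx this ceilf call can now be lowered to
// an inline LSX rounding sequence rather than a call to ceilf in libm.
float round_up(float x) { return ceilf(x); }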
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 6bee00d1ce3823..fde1a6acc0fde3 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -286,6 +286,10 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
VT, Expand);
}
setOperationAction(ISD::CTPOP, GRLenVT, Legal);
+ setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
+ setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
+ setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
+ setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);
}
// Set operations for 'LASX' feature.
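A note on the hunk above: marking FCEIL/FFLOOR/FTRUNC/FROUNDEVEN as Legal for f32/f64 tells the SelectionDAG legalizer not to expand these nodes into libm calls, which is what allows the LSX patterns added below to select them. Judging from the surrounding context, the hunk sits inside the block guarded by the LSX feature check, so without -mlsx the nodes keep their default lowering.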
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index 525d2802daa235..25e70b4e6b35ae 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -2259,6 +2259,32 @@ def : Pat<(loongarch_vfrsqrte v2f64:$src),
(VFRSQRTE_D v2f64:$src)>;
}
+// Scalar floating-point rounding (explicit rounding mode) via LSX
+def : Pat<(f32 (fceil FPR32:$fj)),
+ (f32 (EXTRACT_SUBREG (VFRINTRP_S (VREPLVEI_W
+ (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32), 0)), sub_32))>;
+def : Pat<(f64 (fceil FPR64:$fj)),
+ (f64 (EXTRACT_SUBREG (VFRINTRP_D (VREPLVEI_D
+ (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), 0)), sub_64))>;
+def : Pat<(f32 (ffloor FPR32:$fj)),
+ (f32 (EXTRACT_SUBREG (VFRINTRM_S (VREPLVEI_W
+ (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32), 0)), sub_32))>;
+def : Pat<(f64 (ffloor FPR64:$fj)),
+ (f64 (EXTRACT_SUBREG (VFRINTRM_D (VREPLVEI_D
+ (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), 0)), sub_64))>;
+def : Pat<(f32 (ftrunc FPR32:$fj)),
+ (f32 (EXTRACT_SUBREG (VFRINTRZ_S (VREPLVEI_W
+ (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32), 0)), sub_32))>;
+def : Pat<(f64 (ftrunc FPR64:$fj)),
+ (f64 (EXTRACT_SUBREG (VFRINTRZ_D (VREPLVEI_D
+ (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), 0)), sub_64))>;
+def : Pat<(f32 (froundeven FPR32:$fj)),
+ (f32 (EXTRACT_SUBREG (VFRINTRNE_S (VREPLVEI_W
+ (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32), 0)), sub_32))>;
+def : Pat<(f64 (froundeven FPR64:$fj)),
+ (f64 (EXTRACT_SUBREG (VFRINTRNE_D (VREPLVEI_D
+ (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), 0)), sub_64))>;
+
// load
def : Pat<(int_loongarch_lsx_vld GPR:$rj, timm:$imm),
(VLD GPR:$rj, (to_valid_timm timm:$imm))>;
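Each pattern follows the same shape: broadcast the scalar FPR into an LSX register (SUBREG_TO_REG + VREPLVEI), apply the per-lane rounding instruction with the desired explicit mode (vfrintrp = toward +inf, vfrintrm = toward -inf, vfrintrz = toward zero, vfrintrne = to nearest even), then extract lane 0 back as the scalar result. A rough C equivalent of the f32 ceil case, assuming the __m128 vector type and __lsx_vfrintrp_s intrinsic from lsxintrin.h as provided by GCC/Clang for LoongArch (a sketch of what the pattern emits, not code from the patch):

#include <lsxintrin.h>

// Sketch of the f32 fceil pattern: __m128 is a 4 x float vector and
// __lsx_vfrintrp_s rounds each lane toward +inf. Element access via
// v[0] relies on GCC/Clang vector extensions.
float ceil_via_lsx(float x) {
  __m128 v = {x, x, x, x};   // broadcast, like vreplvei.w $vr0, $vr0, 0
  v = __lsx_vfrintrp_s(v);   // vfrintrp.s $vr0, $vr0
  return v[0];               // extract lane 0 back to an FPR
}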
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vector-fp-conv.ll b/llvm/test/CodeGen/LoongArch/lsx/vector-fp-conv.ll
new file mode 100644
index 00000000000000..b2c618d2824e1f
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/vector-fp-conv.ll
@@ -0,0 +1,123 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+;; ceilf
+define float @ceil_f32(float %i) nounwind {
+; CHECK-LABEL: ceil_f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0
+; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0
+; CHECK-NEXT: vfrintrp.s $vr0, $vr0
+; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vr0
+; CHECK-NEXT: ret
+entry:
+ %0 = call float @llvm.ceil.f32(float %i)
+ ret float %0
+}
+
+;; ceil
+define double @ceil_f64(double %i) nounwind {
+; CHECK-LABEL: ceil_f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
+; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
+; CHECK-NEXT: vfrintrp.d $vr0, $vr0
+; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0
+; CHECK-NEXT: ret
+entry:
+ %0 = call double @llvm.ceil.f64(double %i)
+ ret double %0
+}
+
+;; floorf
+define float @floor_f32(float %i) nounwind {
+; CHECK-LABEL: floor_f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0
+; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0
+; CHECK-NEXT: vfrintrm.s $vr0, $vr0
+; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vr0
+; CHECK-NEXT: ret
+entry:
+ %0 = call float @llvm.floor.f32(float %i)
+ ret float %0
+}
+
+;; floor
+define double @floor_f64(double %i) nounwind {
+; CHECK-LABEL: floor_f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
+; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
+; CHECK-NEXT: vfrintrm.d $vr0, $vr0
+; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0
+; CHECK-NEXT: ret
+entry:
+ %0 = call double @llvm.floor.f64(double %i)
+ ret double %0
+}
+
+;; truncf
+define float @trunc_f32(float %i) nounwind {
+; CHECK-LABEL: trunc_f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0
+; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0
+; CHECK-NEXT: vfrintrz.s $vr0, $vr0
+; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vr0
+; CHECK-NEXT: ret
+entry:
+ %0 = call float @llvm.trunc.f32(float %i)
+ ret float %0
+}
+
+;; trunc
+define double @trunc_f64(double %i) nounwind {
+; CHECK-LABEL: trunc_f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
+; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
+; CHECK-NEXT: vfrintrz.d $vr0, $vr0
+; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0
+; CHECK-NEXT: ret
+entry:
+ %0 = call double @llvm.trunc.f64(double %i)
+ ret double %0
+}
+
+;; roundevenf
+define float @roundeven_f32(float %i) nounwind {
+; CHECK-LABEL: roundeven_f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0
+; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0
+; CHECK-NEXT: vfrintrne.s $vr0, $vr0
+; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vr0
+; CHECK-NEXT: ret
+entry:
+ %0 = call float @llvm.roundeven.f32(float %i)
+ ret float %0
+}
+
+;; roundeven
+define double @roundeven_f64(double %i) nounwind {
+; CHECK-LABEL: roundeven_f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
+; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
+; CHECK-NEXT: vfrintrne.d $vr0, $vr0
+; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0
+; CHECK-NEXT: ret
+entry:
+ %0 = call double @llvm.roundeven.f64(double %i)
+ ret double %0
+}
+
+declare float @llvm.ceil.f32(float)
+declare double @llvm.ceil.f64(double)
+declare float @llvm.floor.f32(float)
+declare double @llvm.floor.f64(double)
+declare float @llvm.trunc.f32(float)
+declare double @llvm.trunc.f64(double)
+declare float @llvm.roundeven.f32(float)
+declare double @llvm.roundeven.f64(double)
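One note on the checks above: the '# kill:' lines are comments printed for no-op KILL pseudo-instructions; they record the scalar FPR being reinterpreted as the full $vr0 vector register and emit no machine code, so the whole lowered sequence is just the vreplvei + vfrintr* pair.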
Wow, this is impressive. I think the isel part is OK; please wait for review from @SixWeining!
LGTM. But it would be better to precommit the new test before this PR.
@SixWeining: A test was added in #114968. Thanks.