[Clang][XTHeadVector] Add unit-stride fault-only-first load intrinsics (llvm#50)

* [Clang][XTHeadVector] Add Unit-stride Fault-Only-First Loads

* [NFC][XTHeadVector] Update README

* [Clang][XTHeadVector] Add tests for integers

* [Clang][XTHeadVector] Add tests for floats

* [NFC][XTHeadVector] Unify names in wrapper macros

* [Clang][XTHeadVector] Add wrapper macros

* [Clang][XTHeadVector] Test wrapper macros
imkiva committed Apr 1, 2024
1 parent 2e78c12 commit e4906da
Showing 11 changed files with 1,607 additions and 280 deletions.
2 changes: 2 additions & 0 deletions README.md
@@ -33,6 +33,8 @@ Any feature not listed below but present in the specification should be considered
- (Done) `6.2. Set vl to VLMAX with specific vtype`
- (WIP) `7. Vector Load/Store`
- (Done) `7.1. Vector Unit-Stride Operations`
- (Done) `7.2. Vector Strided Load/Store Operations`
- (Done) `7.4. Unit-stride Fault-Only-First Loads Operations`

## Q & A

54 changes: 54 additions & 0 deletions clang/include/clang/Basic/riscv_vector_xtheadv.td
@@ -242,6 +242,54 @@ let SupportOverloading = false,
def : RVVOutBuiltin<"Uv", "UvPCUez", type>;
}
}

// 7.4. Unit-stride Fault-Only-First Loads Operations
multiclass RVVVLEFFBuiltin<string ir, list<string> types> {
let Name = NAME # "_v",
IRName = ir,
MaskedIRName = ir # "_mask",
ManualCodegen = [{
{
if (IsMasked) {
// Move mask to right before vl.
std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end() - 1);
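// With both tail- and mask-agnostic policies, the merge operand can be poison.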
if ((PolicyAttrs & RVV_VTA) && (PolicyAttrs & RVV_VMA))
Ops.insert(Ops.begin(), llvm::PoisonValue::get(ResultType));
Ops.push_back(ConstantInt::get(Ops.back()->getType(), PolicyAttrs));
IntrinsicTypes = {ResultType, Ops[4]->getType()};
} else {
if (PolicyAttrs & RVV_VTA)
Ops.insert(Ops.begin(), llvm::PoisonValue::get(ResultType));
IntrinsicTypes = {ResultType, Ops[3]->getType()};
}
Ops[1] = Builder.CreateBitCast(Ops[1], ResultType->getPointerTo());
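// Save the new_vl output pointer and drop it from the intrinsic call operands.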
Value *NewVL = Ops[2];
Ops.erase(Ops.begin() + 2);
llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes);
llvm::Value *LoadValue = Builder.CreateCall(F, Ops, "");
llvm::Value *V = Builder.CreateExtractValue(LoadValue, {0});
// Store new_vl.
clang::CharUnits Align;
if (IsMasked)
Align = CGM.getNaturalPointeeTypeAlignment(E->getArg(E->getNumArgs()-2)->getType());
else
Align = CGM.getNaturalPointeeTypeAlignment(E->getArg(1)->getType());
llvm::Value *Val = Builder.CreateExtractValue(LoadValue, {1});
Builder.CreateStore(Val, Address(NewVL, Val->getType(), Align));
return V;
}
}] in {
foreach type = types in {
// `vPCePz` is type `const T * -> SizeT * -> {VL} -> VectorType`
// Note: the last operand {VL} is inserted by `RVVIntrinsic::computeBuiltinTypes`
def : RVVBuiltin<"v", "vPCePz", type>;
if !not(IsFloat<type>.val) then {
// `UvPCUePz` is type `const unsigned T * -> SizeT * -> {VL} -> unsigned VectorType`
def : RVVBuiltin<"Uv", "UvPCUePz", type>;
}
}
}
}
}
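For reference, each `RVVBuiltin` above yields an ordinary C intrinsic once the wrapper macros are applied. A minimal sketch of what the `vPCePz` prototype decodes to for the `i8`/`m1` case (the name follows the pattern exercised by the tests below; the exact spelling of the `i8` variant is an assumption):

#include <riscv_vector.h>
#include <stddef.h>
#include <stdint.h>

// "vPCePz" plus the implicit {VL} operand: vector result, const element
// pointer, size_t pointer that receives new_vl, trailing vl count.
vint8m1_t __riscv_th_vle8ff_v_i8m1(const int8_t *base, size_t *new_vl, size_t vl);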

let HasMaskedOffOperand = false,
@@ -376,6 +424,12 @@ defm th_vsse16: RVVVSSEBuiltin<"th_vsse", ["s","x"]>; // i16, f16
defm th_vsse32: RVVVSSEBuiltin<"th_vsse", ["i","f"]>; // i32, f32
defm th_vsse64: RVVVSSEBuiltin<"th_vsse", ["l","d"]>; // i64, f64

// 7.4. Unit-stride Fault-Only-First Loads Operations
defm th_vle8ff : RVVVLEFFBuiltin<"th_vleff", ["c"]>; // i8
defm th_vle16ff: RVVVLEFFBuiltin<"th_vleff", ["s","x"]>; // i16, f16
defm th_vle32ff: RVVVLEFFBuiltin<"th_vleff", ["i", "f"]>; // i32, f32
defm th_vle64ff: RVVVLEFFBuiltin<"th_vleff", ["l", "d"]>; // i64, f64
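
As a usage sketch, fault-only-first loads let a loop read past a region whose end is unknown: `new_vl` reports how many elements were actually loaded before a fault would have occurred. A minimal example built on the `th_vle8ff` builtin defined above (the `__riscv_th_vsetvl_e8m1` helper is assumed to follow the fork's `th_` naming; only the first element of each load must be dereferenceable):

#include <riscv_vector.h>
#include <stddef.h>
#include <stdint.h>

// Count how many of the first n bytes at p can be read without faulting.
// th_vle8ff trims the load at the first element that would fault and
// writes the number of elements actually loaded through new_vl.
size_t count_readable(const int8_t *p, size_t n) {
  size_t total = 0;
  while (n > 0) {
    size_t vl = __riscv_th_vsetvl_e8m1(n);            // assumed vsetvl wrapper
    size_t new_vl;
    (void)__riscv_th_vle8ff_v_i8m1(p, &new_vl, vl);   // intrinsic added here
    total += new_vl;
    if (new_vl < vl)  // load was trimmed: the next element would fault
      break;
    p += new_vl;
    n -= new_vl;
  }
  return total;
}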

//===----------------------------------------------------------------------===//
// 12. Vector Integer Arithmetic Operations
//===----------------------------------------------------------------------===//
611 changes: 331 additions & 280 deletions clang/include/clang/Basic/riscv_vector_xtheadv_wrappers.td

Large diffs are not rendered by default.

@@ -0,0 +1,166 @@
// RUN: %clang_cc1 -triple riscv64 -target-feature +xtheadvector \
// RUN: -disable-O0-optnone -emit-llvm %s -o - | \
// RUN: opt -S -passes=mem2reg | \
// RUN: FileCheck --check-prefix=CHECK-RV64 %s

#include <riscv_vector.h>

typedef _Float16 float16_t;
typedef float float32_t;
typedef double float64_t;

// CHECK-RV64-LABEL: define dso_local <vscale x 4 x i16> @test_th_vle16ff_v_i16m1
// CHECK-RV64-SAME: (ptr noundef [[BASE:%.*]], ptr noundef [[NEW_VL:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call { <vscale x 4 x i16>, i64 } @llvm.riscv.th.vleff.nxv4i16.i64(<vscale x 4 x i16> poison, ptr [[BASE]], i64 [[VL]])
// CHECK-RV64-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x i16>, i64 } [[TMP0]], 0
// CHECK-RV64-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x i16>, i64 } [[TMP0]], 1
// CHECK-RV64-NEXT: store i64 [[TMP2]], ptr [[NEW_VL]], align 8
// CHECK-RV64-NEXT: ret <vscale x 4 x i16> [[TMP1]]
//
vint16m1_t test_th_vle16ff_v_i16m1(const int16_t *base, size_t *new_vl, size_t vl) {
return __riscv_th_vle16ff_v_i16m1(base, new_vl, vl);
}

// CHECK-RV64-LABEL: define dso_local <vscale x 8 x i16> @test_th_vle16ff_v_i16m2
// CHECK-RV64-SAME: (ptr noundef [[BASE:%.*]], ptr noundef [[NEW_VL:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call { <vscale x 8 x i16>, i64 } @llvm.riscv.th.vleff.nxv8i16.i64(<vscale x 8 x i16> poison, ptr [[BASE]], i64 [[VL]])
// CHECK-RV64-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, i64 } [[TMP0]], 0
// CHECK-RV64-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, i64 } [[TMP0]], 1
// CHECK-RV64-NEXT: store i64 [[TMP2]], ptr [[NEW_VL]], align 8
// CHECK-RV64-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
vint16m2_t test_th_vle16ff_v_i16m2(const int16_t *base, size_t *new_vl, size_t vl) {
return __riscv_th_vle16ff_v_i16m2(base, new_vl, vl);
}

// CHECK-RV64-LABEL: define dso_local <vscale x 16 x i16> @test_th_vle16ff_v_i16m4
// CHECK-RV64-SAME: (ptr noundef [[BASE:%.*]], ptr noundef [[NEW_VL:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call { <vscale x 16 x i16>, i64 } @llvm.riscv.th.vleff.nxv16i16.i64(<vscale x 16 x i16> poison, ptr [[BASE]], i64 [[VL]])
// CHECK-RV64-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i16>, i64 } [[TMP0]], 0
// CHECK-RV64-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 16 x i16>, i64 } [[TMP0]], 1
// CHECK-RV64-NEXT: store i64 [[TMP2]], ptr [[NEW_VL]], align 8
// CHECK-RV64-NEXT: ret <vscale x 16 x i16> [[TMP1]]
//
vint16m4_t test_th_vle16ff_v_i16m4(const int16_t *base, size_t *new_vl, size_t vl) {
return __riscv_th_vle16ff_v_i16m4(base, new_vl, vl);
}

// CHECK-RV64-LABEL: define dso_local <vscale x 32 x i16> @test_th_vle16ff_v_i16m8
// CHECK-RV64-SAME: (ptr noundef [[BASE:%.*]], ptr noundef [[NEW_VL:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call { <vscale x 32 x i16>, i64 } @llvm.riscv.th.vleff.nxv32i16.i64(<vscale x 32 x i16> poison, ptr [[BASE]], i64 [[VL]])
// CHECK-RV64-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 32 x i16>, i64 } [[TMP0]], 0
// CHECK-RV64-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 32 x i16>, i64 } [[TMP0]], 1
// CHECK-RV64-NEXT: store i64 [[TMP2]], ptr [[NEW_VL]], align 8
// CHECK-RV64-NEXT: ret <vscale x 32 x i16> [[TMP1]]
//
vint16m8_t test_th_vle16ff_v_i16m8(const int16_t *base, size_t *new_vl, size_t vl) {
return __riscv_th_vle16ff_v_i16m8(base, new_vl, vl);
}

// CHECK-RV64-LABEL: define dso_local <vscale x 4 x i16> @test_th_vle16ff_v_u16m1
// CHECK-RV64-SAME: (ptr noundef [[BASE:%.*]], ptr noundef [[NEW_VL:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call { <vscale x 4 x i16>, i64 } @llvm.riscv.th.vleff.nxv4i16.i64(<vscale x 4 x i16> poison, ptr [[BASE]], i64 [[VL]])
// CHECK-RV64-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x i16>, i64 } [[TMP0]], 0
// CHECK-RV64-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x i16>, i64 } [[TMP0]], 1
// CHECK-RV64-NEXT: store i64 [[TMP2]], ptr [[NEW_VL]], align 8
// CHECK-RV64-NEXT: ret <vscale x 4 x i16> [[TMP1]]
//
vuint16m1_t test_th_vle16ff_v_u16m1(const uint16_t *base, size_t *new_vl, size_t vl) {
return __riscv_th_vle16ff_v_u16m1(base, new_vl, vl);
}

// CHECK-RV64-LABEL: define dso_local <vscale x 8 x i16> @test_th_vle16ff_v_u16m2
// CHECK-RV64-SAME: (ptr noundef [[BASE:%.*]], ptr noundef [[NEW_VL:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call { <vscale x 8 x i16>, i64 } @llvm.riscv.th.vleff.nxv8i16.i64(<vscale x 8 x i16> poison, ptr [[BASE]], i64 [[VL]])
// CHECK-RV64-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, i64 } [[TMP0]], 0
// CHECK-RV64-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, i64 } [[TMP0]], 1
// CHECK-RV64-NEXT: store i64 [[TMP2]], ptr [[NEW_VL]], align 8
// CHECK-RV64-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
vuint16m2_t test_th_vle16ff_v_u16m2(const uint16_t *base, size_t *new_vl, size_t vl) {
return __riscv_th_vle16ff_v_u16m2(base, new_vl, vl);
}

// CHECK-RV64-LABEL: define dso_local <vscale x 16 x i16> @test_th_vle16ff_v_u16m4
// CHECK-RV64-SAME: (ptr noundef [[BASE:%.*]], ptr noundef [[NEW_VL:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call { <vscale x 16 x i16>, i64 } @llvm.riscv.th.vleff.nxv16i16.i64(<vscale x 16 x i16> poison, ptr [[BASE]], i64 [[VL]])
// CHECK-RV64-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i16>, i64 } [[TMP0]], 0
// CHECK-RV64-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 16 x i16>, i64 } [[TMP0]], 1
// CHECK-RV64-NEXT: store i64 [[TMP2]], ptr [[NEW_VL]], align 8
// CHECK-RV64-NEXT: ret <vscale x 16 x i16> [[TMP1]]
//
vuint16m4_t test_th_vle16ff_v_u16m4(const uint16_t *base, size_t *new_vl, size_t vl) {
return __riscv_th_vle16ff_v_u16m4(base, new_vl, vl);
}

// CHECK-RV64-LABEL: define dso_local <vscale x 32 x i16> @test_th_vle16ff_v_u16m8
// CHECK-RV64-SAME: (ptr noundef [[BASE:%.*]], ptr noundef [[NEW_VL:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call { <vscale x 32 x i16>, i64 } @llvm.riscv.th.vleff.nxv32i16.i64(<vscale x 32 x i16> poison, ptr [[BASE]], i64 [[VL]])
// CHECK-RV64-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 32 x i16>, i64 } [[TMP0]], 0
// CHECK-RV64-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 32 x i16>, i64 } [[TMP0]], 1
// CHECK-RV64-NEXT: store i64 [[TMP2]], ptr [[NEW_VL]], align 8
// CHECK-RV64-NEXT: ret <vscale x 32 x i16> [[TMP1]]
//
vuint16m8_t test_th_vle16ff_v_u16m8(const uint16_t *base, size_t *new_vl, size_t vl) {
return __riscv_th_vle16ff_v_u16m8(base, new_vl, vl);
}

// CHECK-RV64-LABEL: define dso_local <vscale x 4 x half> @test_th_vle16ff_v_f16m1
// CHECK-RV64-SAME: (ptr noundef [[BASE:%.*]], ptr noundef [[NEW_VL:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call { <vscale x 4 x half>, i64 } @llvm.riscv.th.vleff.nxv4f16.i64(<vscale x 4 x half> poison, ptr [[BASE]], i64 [[VL]])
// CHECK-RV64-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x half>, i64 } [[TMP0]], 0
// CHECK-RV64-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x half>, i64 } [[TMP0]], 1
// CHECK-RV64-NEXT: store i64 [[TMP2]], ptr [[NEW_VL]], align 8
// CHECK-RV64-NEXT: ret <vscale x 4 x half> [[TMP1]]
//
vfloat16m1_t test_th_vle16ff_v_f16m1(const float16_t *base, size_t *new_vl, size_t vl) {
return __riscv_th_vle16ff_v_f16m1(base, new_vl, vl);
}

// CHECK-RV64-LABEL: define dso_local <vscale x 8 x half> @test_th_vle16ff_v_f16m2
// CHECK-RV64-SAME: (ptr noundef [[BASE:%.*]], ptr noundef [[NEW_VL:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call { <vscale x 8 x half>, i64 } @llvm.riscv.th.vleff.nxv8f16.i64(<vscale x 8 x half> poison, ptr [[BASE]], i64 [[VL]])
// CHECK-RV64-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 8 x half>, i64 } [[TMP0]], 0
// CHECK-RV64-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x half>, i64 } [[TMP0]], 1
// CHECK-RV64-NEXT: store i64 [[TMP2]], ptr [[NEW_VL]], align 8
// CHECK-RV64-NEXT: ret <vscale x 8 x half> [[TMP1]]
//
vfloat16m2_t test_th_vle16ff_v_f16m2(const float16_t *base, size_t *new_vl, size_t vl) {
return __riscv_th_vle16ff_v_f16m2(base, new_vl, vl);
}

// CHECK-RV64-LABEL: define dso_local <vscale x 16 x half> @test_th_vle16ff_v_f16m4
// CHECK-RV64-SAME: (ptr noundef [[BASE:%.*]], ptr noundef [[NEW_VL:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call { <vscale x 16 x half>, i64 } @llvm.riscv.th.vleff.nxv16f16.i64(<vscale x 16 x half> poison, ptr [[BASE]], i64 [[VL]])
// CHECK-RV64-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x half>, i64 } [[TMP0]], 0
// CHECK-RV64-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 16 x half>, i64 } [[TMP0]], 1
// CHECK-RV64-NEXT: store i64 [[TMP2]], ptr [[NEW_VL]], align 8
// CHECK-RV64-NEXT: ret <vscale x 16 x half> [[TMP1]]
//
vfloat16m4_t test_th_vle16ff_v_f16m4(const float16_t *base, size_t *new_vl, size_t vl) {
return __riscv_th_vle16ff_v_f16m4(base, new_vl, vl);
}

// CHECK-RV64-LABEL: define dso_local <vscale x 32 x half> @test_th_vle16ff_v_f16m8
// CHECK-RV64-SAME: (ptr noundef [[BASE:%.*]], ptr noundef [[NEW_VL:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call { <vscale x 32 x half>, i64 } @llvm.riscv.th.vleff.nxv32f16.i64(<vscale x 32 x half> poison, ptr [[BASE]], i64 [[VL]])
// CHECK-RV64-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 32 x half>, i64 } [[TMP0]], 0
// CHECK-RV64-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 32 x half>, i64 } [[TMP0]], 1
// CHECK-RV64-NEXT: store i64 [[TMP2]], ptr [[NEW_VL]], align 8
// CHECK-RV64-NEXT: ret <vscale x 32 x half> [[TMP1]]
//
vfloat16m8_t test_th_vle16ff_v_f16m8(const float16_t *base, size_t *new_vl, size_t vl) {
return __riscv_th_vle16ff_v_f16m8(base, new_vl, vl);
}