[Clang][XTHeadVector] Add unit-stride fault-only-first load intrinsics (llvm#50)

* [Clang][XTHeadVector] Add Unit-stride Fault-Only-First Loads

* [NFC][XTHeadVector] Update README

* [Clang][XTHeadVector] Add tests for integers

* [Clang][XTHeadVector] Add tests for floats

* [NFC][XTHeadVector] Unify names in wrapper macros

* [Clang][XTHeadVector] Add wrapper macros

* [Clang][XTHeadVector] Test wrapper macros
imkiva committed Apr 1, 2024
1 parent 2e78c12 commit e4906da
Showing 11 changed files with 1,607 additions and 280 deletions.
2 changes: 2 additions & 0 deletions README.md
@@ -33,6 +33,8 @@ Any feature not listed below but present in the specification should be considered
- (Done) `6.2. Set vl to VLMAX with specific vtype`
- (WIP) `7. Vector Load/Store`
- (Done) `7.1. Vector Unit-Stride Operations`
- (Done) `7.2. Vector Strided Load/Store Operations`
- (Done) `7.4. Unit-stride Fault-Only-First Loads Operations`

## Q & A

54 changes: 54 additions & 0 deletions clang/include/clang/Basic/riscv_vector_xtheadv.td
@@ -242,6 +242,54 @@ let SupportOverloading = false,
def : RVVOutBuiltin<"Uv", "UvPCUez", type>;
}
}

// 7.4. Unit-stride Fault-Only-First Loads Operations
multiclass RVVVLEFFBuiltin<string ir, list<string> types> {
let Name = NAME # "_v",
IRName = ir,
MaskedIRName = ir # "_mask",
ManualCodegen = [{
{
if (IsMasked) {
// Move mask to right before vl.
std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end() - 1);
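// With both tail- and mask-agnostic policies, the merge operand can be poison.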
if ((PolicyAttrs & RVV_VTA) && (PolicyAttrs & RVV_VMA))
Ops.insert(Ops.begin(), llvm::PoisonValue::get(ResultType));
Ops.push_back(ConstantInt::get(Ops.back()->getType(), PolicyAttrs));
IntrinsicTypes = {ResultType, Ops[4]->getType()};
} else {
if (PolicyAttrs & RVV_VTA)
Ops.insert(Ops.begin(), llvm::PoisonValue::get(ResultType));
IntrinsicTypes = {ResultType, Ops[3]->getType()};
}
Ops[1] = Builder.CreateBitCast(Ops[1], ResultType->getPointerTo());
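// Save the new_vl output pointer and drop it from the intrinsic call operands.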
Value *NewVL = Ops[2];
Ops.erase(Ops.begin() + 2);
llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes);
llvm::Value *LoadValue = Builder.CreateCall(F, Ops, "");
llvm::Value *V = Builder.CreateExtractValue(LoadValue, {0});
// Store new_vl.
clang::CharUnits Align;
if (IsMasked)
Align = CGM.getNaturalPointeeTypeAlignment(E->getArg(E->getNumArgs()-2)->getType());
else
Align = CGM.getNaturalPointeeTypeAlignment(E->getArg(1)->getType());
llvm::Value *Val = Builder.CreateExtractValue(LoadValue, {1});
Builder.CreateStore(Val, Address(NewVL, Val->getType(), Align));
return V;
}
}] in {
foreach type = types in {
// `vPCePz` is type `const T * -> SizeT * -> {VL} -> VectorType`
// Note: the last operand {VL} is inserted by `RVVIntrinsic::computeBuiltinTypes`
def : RVVBuiltin<"v", "vPCePz", type>;
if !not(IsFloat<type>.val) then {
// `UvPCUePz` is type `const unsigned T * -> SizeT * -> {VL} -> unsigned VectorType`
def : RVVBuiltin<"Uv", "UvPCUePz", type>;
}
}
}
}
}
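For reference, each `RVVBuiltin` above yields an ordinary C intrinsic once the wrapper macros are applied. A minimal sketch of what the `vPCePz` prototype decodes to for the `i8`/`m1` case (the name follows the pattern exercised by the tests below; the exact spelling of the `i8` variant is an assumption):

#include <riscv_vector.h>
#include <stddef.h>
#include <stdint.h>

// "vPCePz" plus the implicit {VL} operand: vector result, const element
// pointer, size_t pointer that receives new_vl, trailing vl count.
vint8m1_t __riscv_th_vle8ff_v_i8m1(const int8_t *base, size_t *new_vl, size_t vl);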

let HasMaskedOffOperand = false,
@@ -376,6 +424,12 @@ defm th_vsse16: RVVVSSEBuiltin<"th_vsse", ["s","x"]>; // i16, f16
defm th_vsse32: RVVVSSEBuiltin<"th_vsse", ["i","f"]>; // i32, f32
defm th_vsse64: RVVVSSEBuiltin<"th_vsse", ["l","d"]>; // i64, f64

// 7.4. Unit-stride Fault-Only-First Loads Operations
defm th_vle8ff : RVVVLEFFBuiltin<"th_vleff", ["c"]>; // i8
defm th_vle16ff: RVVVLEFFBuiltin<"th_vleff", ["s","x"]>; // i16, f16
defm th_vle32ff: RVVVLEFFBuiltin<"th_vleff", ["i", "f"]>; // i32, f32
defm th_vle64ff: RVVVLEFFBuiltin<"th_vleff", ["l", "d"]>; // i64, f64
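
As a usage sketch, fault-only-first loads let a loop read past a region whose end is unknown: `new_vl` reports how many elements were actually loaded before a fault would have occurred. A minimal example built on the `th_vle8ff` builtin defined above (the `__riscv_th_vsetvl_e8m1` helper is assumed to follow the fork's `th_` naming; only the first element of each load must be dereferenceable):

#include <riscv_vector.h>
#include <stddef.h>
#include <stdint.h>

// Count how many of the first n bytes at p can be read without faulting.
// th_vle8ff trims the load at the first element that would fault and
// writes the number of elements actually loaded through new_vl.
size_t count_readable(const int8_t *p, size_t n) {
  size_t total = 0;
  while (n > 0) {
    size_t vl = __riscv_th_vsetvl_e8m1(n);            // assumed vsetvl wrapper
    size_t new_vl;
    (void)__riscv_th_vle8ff_v_i8m1(p, &new_vl, vl);   // intrinsic added here
    total += new_vl;
    if (new_vl < vl)  // load was trimmed: the next element would fault
      break;
    p += new_vl;
    n -= new_vl;
  }
  return total;
}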

//===----------------------------------------------------------------------===//
// 12. Vector Integer Arithmetic Operations
//===----------------------------------------------------------------------===//
611 changes: 331 additions & 280 deletions clang/include/clang/Basic/riscv_vector_xtheadv_wrappers.td

Large diffs are not rendered by default.

@@ -0,0 +1,166 @@
// RUN: %clang_cc1 -triple riscv64 -target-feature +xtheadvector \
// RUN: -disable-O0-optnone -emit-llvm %s -o - | \
// RUN: opt -S -passes=mem2reg | \
// RUN: FileCheck --check-prefix=CHECK-RV64 %s

#include <riscv_vector.h>

typedef _Float16 float16_t;
typedef float float32_t;
typedef double float64_t;

// CHECK-RV64-LABEL: define dso_local <vscale x 4 x i16> @test_th_vle16ff_v_i16m1
// CHECK-RV64-SAME: (ptr noundef [[BASE:%.*]], ptr noundef [[NEW_VL:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call { <vscale x 4 x i16>, i64 } @llvm.riscv.th.vleff.nxv4i16.i64(<vscale x 4 x i16> poison, ptr [[BASE]], i64 [[VL]])
// CHECK-RV64-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x i16>, i64 } [[TMP0]], 0
// CHECK-RV64-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x i16>, i64 } [[TMP0]], 1
// CHECK-RV64-NEXT: store i64 [[TMP2]], ptr [[NEW_VL]], align 8
// CHECK-RV64-NEXT: ret <vscale x 4 x i16> [[TMP1]]
//
vint16m1_t test_th_vle16ff_v_i16m1(const int16_t *base, size_t *new_vl, size_t vl) {
return __riscv_th_vle16ff_v_i16m1(base, new_vl, vl);
}

// CHECK-RV64-LABEL: define dso_local <vscale x 8 x i16> @test_th_vle16ff_v_i16m2
// CHECK-RV64-SAME: (ptr noundef [[BASE:%.*]], ptr noundef [[NEW_VL:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call { <vscale x 8 x i16>, i64 } @llvm.riscv.th.vleff.nxv8i16.i64(<vscale x 8 x i16> poison, ptr [[BASE]], i64 [[VL]])
// CHECK-RV64-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, i64 } [[TMP0]], 0
// CHECK-RV64-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, i64 } [[TMP0]], 1
// CHECK-RV64-NEXT: store i64 [[TMP2]], ptr [[NEW_VL]], align 8
// CHECK-RV64-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
vint16m2_t test_th_vle16ff_v_i16m2(const int16_t *base, size_t *new_vl, size_t vl) {
return __riscv_th_vle16ff_v_i16m2(base, new_vl, vl);
}

// CHECK-RV64-LABEL: define dso_local <vscale x 16 x i16> @test_th_vle16ff_v_i16m4
// CHECK-RV64-SAME: (ptr noundef [[BASE:%.*]], ptr noundef [[NEW_VL:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call { <vscale x 16 x i16>, i64 } @llvm.riscv.th.vleff.nxv16i16.i64(<vscale x 16 x i16> poison, ptr [[BASE]], i64 [[VL]])
// CHECK-RV64-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i16>, i64 } [[TMP0]], 0
// CHECK-RV64-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 16 x i16>, i64 } [[TMP0]], 1
// CHECK-RV64-NEXT: store i64 [[TMP2]], ptr [[NEW_VL]], align 8
// CHECK-RV64-NEXT: ret <vscale x 16 x i16> [[TMP1]]
//
vint16m4_t test_th_vle16ff_v_i16m4(const int16_t *base, size_t *new_vl, size_t vl) {
return __riscv_th_vle16ff_v_i16m4(base, new_vl, vl);
}

// CHECK-RV64-LABEL: define dso_local <vscale x 32 x i16> @test_th_vle16ff_v_i16m8
// CHECK-RV64-SAME: (ptr noundef [[BASE:%.*]], ptr noundef [[NEW_VL:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call { <vscale x 32 x i16>, i64 } @llvm.riscv.th.vleff.nxv32i16.i64(<vscale x 32 x i16> poison, ptr [[BASE]], i64 [[VL]])
// CHECK-RV64-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 32 x i16>, i64 } [[TMP0]], 0
// CHECK-RV64-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 32 x i16>, i64 } [[TMP0]], 1
// CHECK-RV64-NEXT: store i64 [[TMP2]], ptr [[NEW_VL]], align 8
// CHECK-RV64-NEXT: ret <vscale x 32 x i16> [[TMP1]]
//
vint16m8_t test_th_vle16ff_v_i16m8(const int16_t *base, size_t *new_vl, size_t vl) {
return __riscv_th_vle16ff_v_i16m8(base, new_vl, vl);
}

// CHECK-RV64-LABEL: define dso_local <vscale x 4 x i16> @test_th_vle16ff_v_u16m1
// CHECK-RV64-SAME: (ptr noundef [[BASE:%.*]], ptr noundef [[NEW_VL:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call { <vscale x 4 x i16>, i64 } @llvm.riscv.th.vleff.nxv4i16.i64(<vscale x 4 x i16> poison, ptr [[BASE]], i64 [[VL]])
// CHECK-RV64-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x i16>, i64 } [[TMP0]], 0
// CHECK-RV64-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x i16>, i64 } [[TMP0]], 1
// CHECK-RV64-NEXT: store i64 [[TMP2]], ptr [[NEW_VL]], align 8
// CHECK-RV64-NEXT: ret <vscale x 4 x i16> [[TMP1]]
//
vuint16m1_t test_th_vle16ff_v_u16m1(const uint16_t *base, size_t *new_vl, size_t vl) {
return __riscv_th_vle16ff_v_u16m1(base, new_vl, vl);
}

// CHECK-RV64-LABEL: define dso_local <vscale x 8 x i16> @test_th_vle16ff_v_u16m2
// CHECK-RV64-SAME: (ptr noundef [[BASE:%.*]], ptr noundef [[NEW_VL:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call { <vscale x 8 x i16>, i64 } @llvm.riscv.th.vleff.nxv8i16.i64(<vscale x 8 x i16> poison, ptr [[BASE]], i64 [[VL]])
// CHECK-RV64-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, i64 } [[TMP0]], 0
// CHECK-RV64-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, i64 } [[TMP0]], 1
// CHECK-RV64-NEXT: store i64 [[TMP2]], ptr [[NEW_VL]], align 8
// CHECK-RV64-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
vuint16m2_t test_th_vle16ff_v_u16m2(const uint16_t *base, size_t *new_vl, size_t vl) {
return __riscv_th_vle16ff_v_u16m2(base, new_vl, vl);
}

// CHECK-RV64-LABEL: define dso_local <vscale x 16 x i16> @test_th_vle16ff_v_u16m4
// CHECK-RV64-SAME: (ptr noundef [[BASE:%.*]], ptr noundef [[NEW_VL:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call { <vscale x 16 x i16>, i64 } @llvm.riscv.th.vleff.nxv16i16.i64(<vscale x 16 x i16> poison, ptr [[BASE]], i64 [[VL]])
// CHECK-RV64-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i16>, i64 } [[TMP0]], 0
// CHECK-RV64-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 16 x i16>, i64 } [[TMP0]], 1
// CHECK-RV64-NEXT: store i64 [[TMP2]], ptr [[NEW_VL]], align 8
// CHECK-RV64-NEXT: ret <vscale x 16 x i16> [[TMP1]]
//
vuint16m4_t test_th_vle16ff_v_u16m4(const uint16_t *base, size_t *new_vl, size_t vl) {
return __riscv_th_vle16ff_v_u16m4(base, new_vl, vl);
}

// CHECK-RV64-LABEL: define dso_local <vscale x 32 x i16> @test_th_vle16ff_v_u16m8
// CHECK-RV64-SAME: (ptr noundef [[BASE:%.*]], ptr noundef [[NEW_VL:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call { <vscale x 32 x i16>, i64 } @llvm.riscv.th.vleff.nxv32i16.i64(<vscale x 32 x i16> poison, ptr [[BASE]], i64 [[VL]])
// CHECK-RV64-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 32 x i16>, i64 } [[TMP0]], 0
// CHECK-RV64-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 32 x i16>, i64 } [[TMP0]], 1
// CHECK-RV64-NEXT: store i64 [[TMP2]], ptr [[NEW_VL]], align 8
// CHECK-RV64-NEXT: ret <vscale x 32 x i16> [[TMP1]]
//
vuint16m8_t test_th_vle16ff_v_u16m8(const uint16_t *base, size_t *new_vl, size_t vl) {
return __riscv_th_vle16ff_v_u16m8(base, new_vl, vl);
}

// CHECK-RV64-LABEL: define dso_local <vscale x 4 x half> @test_th_vle16ff_v_f16m1
// CHECK-RV64-SAME: (ptr noundef [[BASE:%.*]], ptr noundef [[NEW_VL:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call { <vscale x 4 x half>, i64 } @llvm.riscv.th.vleff.nxv4f16.i64(<vscale x 4 x half> poison, ptr [[BASE]], i64 [[VL]])
// CHECK-RV64-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x half>, i64 } [[TMP0]], 0
// CHECK-RV64-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x half>, i64 } [[TMP0]], 1
// CHECK-RV64-NEXT: store i64 [[TMP2]], ptr [[NEW_VL]], align 8
// CHECK-RV64-NEXT: ret <vscale x 4 x half> [[TMP1]]
//
vfloat16m1_t test_th_vle16ff_v_f16m1(const float16_t *base, size_t *new_vl, size_t vl) {
return __riscv_th_vle16ff_v_f16m1(base, new_vl, vl);
}

// CHECK-RV64-LABEL: define dso_local <vscale x 8 x half> @test_th_vle16ff_v_f16m2
// CHECK-RV64-SAME: (ptr noundef [[BASE:%.*]], ptr noundef [[NEW_VL:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call { <vscale x 8 x half>, i64 } @llvm.riscv.th.vleff.nxv8f16.i64(<vscale x 8 x half> poison, ptr [[BASE]], i64 [[VL]])
// CHECK-RV64-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 8 x half>, i64 } [[TMP0]], 0
// CHECK-RV64-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x half>, i64 } [[TMP0]], 1
// CHECK-RV64-NEXT: store i64 [[TMP2]], ptr [[NEW_VL]], align 8
// CHECK-RV64-NEXT: ret <vscale x 8 x half> [[TMP1]]
//
vfloat16m2_t test_th_vle16ff_v_f16m2(const float16_t *base, size_t *new_vl, size_t vl) {
return __riscv_th_vle16ff_v_f16m2(base, new_vl, vl);
}

// CHECK-RV64-LABEL: define dso_local <vscale x 16 x half> @test_th_vle16ff_v_f16m4
// CHECK-RV64-SAME: (ptr noundef [[BASE:%.*]], ptr noundef [[NEW_VL:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call { <vscale x 16 x half>, i64 } @llvm.riscv.th.vleff.nxv16f16.i64(<vscale x 16 x half> poison, ptr [[BASE]], i64 [[VL]])
// CHECK-RV64-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x half>, i64 } [[TMP0]], 0
// CHECK-RV64-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 16 x half>, i64 } [[TMP0]], 1
// CHECK-RV64-NEXT: store i64 [[TMP2]], ptr [[NEW_VL]], align 8
// CHECK-RV64-NEXT: ret <vscale x 16 x half> [[TMP1]]
//
vfloat16m4_t test_th_vle16ff_v_f16m4(const float16_t *base, size_t *new_vl, size_t vl) {
return __riscv_th_vle16ff_v_f16m4(base, new_vl, vl);
}

// CHECK-RV64-LABEL: define dso_local <vscale x 32 x half> @test_th_vle16ff_v_f16m8
// CHECK-RV64-SAME: (ptr noundef [[BASE:%.*]], ptr noundef [[NEW_VL:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call { <vscale x 32 x half>, i64 } @llvm.riscv.th.vleff.nxv32f16.i64(<vscale x 32 x half> poison, ptr [[BASE]], i64 [[VL]])
// CHECK-RV64-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 32 x half>, i64 } [[TMP0]], 0
// CHECK-RV64-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 32 x half>, i64 } [[TMP0]], 1
// CHECK-RV64-NEXT: store i64 [[TMP2]], ptr [[NEW_VL]], align 8
// CHECK-RV64-NEXT: ret <vscale x 32 x half> [[TMP1]]
//
vfloat16m8_t test_th_vle16ff_v_f16m8(const float16_t *base, size_t *new_vl, size_t vl) {
return __riscv_th_vle16ff_v_f16m8(base, new_vl, vl);
}