Skip to content

Commit

Permalink
[CIR][CIRGen][Builtin][Neon] Lower vld1_dup and vld1q_dup (llvm#936)
Browse files Browse the repository at this point in the history
  • Loading branch information
ghehg authored and lanza committed Oct 19, 2024
1 parent 666dbcf commit 90a9be6
Show file tree
Hide file tree
Showing 2 changed files with 213 additions and 1 deletion.
6 changes: 5 additions & 1 deletion clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3451,7 +3451,11 @@ CIRGenFunction::buildAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
}
case NEON::BI__builtin_neon_vld1_dup_v:
case NEON::BI__builtin_neon_vld1q_dup_v: {
// vld1_dup / vld1q_dup: load a single scalar element and broadcast it
// into every lane of the result vector.
// NOTE(review): the line below is the pre-patch stub shown as removed in
// this diff; it is not part of the post-patch code path.
llvm_unreachable("NYI");
// Reinterpret the pointer operand as pointing to one vector element.
cir::Address ptrAddr = PtrOp0.withElementType(vTy.getEltType());
// Load the scalar from memory...
mlir::Value val = builder.createLoad(getLoc(E->getExprLoc()), ptrAddr);
// ...then splat it across all lanes of the builtin's vector type.
mlir::cir::VecSplatOp vecSplat = builder.create<mlir::cir::VecSplatOp>(
getLoc(E->getExprLoc()), vTy, val);
return vecSplat;
}
case NEON::BI__builtin_neon_vst1_lane_v:
case NEON::BI__builtin_neon_vst1q_lane_v: {
Expand Down
208 changes: 208 additions & 0 deletions clang/test/CIR/CodeGen/AArch64/neon.c
Original file line number Diff line number Diff line change
Expand Up @@ -17577,3 +17577,211 @@ int32x2_t test_vmovn_s64(int64x2_t a) {
// LLVM: [[VMOVN_I:%.*]] = trunc <2 x i64> [[A]] to <2 x i32>
// LLVM: ret <2 x i32> [[VMOVN_I]]
}

// Duplicate one u8 loaded from ptr into all 8 lanes (checked below to
// lower to a scalar load + vector splat).
uint8x8_t test_vld1_dup_u8(uint8_t const * ptr) {
return vld1_dup_u8(ptr);
}

// CIR-LABEL: test_vld1_dup_u8
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!u8i>, !u8i
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !u8i, !cir.vector<!u8i x 8>

// LLVM: {{.*}}test_vld1_dup_u8(ptr{{.*}}[[PTR:%.*]])
// LLVM: [[VAL:%.*]] = load i8, ptr [[PTR]], align 1
// LLVM: [[VEC:%.*]] = insertelement <8 x i8> poison, i8 [[VAL]], i64 0
// LLVM: {{%.*}} = shufflevector <8 x i8> [[VEC]], <8 x i8> poison, <8 x i32> zeroinitializer

// Duplicate one s8 loaded from ptr into all 8 lanes.
int8x8_t test_vld1_dup_s8(int8_t const * ptr) {
return vld1_dup_s8(ptr);
}

// CIR-LABEL: test_vld1_dup_s8
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!s8i>, !s8i
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !s8i, !cir.vector<!s8i x 8>

// LLVM: {{.*}}test_vld1_dup_s8(ptr{{.*}}[[PTR:%.*]])
// LLVM: [[VAL:%.*]] = load i8, ptr [[PTR]], align 1
// LLVM: [[VEC:%.*]] = insertelement <8 x i8> poison, i8 [[VAL]], i64 0
// LLVM: {{%.*}} = shufflevector <8 x i8> [[VEC]], <8 x i8> poison, <8 x i32> zeroinitializer

// Duplicate one u16 loaded from ptr into all 4 lanes.
uint16x4_t test_vld1_dup_u16(uint16_t const * ptr) {
return vld1_dup_u16(ptr);
}

// CIR-LABEL: test_vld1_dup_u16
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!u16i>, !u16i
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !u16i, !cir.vector<!u16i x 4>

// LLVM: {{.*}}test_vld1_dup_u16(ptr{{.*}}[[PTR:%.*]])
// LLVM: [[VAL:%.*]] = load i16, ptr [[PTR]], align 2
// LLVM: [[VEC:%.*]] = insertelement <4 x i16> poison, i16 [[VAL]], i64 0
// LLVM: {{%.*}} = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <4 x i32> zeroinitializer

// Duplicate one s16 loaded from ptr into all 4 lanes.
int16x4_t test_vld1_dup_s16(int16_t const * ptr) {
return vld1_dup_s16(ptr);
}

// CIR-LABEL: test_vld1_dup_s16
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!s16i>, !s16i
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !s16i, !cir.vector<!s16i x 4>

// LLVM: {{.*}}test_vld1_dup_s16(ptr{{.*}}[[PTR:%.*]])
// LLVM: [[VAL:%.*]] = load i16, ptr [[PTR]], align 2
// LLVM: [[VEC:%.*]] = insertelement <4 x i16> poison, i16 [[VAL]], i64 0
// LLVM: {{%.*}} = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <4 x i32> zeroinitializer

// Duplicate one s32 loaded from ptr into both lanes.
int32x2_t test_vld1_dup_s32(int32_t const * ptr) {
return vld1_dup_s32(ptr);
}

// CIR-LABEL: test_vld1_dup_s32
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!s32i>, !s32i
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !s32i, !cir.vector<!s32i x 2>

// LLVM: {{.*}}test_vld1_dup_s32(ptr{{.*}}[[PTR:%.*]])
// LLVM: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4
// LLVM: [[VEC:%.*]] = insertelement <2 x i32> poison, i32 [[VAL]], i64 0
// LLVM: {{%.*}} = shufflevector <2 x i32> [[VEC]], <2 x i32> poison, <2 x i32> zeroinitializer

// Duplicate one s64 loaded from ptr into the single lane of a 64-bit vector.
int64x1_t test_vld1_dup_s64(int64_t const * ptr) {
return vld1_dup_s64(ptr);
}

// CIR-LABEL: test_vld1_dup_s64
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!s64i>, !s64i
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !s64i, !cir.vector<!s64i x 1>

// LLVM: {{.*}}test_vld1_dup_s64(ptr{{.*}}[[PTR:%.*]])
// LLVM: [[VAL:%.*]] = load i64, ptr [[PTR]], align 8
// LLVM: [[VEC:%.*]] = insertelement <1 x i64> poison, i64 [[VAL]], i64 0
// LLVM: {{%.*}} = shufflevector <1 x i64> [[VEC]], <1 x i64> poison, <1 x i32> zeroinitializer

// Duplicate one f32 loaded from ptr into both lanes.
float32x2_t test_vld1_dup_f32(float32_t const * ptr) {
return vld1_dup_f32(ptr);
}

// CIR-LABEL: test_vld1_dup_f32
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!cir.float>, !cir.float
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !cir.float, !cir.vector<!cir.float x 2>

// LLVM: {{.*}}test_vld1_dup_f32(ptr{{.*}}[[PTR:%.*]])
// LLVM: [[VAL:%.*]] = load float, ptr [[PTR]], align 4
// LLVM: [[VEC:%.*]] = insertelement <2 x float> poison, float [[VAL]], i64 0
// LLVM: {{%.*}} = shufflevector <2 x float> [[VEC]], <2 x float> poison, <2 x i32> zeroinitializer

// Duplicate one f64 loaded from ptr into the single lane of a 64-bit vector.
float64x1_t test_vld1_dup_f64(float64_t const * ptr) {
return vld1_dup_f64(ptr);
}

// CIR-LABEL: test_vld1_dup_f64
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!cir.double>, !cir.double
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !cir.double, !cir.vector<!cir.double x 1>

// LLVM: {{.*}}test_vld1_dup_f64(ptr{{.*}}[[PTR:%.*]])
// LLVM: [[VAL:%.*]] = load double, ptr [[PTR]], align 8
// LLVM: [[VEC:%.*]] = insertelement <1 x double> poison, double [[VAL]], i64 0
// LLVM: {{%.*}} = shufflevector <1 x double> [[VEC]], <1 x double> poison, <1 x i32> zeroinitializer

// Q-register variant: duplicate one u8 loaded from ptr into all 16 lanes.
uint8x16_t test_vld1q_dup_u8(uint8_t const * ptr) {
return vld1q_dup_u8(ptr);
}

// CIR-LABEL: test_vld1q_dup_u8
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!u8i>, !u8i
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !u8i, !cir.vector<!u8i x 16>

// LLVM: {{.*}}test_vld1q_dup_u8(ptr{{.*}}[[PTR:%.*]])
// LLVM: [[VAL:%.*]] = load i8, ptr [[PTR]], align 1
// LLVM: [[VEC:%.*]] = insertelement <16 x i8> poison, i8 [[VAL]], i64 0
// LLVM: {{%.*}} = shufflevector <16 x i8> [[VEC]], <16 x i8> poison, <16 x i32> zeroinitializer

// Q-register variant: duplicate one s8 loaded from ptr into all 16 lanes.
int8x16_t test_vld1q_dup_s8(int8_t const * ptr) {
return vld1q_dup_s8(ptr);
}

// CIR-LABEL: test_vld1q_dup_s8
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!s8i>, !s8i
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !s8i, !cir.vector<!s8i x 16>

// LLVM: {{.*}}test_vld1q_dup_s8(ptr{{.*}}[[PTR:%.*]])
// LLVM: [[VAL:%.*]] = load i8, ptr [[PTR]], align 1
// LLVM: [[VEC:%.*]] = insertelement <16 x i8> poison, i8 [[VAL]], i64 0
// LLVM: {{%.*}} = shufflevector <16 x i8> [[VEC]], <16 x i8> poison, <16 x i32> zeroinitializer

// Q-register variant: duplicate one u16 loaded from ptr into all 8 lanes.
uint16x8_t test_vld1q_dup_u16(uint16_t const * ptr) {
return vld1q_dup_u16(ptr);
}

// CIR-LABEL: test_vld1q_dup_u16
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!u16i>, !u16i
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !u16i, !cir.vector<!u16i x 8>

// LLVM: {{.*}}test_vld1q_dup_u16(ptr{{.*}}[[PTR:%.*]])
// LLVM: [[VAL:%.*]] = load i16, ptr [[PTR]], align 2
// LLVM: [[VEC:%.*]] = insertelement <8 x i16> poison, i16 [[VAL]], i64 0
// LLVM: {{%.*}} = shufflevector <8 x i16> [[VEC]], <8 x i16> poison, <8 x i32> zeroinitializer

// Q-register variant: duplicate one s16 loaded from ptr into all 8 lanes.
int16x8_t test_vld1q_dup_s16(int16_t const * ptr) {
return vld1q_dup_s16(ptr);
}

// CIR-LABEL: test_vld1q_dup_s16
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!s16i>, !s16i
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !s16i, !cir.vector<!s16i x 8>

// LLVM: {{.*}}test_vld1q_dup_s16(ptr{{.*}}[[PTR:%.*]])
// LLVM: [[VAL:%.*]] = load i16, ptr [[PTR]], align 2
// LLVM: [[VEC:%.*]] = insertelement <8 x i16> poison, i16 [[VAL]], i64 0
// LLVM: {{%.*}} = shufflevector <8 x i16> [[VEC]], <8 x i16> poison, <8 x i32> zeroinitializer

// Q-register variant: duplicate one s32 loaded from ptr into all 4 lanes.
int32x4_t test_vld1q_dup_s32(int32_t const * ptr) {
return vld1q_dup_s32(ptr);
}

// CIR-LABEL: test_vld1q_dup_s32
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!s32i>, !s32i
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !s32i, !cir.vector<!s32i x 4>

// LLVM: {{.*}}test_vld1q_dup_s32(ptr{{.*}}[[PTR:%.*]])
// LLVM: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4
// LLVM: [[VEC:%.*]] = insertelement <4 x i32> poison, i32 [[VAL]], i64 0
// LLVM: {{%.*}} = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <4 x i32> zeroinitializer

// Q-register variant: duplicate one s64 loaded from ptr into both lanes.
int64x2_t test_vld1q_dup_s64(int64_t const * ptr) {
return vld1q_dup_s64(ptr);
}

// CIR-LABEL: test_vld1q_dup_s64
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!s64i>, !s64i
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !s64i, !cir.vector<!s64i x 2>

// LLVM: {{.*}}test_vld1q_dup_s64(ptr{{.*}}[[PTR:%.*]])
// LLVM: [[VAL:%.*]] = load i64, ptr [[PTR]], align 8
// LLVM: [[VEC:%.*]] = insertelement <2 x i64> poison, i64 [[VAL]], i64 0
// LLVM: {{%.*}} = shufflevector <2 x i64> [[VEC]], <2 x i64> poison, <2 x i32> zeroinitializer

// Q-register variant: duplicate one f32 loaded from ptr into all 4 lanes.
float32x4_t test_vld1q_dup_f32(float32_t const * ptr) {
return vld1q_dup_f32(ptr);
}

// CIR-LABEL: test_vld1q_dup_f32
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!cir.float>, !cir.float
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !cir.float, !cir.vector<!cir.float x 4>

// LLVM: {{.*}}test_vld1q_dup_f32(ptr{{.*}}[[PTR:%.*]])
// LLVM: [[VAL:%.*]] = load float, ptr [[PTR]], align 4
// LLVM: [[VEC:%.*]] = insertelement <4 x float> poison, float [[VAL]], i64 0
// LLVM: {{%.*}} = shufflevector <4 x float> [[VEC]], <4 x float> poison, <4 x i32> zeroinitializer

// Q-register variant: duplicate one f64 loaded from ptr into both lanes.
float64x2_t test_vld1q_dup_f64(float64_t const * ptr) {
return vld1q_dup_f64(ptr);
}

// CIR-LABEL: test_vld1q_dup_f64
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!cir.double>, !cir.double
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !cir.double, !cir.vector<!cir.double x 2>

// LLVM: {{.*}}test_vld1q_dup_f64(ptr{{.*}}[[PTR:%.*]])
// LLVM: [[VAL:%.*]] = load double, ptr [[PTR]], align 8
// LLVM: [[VEC:%.*]] = insertelement <2 x double> poison, double [[VAL]], i64 0
// LLVM: {{%.*}} = shufflevector <2 x double> [[VEC]], <2 x double> poison, <2 x i32> zeroinitializer

0 comments on commit 90a9be6

Please sign in to comment.