[CIR][CIRGen][Builtin][Neon] Lower neon_vpadd_v and neon_vpaddq_v into llvm intrinsic (llvm#960)

This PR refactors the Neon builtin code in clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp a bit to make it cleaner.

It also changes the RUN options of the test file clang/test/CIR/CodeGen/AArch64/neon-arith.c to make the test more concise and easier to compare against OG CodeGen (to compare, just remove -fclangir from the LLVM-generation part of the RUN lines and the test should still pass), as illustrated below.
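For illustration only (not part of the patch), these are the LLVM-generation RUN lines added by this change with just -fclangir dropped; per the note above, the LLVM check lines should then match original CodeGen output as well:

// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -target-feature +neon \
// RUN: -disable-O0-optnone \
// RUN: -flax-vector-conversions=none -fno-clangir-call-conv-lowering \
// RUN: -emit-llvm -o - %s \
// RUN: | opt -S -passes=instcombine,mem2reg,simplifycfg -o %t.ll
// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s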
ghehg authored and lanza committed Oct 19, 2024
1 parent 90a9be6 commit 244734d
Showing 2 changed files with 202 additions and 61 deletions.
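For reference, a minimal illustrative snippet (not part of the patch) using the pairwise-add builtins this change lowers; as the new tests below verify, the integer vpadd/vpaddq forms map to llvm.aarch64.neon.addp and the floating-point forms to llvm.aarch64.neon.faddp:

#include <arm_neon.h>

// Adds adjacent pairs of elements from a and b (pairwise add).
int8x8_t pairwise_add_s8(int8x8_t a, int8x8_t b) {
  return vpadd_s8(a, b);   // lowers to llvm.aarch64.neon.addp.v8i8
}

float32x2_t pairwise_add_f32(float32x2_t a, float32x2_t b) {
  return vpadd_f32(a, b);  // lowers to llvm.aarch64.neon.faddp.v2f32
}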
60 changes: 42 additions & 18 deletions clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -11,6 +11,8 @@
//
//===----------------------------------------------------------------------===//

#include <utility>

#include "CIRGenCXXABI.h"
#include "CIRGenCall.h"
#include "CIRGenFunction.h"
@@ -2158,7 +2160,7 @@ static mlir::Value buildArmLdrexNon128Intrinsic(unsigned int builtinID,
}
}

mlir::Value buildNeonCall(unsigned int builtinID, CIRGenFunction &cgf,
mlir::Value buildNeonCall(CIRGenBuilderTy &builder,
llvm::SmallVector<mlir::Type> argTypes,
llvm::SmallVectorImpl<mlir::Value> &args,
llvm::StringRef intrinsicName, mlir::Type funcResTy,
@@ -2175,7 +2177,6 @@ mlir::Value buildNeonCall(unsigned int builtinID, CIRGenFunction &cgf,
if (shift > 0)
llvm_unreachable("Argument shift NYI");

CIRGenBuilderTy &builder = cgf.getBuilder();
for (unsigned j = 0; j < argTypes.size(); ++j) {
if (isConstrainedFPIntrinsic) {
assert(!MissingFeatures::buildConstrainedFPCall());
@@ -2205,6 +2206,24 @@ static int64_t getIntValueFromConstOp(mlir::Value val) {
.getSExtValue();
}

/// This function `buildCommonNeonCallPattern0` implements a common way
// to generate neon intrinsic call that has following pattern:
// 1. There is a need to cast result of the intrinsic call back to
// expression type.
// 2. Function arg types are given, not deduced from actual arg types.
static mlir::Value
buildCommonNeonCallPattern0(CIRGenFunction &cgf, std::string &intrincsName,
llvm::SmallVector<mlir::Type> argTypes,
llvm::SmallVectorImpl<mlir::Value> &ops,
mlir::Type funcResTy, const clang::CallExpr *e) {
CIRGenBuilderTy &builder = cgf.getBuilder();
mlir::Value res =
buildNeonCall(builder, std::move(argTypes), ops, intrincsName, funcResTy,
cgf.getLoc(e->getExprLoc()));
mlir::Type resultType = cgf.ConvertType(e->getType());
return builder.createBitcast(res, resultType);
}

mlir::Value CIRGenFunction::buildCommonNeonBuiltinExpr(
unsigned builtinID, unsigned llvmIntrinsic, unsigned altLLVMIntrinsic,
const char *nameHint, unsigned modifier, const CallExpr *e,
@@ -2269,18 +2288,25 @@ mlir::Value CIRGenFunction::buildCommonNeonBuiltinExpr(
default:
llvm::errs() << getAArch64SIMDIntrinsicString(builtinID) << " ";
llvm_unreachable("NYI");
case NEON::BI__builtin_neon_vqadd_v:
mlir::Value res = buildNeonCall(builtinID, *this, {vTy, vTy}, ops,
(intrinicId != altLLVMIntrinsic)
? "llvm.aarch64.neon.uqadd"
: "llvm.aarch64.neon.sqadd",
vTy, getLoc(e->getExprLoc()));
mlir::Type resultType = ConvertType(e->getType());
// AArch64 intrinsic one-element vector type cast to
// scalar type expected by the builtin
return builder.createBitcast(res, resultType);

case NEON::BI__builtin_neon_vpadd_v:
case NEON::BI__builtin_neon_vpaddq_v: {
std::string intrincsName = mlir::isa<mlir::FloatType>(vTy.getEltType())
? "llvm.aarch64.neon.faddp"
: "llvm.aarch64.neon.addp";
return buildCommonNeonCallPattern0(*this, intrincsName, {vTy, vTy}, ops,
vTy, e);
break;
}
case NEON::BI__builtin_neon_vqadd_v: {
std::string intrincsName = (intrinicId != altLLVMIntrinsic)
? "llvm.aarch64.neon.uqadd"
: "llvm.aarch64.neon.sqadd";
return buildCommonNeonCallPattern0(*this, intrincsName, {vTy, vTy}, ops,
vTy, e);
break;
}
}
return nullptr;
}

@@ -3085,9 +3111,8 @@ CIRGenFunction::buildAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
// The prototype of builtin_neon_vqrshrun_n can be found at
// https://developer.arm.com/architectures/instruction-sets/intrinsics/
return buildNeonCall(
BuiltinID, *this,
{builder.getExtendedElementVectorType(ty, true), SInt32Ty}, Ops,
"llvm.aarch64.neon.sqrshrun", ty, getLoc(E->getExprLoc()));
builder, {builder.getExtendedElementVectorType(ty, true), SInt32Ty},
Ops, "llvm.aarch64.neon.sqrshrun", ty, getLoc(E->getExprLoc()));
case NEON::BI__builtin_neon_vqshrn_n_v:
llvm_unreachable("NYI");
case NEON::BI__builtin_neon_vrshrn_n_v:
@@ -3100,7 +3125,7 @@ CIRGenFunction::buildAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
case NEON::BI__builtin_neon_vrnda_v:
case NEON::BI__builtin_neon_vrndaq_v: {
assert(!MissingFeatures::buildConstrainedFPCall());
return buildNeonCall(BuiltinID, *this, {ty}, Ops, "llvm.round", ty,
return buildNeonCall(builder, {ty}, Ops, "llvm.round", ty,
getLoc(E->getExprLoc()));
}
case NEON::BI__builtin_neon_vrndih_f16: {
@@ -3123,8 +3148,7 @@ CIRGenFunction::buildAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
case NEON::BI__builtin_neon_vrndns_f32: {
mlir::Value arg0 = buildScalarExpr(E->getArg(0));
args.push_back(arg0);
return buildNeonCall(NEON::BI__builtin_neon_vrndns_f32, *this,
{arg0.getType()}, args, "llvm.roundeven.f32",
return buildNeonCall(builder, {arg0.getType()}, args, "llvm.roundeven.f32",
getCIRGenModule().FloatTy, getLoc(E->getExprLoc()));
}
case NEON::BI__builtin_neon_vrndph_f16: {
203 changes: 160 additions & 43 deletions clang/test/CIR/CodeGen/AArch64/neon-arith.c
@@ -1,8 +1,14 @@
// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -fclangir \
// RUN: -ffreestanding -emit-cir -fno-clangir-call-conv-lowering -target-feature +neon %s -o %t.cir
// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -target-feature +neon \
// RUN: -fclangir -disable-O0-optnone \
// RUN: -flax-vector-conversions=none -fno-clangir-call-conv-lowering \
// RUN: -emit-cir -o %t.cir %s
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -fclangir \
// RUN: -ffreestanding -emit-llvm -fno-clangir-call-conv-lowering -target-feature +neon %s -o %t.ll

// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -target-feature +neon \
// RUN: -fclangir -disable-O0-optnone \
// RUN: -flax-vector-conversions=none -fno-clangir-call-conv-lowering \
// RUN: -emit-llvm -o - %s \
// RUN: | opt -S -passes=instcombine,mem2reg,simplifycfg -o %t.ll
// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s

// REQUIRES: aarch64-registered-target || arm-registered-target
@@ -28,19 +34,9 @@ float32_t test_vrndns_f32(float32_t a) {
// CIR: [[RET_VAL:%.*]] = cir.load [[RET_P]] : !cir.ptr<!cir.float>, !cir.float
// CIR: cir.return [[RET_VAL]] : !cir.float loc

// LLVM: define dso_local float @test_vrndns_f32(float [[ARG:%.*]])
// LLVM: store float [[ARG]], ptr [[ARG_SAVE:%.*]], align 4
// LLVM: [[P0:%.*]] = load float, ptr [[ARG_SAVE]], align 4,
// LLVM: store float [[P0]], ptr [[P0_SAVE:%.*]], align 4,
// LLVM: [[INTRIN_ARG:%.*]] = load float, ptr [[P0_SAVE]], align 4,
// LLVM: [[INTRIN_RES:%.*]] = call float @llvm.roundeven.f32(float [[INTRIN_ARG]])
// LLVM: store float [[INTRIN_RES]], ptr [[RES_SAVE0:%.*]], align 4,
// LLVM: [[RES_COPY0:%.*]] = load float, ptr [[RES_SAVE0]], align 4,
// LLVM: store float [[RES_COPY0]], ptr [[RES_SAVE1:%.*]], align 4,
// LLVM: [[RES_COPY1:%.*]] = load float, ptr [[RES_SAVE1]], align 4,
// LLVM: store float [[RES_COPY1]], ptr [[RET_P:%.*]], align 4,
// LLVM: [[RET_VAL:%.*]] = load float, ptr [[RET_P]], align 4,
// LLVM: ret float [[RET_VAL]]
// LLVM: {{.*}}test_vrndns_f32(float{{.*}}[[ARG:%.*]])
// LLVM: [[INTRIN_RES:%.*]] = call float @llvm.roundeven.f32(float [[ARG]])
// LLVM: ret float [[INTRIN_RES]]

float32x2_t test_vrnda_f32(float32x2_t a) {
return vrnda_f32(a);
@@ -62,19 +58,9 @@ float32x2_t test_vrnda_f32(float32x2_t a) {
// CIR: [[RET_VAL:%.*]] = cir.load [[RET_P]] : !cir.ptr<!cir.vector<!cir.float x 2>>, !cir.vector<!cir.float x 2>
// CIR: cir.return [[RET_VAL]] : !cir.vector<!cir.float x 2>

// LLVM: define dso_local <2 x float> @test_vrnda_f32(<2 x float> [[ARG:%.*]])
// LLVM: store <2 x float> [[ARG]], ptr [[ARG_SAVE:%.*]], align 8
// LLVM: [[P0:%.*]] = load <2 x float>, ptr [[ARG_SAVE]], align 8,
// LLVM: store <2 x float> [[P0]], ptr [[P0_SAVE:%.*]], align 8,
// LLVM: [[INTRIN_ARG:%.*]] = load <2 x float>, ptr [[P0_SAVE]], align 8,
// LLVM: [[INTRIN_RES:%.*]] = call <2 x float> @llvm.round.v2f32(<2 x float> [[INTRIN_ARG]])
// LLVM: store <2 x float> [[INTRIN_RES]], ptr [[RES_SAVE0:%.*]], align 8,
// LLVM: [[RES_COPY0:%.*]] = load <2 x float>, ptr [[RES_SAVE0]], align 8,
// LLVM: store <2 x float> [[RES_COPY0]], ptr [[RES_SAVE1:%.*]], align 8,
// LLVM: [[RES_COPY1:%.*]] = load <2 x float>, ptr [[RES_SAVE1]], align 8,
// LLVM: store <2 x float> [[RES_COPY1]], ptr [[RET_P:%.*]], align 8,
// LLVM: [[RET_VAL:%.*]] = load <2 x float>, ptr [[RET_P]], align 8,
// LLVM: ret <2 x float> [[RET_VAL]]
// LLVM: {{.*}}test_vrnda_f32(<2 x float>{{.*}}[[ARG:%.*]])
// LLVM: [[INTRIN_RES:%.*]] = call <2 x float> @llvm.round.v2f32(<2 x float> [[ARG]])
// LLVM: ret <2 x float> [[INTRIN_RES]]

float32x4_t test_vrndaq_f32(float32x4_t a) {
return vrndaq_f32(a);
@@ -88,16 +74,147 @@ float32x4_t test_vrndaq_f32(float32x4_t a) {
// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.round" [[INTRIN_ARG_BACK]] : (!cir.vector<!cir.float x 4>) -> !cir.vector<!cir.float x 4>
// CIR: cir.return {{%.*}} : !cir.vector<!cir.float x 4>

// LLVM: define dso_local <4 x float> @test_vrndaq_f32(<4 x float> [[ARG:%.*]])
// LLVM: store <4 x float> [[ARG]], ptr [[ARG_SAVE:%.*]], align 16
// LLVM: [[P0:%.*]] = load <4 x float>, ptr [[ARG_SAVE]], align 16,
// LLVM: store <4 x float> [[P0]], ptr [[P0_SAVE:%.*]], align 16,
// LLVM: [[INTRIN_ARG:%.*]] = load <4 x float>, ptr [[P0_SAVE]], align 16,
// LLVM: [[INTRIN_RES:%.*]] = call <4 x float> @llvm.round.v4f32(<4 x float> [[INTRIN_ARG]])
// LLVM: store <4 x float> [[INTRIN_RES]], ptr [[RES_SAVE0:%.*]], align 16,
// LLVM: [[RES_COPY0:%.*]] = load <4 x float>, ptr [[RES_SAVE0]], align 16,
// LLVM: store <4 x float> [[RES_COPY0]], ptr [[RES_SAVE1:%.*]], align 16,
// LLVM: [[RES_COPY1:%.*]] = load <4 x float>, ptr [[RES_SAVE1]], align 16,
// LLVM: store <4 x float> [[RES_COPY1]], ptr [[RET_P:%.*]], align 16,
// LLVM: [[RET_VAL:%.*]] = load <4 x float>, ptr [[RET_P]], align 16,
// LLVM: ret <4 x float> [[RET_VAL]]
// LLVM: {{.*}}test_vrndaq_f32(<4 x float>{{.*}}[[ARG:%.*]])
// LLVM: [[INTRIN_RES:%.*]] = call <4 x float> @llvm.round.v4f32(<4 x float> [[ARG]])
// LLVM: ret <4 x float> [[INTRIN_RES]]

int8x8_t test_vpadd_s8(int8x8_t a, int8x8_t b) {
return vpadd_s8(a, b);
}

// CIR-LABEL: vpadd_s8
// CIR: [[RES:%.*]] = cir.llvm.intrinsic "llvm.aarch64.neon.addp" {{%.*}}, {{%.*}} :
// CIR-SAME: (!cir.vector<!s8i x 8>, !cir.vector<!s8i x 8>) -> !cir.vector<!s8i x 8>

// LLVM: {{.*}}test_vpadd_s8(<8 x i8>{{.*}}[[A:%.*]], <8 x i8>{{.*}}[[B:%.*]])
// LLVM: [[RES:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
// LLVM: ret <8 x i8> [[RES]]


int8x16_t test_vpaddq_s8(int8x16_t a, int8x16_t b) {
return vpaddq_s8(a, b);
}

// CIR-LABEL: vpaddq_s8
// CIR: [[RES:%.*]] = cir.llvm.intrinsic "llvm.aarch64.neon.addp" {{%.*}}, {{%.*}} :
// CIR-SAME: (!cir.vector<!s8i x 16>, !cir.vector<!s8i x 16>) -> !cir.vector<!s8i x 16>

// LLVM: {{.*}}test_vpaddq_s8(<16 x i8>{{.*}}[[A:%.*]], <16 x i8>{{.*}}[[B:%.*]])
// LLVM: [[RES:%.*]] = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
// LLVM: ret <16 x i8> [[RES]]

uint8x8_t test_vpadd_u8(uint8x8_t a, uint8x8_t b) {
return vpadd_u8(a, b);
}

// CIR-LABEL: vpadd_u8
// CIR: [[RES:%.*]] = cir.llvm.intrinsic "llvm.aarch64.neon.addp" {{%.*}}, {{%.*}} :
// CIR-SAME: (!cir.vector<!u8i x 8>, !cir.vector<!u8i x 8>) -> !cir.vector<!u8i x 8>

// LLVM: {{.*}}test_vpadd_u8(<8 x i8>{{.*}}[[A:%.*]], <8 x i8>{{.*}}[[B:%.*]])
// LLVM: [[RES:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
// LLVM: ret <8 x i8> [[RES]]

int16x4_t test_vpadd_s16(int16x4_t a, int16x4_t b) {
return vpadd_s16(a, b);
}

// CIR-LABEL: vpadd_s16
// CIR: [[RES:%.*]] = cir.llvm.intrinsic "llvm.aarch64.neon.addp" {{%.*}}, {{%.*}} :
// CIR-SAME: (!cir.vector<!s16i x 4>, !cir.vector<!s16i x 4>) -> !cir.vector<!s16i x 4>
// CIR: {{%.*}} = cir.cast(bitcast, [[RES]] : !cir.vector<!s16i x 4>), !cir.vector<!s8i x 8>

// LLVM: {{.*}}test_vpadd_s16(<4 x i16>{{.*}}[[A:%.*]], <4 x i16>{{.*}}[[B:%.*]])
// LLVM: [[RES:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> [[A]], <4 x i16> [[B]])
// LLVM: ret <4 x i16> [[RES]]

int16x8_t test_vpaddq_s16(int16x8_t a, int16x8_t b) {
return vpaddq_s16(a, b);
}

// CIR-LABEL: vpaddq_s16
// CIR: [[RES:%.*]] = cir.llvm.intrinsic "llvm.aarch64.neon.addp" {{%.*}}, {{%.*}} :
// CIR-SAME: (!cir.vector<!s16i x 8>, !cir.vector<!s16i x 8>) -> !cir.vector<!s16i x 8>
// CIR: {{%.*}} = cir.cast(bitcast, [[RES]] : !cir.vector<!s16i x 8>), !cir.vector<!s8i x 16>

// LLVM: {{.*}}test_vpaddq_s16(<8 x i16>{{.*}}[[A:%.*]], <8 x i16>{{.*}}[[B:%.*]])
// LLVM: [[RES:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> [[A]], <8 x i16> [[B]])
// LLVM: ret <8 x i16> [[RES]]

uint16x4_t test_vpadd_u16(uint16x4_t a, uint16x4_t b) {
return vpadd_u16(a, b);
}

// CIR-LABEL: vpadd_u16
// CIR: [[RES:%.*]] = cir.llvm.intrinsic "llvm.aarch64.neon.addp" {{%.*}}, {{%.*}} :
// CIR-SAME: (!cir.vector<!u16i x 4>, !cir.vector<!u16i x 4>) -> !cir.vector<!u16i x 4>
// CIR: {{%.*}} = cir.cast(bitcast, [[RES]] : !cir.vector<!u16i x 4>), !cir.vector<!s8i x 8>

// LLVM: {{.*}}test_vpadd_u16(<4 x i16>{{.*}}[[A:%.*]], <4 x i16>{{.*}}[[B:%.*]])
// LLVM: [[RES:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> [[A]], <4 x i16> [[B]])
// LLVM: ret <4 x i16> [[RES]]

int32x2_t test_vpadd_s32(int32x2_t a, int32x2_t b) {
return vpadd_s32(a, b);
}

// CIR-LABEL: vpadd_s32
// CIR: [[RES:%.*]] = cir.llvm.intrinsic "llvm.aarch64.neon.addp" {{%.*}}, {{%.*}} :
// CIR-SAME: (!cir.vector<!s32i x 2>, !cir.vector<!s32i x 2>) -> !cir.vector<!s32i x 2>
// CIR: {{%.*}} = cir.cast(bitcast, [[RES]] : !cir.vector<!s32i x 2>), !cir.vector<!s8i x 8>

// LLVM: {{.*}}test_vpadd_s32(<2 x i32>{{.*}}[[A:%.*]], <2 x i32>{{.*}}[[B:%.*]])
// LLVM: [[RES:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> [[A]], <2 x i32> [[B]])
// LLVM: ret <2 x i32> [[RES]]

int32x4_t test_vpaddq_s32(int32x4_t a, int32x4_t b) {
return vpaddq_s32(a, b);
}

// CIR-LABEL: vpaddq_s32
// CIR: [[RES:%.*]] = cir.llvm.intrinsic "llvm.aarch64.neon.addp" {{%.*}}, {{%.*}} :
// CIR-SAME: (!cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>) -> !cir.vector<!s32i x 4>
// CIR: {{%.*}} = cir.cast(bitcast, [[RES]] : !cir.vector<!s32i x 4>), !cir.vector<!s8i x 16>

// LLVM: {{.*}}test_vpaddq_s32(<4 x i32>{{.*}}[[A:%.*]], <4 x i32>{{.*}}[[B:%.*]])
// LLVM: [[RES:%.*]] = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> [[A]], <4 x i32> [[B]])
// LLVM: ret <4 x i32> [[RES]]

float32x2_t test_vpadd_f32(float32x2_t a, float32x2_t b) {
return vpadd_f32(a, b);
}

// CIR-LABEL: vpadd_f32
// CIR: [[RES:%.*]] = cir.llvm.intrinsic "llvm.aarch64.neon.addp" {{%.*}}, {{%.*}} :
// CIR-SAME: (!cir.vector<!cir.float x 2>, !cir.vector<!cir.float x 2>) -> !cir.vector<!cir.float x 2>
// CIR: {{%.*}} = cir.cast(bitcast, [[RES]] : !cir.vector<!cir.float x 2>), !cir.vector<!s8i x 8>

// LLVM: {{.*}}test_vpadd_f32(<2 x float>{{.*}}[[A:%.*]], <2 x float>{{.*}}[[B:%.*]])
// LLVM: [[RES:%.*]] = call <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float> [[A]], <2 x float> [[B]])
// LLVM: ret <2 x float> [[RES]]

float32x4_t test_vpaddq_f32(float32x4_t a, float32x4_t b) {
return vpaddq_f32(a, b);
}

// CIR-LABEL: vpaddq_f32
// CIR: [[RES:%.*]] = cir.llvm.intrinsic "llvm.aarch64.neon.addp" {{%.*}}, {{%.*}} :
// CIR-SAME: (!cir.vector<!cir.float x 4>, !cir.vector<!cir.float x 4>) -> !cir.vector<!cir.float x 4>
// CIR: {{%.*}} = cir.cast(bitcast, [[RES]] : !cir.vector<!cir.float x 4>), !cir.vector<!s8i x 16>

// LLVM: {{.*}}test_vpaddq_f32(<4 x float>{{.*}}[[A:%.*]], <4 x float>{{.*}}[[B:%.*]])
// LLVM: [[RES:%.*]] = call <4 x float> @llvm.aarch64.neon.faddp.v4f32(<4 x float> [[A]], <4 x float> [[B]])
// LLVM: ret <4 x float> [[RES]]

float64x2_t test_vpaddq_f64(float64x2_t a, float64x2_t b) {
return vpaddq_f64(a, b);
}

// CIR-LABEL: vpaddq_f64
// CIR: [[RES:%.*]] = cir.llvm.intrinsic "llvm.aarch64.neon.addp" {{%.*}}, {{%.*}} :
// CIR-SAME: (!cir.vector<!cir.double x 2>, !cir.vector<!cir.double x 2>) -> !cir.vector<!cir.double x 2>
// CIR: {{%.*}} = cir.cast(bitcast, [[RES]] : !cir.vector<!cir.double x 2>), !cir.vector<!s8i x 16>

// LLVM: {{.*}}test_vpaddq_f64(<2 x double>{{.*}}[[A:%.*]], <2 x double>{{.*}}[[B:%.*]])
// LLVM: [[RES:%.*]] = call <2 x double> @llvm.aarch64.neon.faddp.v2f64(<2 x double> [[A]], <2 x double> [[B]])
// LLVM: ret <2 x double> [[RES]]
