diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 09d5a45b306d0..b7ca9aa85739e 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -791,10 +791,24 @@ static void VisitField(CXXRecordDecl *Owner, RangeTy &&Item, QualType ItemTy, else if (ItemTy->isStructureOrClassType()) VisitAccessorWrapper(Owner, Item, ItemTy->getAsCXXRecordDecl(), handlers...); + // FIXME Enable this when structs are replaced by their fields +#define STRUCTS_DECOMPOSED 0 +#if STRUCTS_DECOMPOSED else if (ItemTy->isArrayType()) VisitArrayElements(Item, ItemTy, handlers...); + else if (ItemTy->isScalarType()) + KF_FOR_EACH(handleScalarType, Item, ItemTy); +} +#else } +template +static void VisitScalarField(CXXRecordDecl *Owner, RangeTy &&Item, + QualType ItemTy, Handlers &... handlers) { + KF_FOR_EACH(handleScalarType, Item, ItemTy); +} +#endif + template static void VisitArrayElements(RangeTy Item, QualType FieldTy, Handlers &... handlers) { @@ -803,10 +817,18 @@ static void VisitArrayElements(RangeTy Item, QualType FieldTy, int64_t ElemCount = CAT->getSize().getSExtValue(); std::initializer_list{(handlers.enterArray(), 0)...}; for (int64_t Count = 0; Count < ElemCount; Count++) { +#if STRUCTS_DECOMPOSED VisitField(nullptr, Item, ET, handlers...); +#else + if (ET->isScalarType()) + VisitScalarField(nullptr, Item, ET, handlers...); + else + VisitField(nullptr, Item, ET, handlers...); +#endif (void)std::initializer_list{(handlers.nextElement(ET), 0)...}; } - (void)std::initializer_list{(handlers.leaveArray(ET, ElemCount), 0)...}; + (void)std::initializer_list{ + (handlers.leaveArray(Item, ET, ElemCount), 0)...}; } template @@ -909,6 +931,9 @@ template class SyclKernelFieldHandler { virtual bool handleReferenceType(FieldDecl *, QualType) { return true; } virtual bool handlePointerType(FieldDecl *, QualType) { return true; } virtual bool handleArrayType(FieldDecl *, QualType) { return true; } + virtual bool handleScalarType(const CXXBaseSpecifier &, QualType) { + return true; + } virtual bool handleScalarType(FieldDecl *, QualType) { return true; } // Most handlers shouldn't be handling this, just the field checker. virtual bool handleOtherType(FieldDecl *, QualType) { return true; } @@ -917,21 +942,31 @@ template class SyclKernelFieldHandler { // class/field graph. Int Headers use this to calculate offset, most others // don't have a need for these. - virtual void enterStruct(const CXXRecordDecl *, FieldDecl *) {} - virtual void leaveStruct(const CXXRecordDecl *, FieldDecl *) {} - virtual void enterStruct(const CXXRecordDecl *, const CXXBaseSpecifier &) {} - virtual void leaveStruct(const CXXRecordDecl *, const CXXBaseSpecifier &) {} + virtual bool enterStruct(const CXXRecordDecl *, FieldDecl *) { return true; } + virtual bool leaveStruct(const CXXRecordDecl *, FieldDecl *) { return true; } + virtual bool enterStruct(const CXXRecordDecl *, const CXXBaseSpecifier &) { + return true; + } + virtual bool leaveStruct(const CXXRecordDecl *, const CXXBaseSpecifier &) { + return true; + } // The following are used for stepping through array elements. - virtual void enterField(const CXXRecordDecl *, const CXXBaseSpecifier &) {} - virtual void leaveField(const CXXRecordDecl *, const CXXBaseSpecifier &) {} - virtual void enterField(const CXXRecordDecl *, FieldDecl *) {} - virtual void leaveField(const CXXRecordDecl *, FieldDecl *) {} - virtual void enterArray(const CXXBaseSpecifier &) {} - virtual void enterArray() {} - virtual void nextElement(QualType) {} - virtual void leaveArray(QualType, int64_t) {} + virtual bool enterField(const CXXRecordDecl *, const CXXBaseSpecifier &) { + return true; + } + virtual bool leaveField(const CXXRecordDecl *, const CXXBaseSpecifier &) { + return true; + } + virtual bool enterField(const CXXRecordDecl *, FieldDecl *) { return true; } + virtual bool leaveField(const CXXRecordDecl *, FieldDecl *) { return true; } + virtual bool enterArray() { return true; } + virtual bool nextElement(QualType) { return true; } + virtual bool leaveArray(const CXXBaseSpecifier &, QualType, int64_t) { + return true; + } + virtual bool leaveArray(FieldDecl *, QualType, int64_t) { return true; } }; // A type to check the validity of all of the argument types. @@ -947,10 +982,10 @@ class SyclKernelFieldChecker if (const auto *CAT = dyn_cast(FieldTy)) { QualType ET = CAT->getElementType(); return checkNotCopyableToKernel(FD, ET); - } else - return Diag.Report(FD->getLocation(), - diag::err_sycl_non_constant_array_type) - << FieldTy; + } + return Diag.Report(FD->getLocation(), + diag::err_sycl_non_constant_array_type) + << FieldTy; } if (SemaRef.getASTContext().getLangOpts().SYCLStdLayoutKernelParams) @@ -1043,6 +1078,10 @@ class SyclKernelDeclCreator size_t LastParamIndex = 0; void addParam(const FieldDecl *FD, QualType FieldTy) { + const ConstantArrayType *CAT = + SemaRef.getASTContext().getAsConstantArrayType(FieldTy); + if (CAT) + FieldTy = CAT->getElementType(); ParamDesc newParamDesc = makeParamDesc(FD, FieldTy); addParam(newParamDesc, FieldTy); } @@ -1059,7 +1098,6 @@ class SyclKernelDeclCreator SemaRef.getASTContext(), KernelDecl, SourceLocation(), SourceLocation(), std::get<1>(newParamDesc), std::get<0>(newParamDesc), std::get<2>(newParamDesc), SC_None, /*DefArg*/ nullptr); - NewParam->setScopeInfo(0, Params.size()); NewParam->setIsUsed(); @@ -1185,6 +1223,7 @@ class SyclKernelDeclCreator return true; } + // FIXME Remove this function when structs are replaced by their fields bool handleStructType(FieldDecl *FD, QualType FieldTy) final { addParam(FD, FieldTy); return true; @@ -1209,6 +1248,8 @@ class SyclKernelDeclCreator return ArrayRef(std::begin(Params) + LastParamIndex, std::end(Params)); } + + using SyclKernelFieldHandler::handleScalarType; using SyclKernelFieldHandler::handleSyclSamplerType; }; @@ -1222,6 +1263,7 @@ class SyclKernelBodyCreator InitializedEntity VarEntity; CXXRecordDecl *KernelObj; llvm::SmallVector MemberExprBases; + uint64_t ArrayIndex; FunctionDecl *KernelCallerFunc; // Using the statements/init expressions that we've created, this generates @@ -1294,11 +1336,9 @@ class SyclKernelBodyCreator return Result; } - void createExprForStructOrScalar(FieldDecl *FD) { + Expr *createInitExpr(FieldDecl *FD) { ParmVarDecl *KernelParameter = DeclCreator.getParamVarDeclsForCurrentField()[0]; - InitializedEntity Entity = - InitializedEntity::InitializeMember(FD, &VarEntity); QualType ParamType = KernelParameter->getOriginalType(); Expr *DRE = SemaRef.BuildDeclRefExpr(KernelParameter, ParamType, VK_LValue, SourceLocation()); @@ -1308,14 +1348,48 @@ class SyclKernelBodyCreator DRE = ImplicitCastExpr::Create(SemaRef.Context, FD->getType(), CK_AddressSpaceConversion, DRE, nullptr, VK_RValue); + return DRE; + } + + void createExprForStructOrScalar(FieldDecl *FD) { + InitializedEntity Entity = + InitializedEntity::InitializeMember(FD, &VarEntity); InitializationKind InitKind = InitializationKind::CreateCopy(SourceLocation(), SourceLocation()); + Expr *DRE = createInitExpr(FD); InitializationSequence InitSeq(SemaRef, Entity, InitKind, DRE); + ExprResult MemberInit = InitSeq.Perform(SemaRef, Entity, InitKind, DRE); + InitExprs.push_back(MemberInit.get()); + } + void createExprForScalarElement(FieldDecl *FD) { + InitializedEntity ArrayEntity = + InitializedEntity::InitializeMember(FD, &VarEntity); + InitializationKind InitKind = + InitializationKind::CreateCopy(SourceLocation(), SourceLocation()); + Expr *DRE = createInitExpr(FD); + InitializedEntity Entity = InitializedEntity::InitializeElement( + SemaRef.getASTContext(), ArrayIndex, ArrayEntity); + ArrayIndex++; + InitializationSequence InitSeq(SemaRef, Entity, InitKind, DRE); ExprResult MemberInit = InitSeq.Perform(SemaRef, Entity, InitKind, DRE); InitExprs.push_back(MemberInit.get()); } + void addArrayInit(FieldDecl *FD, int64_t Count) { + llvm::SmallVector ArrayInitExprs; + for (int64_t I = 0; I < Count; I++) { + ArrayInitExprs.push_back(InitExprs.back()); + InitExprs.pop_back(); + } + std::reverse(ArrayInitExprs.begin(), ArrayInitExprs.end()); + Expr *ILE = new (SemaRef.getASTContext()) + InitListExpr(SemaRef.getASTContext(), SourceLocation(), ArrayInitExprs, + SourceLocation()); + ILE->setType(FD->getType()); + InitExprs.push_back(ILE); + } + void createSpecialMethodCall(const CXXRecordDecl *SpecialClass, Expr *Base, const std::string &MethodName, FieldDecl *Field) { @@ -1332,9 +1406,7 @@ class SyclKernelBodyCreator ParamDREs[I] = SemaRef.BuildDeclRefExpr(KernelParameters[I], ParamType, VK_LValue, SourceLocation()); } - - MemberExpr *SpecialObjME = BuildMemberExpr(Base, Field); - MemberExpr *MethodME = BuildMemberExpr(SpecialObjME, Method); + MemberExpr *MethodME = BuildMemberExpr(Base, Method); QualType ResultTy = Method->getReturnType(); ExprValueKind VK = Expr::getValueKindForType(ResultTy); @@ -1368,8 +1440,10 @@ class SyclKernelBodyCreator bool handleSpecialType(FieldDecl *FD, QualType Ty) { const auto *RecordDecl = Ty->getAsCXXRecordDecl(); - // Perform initialization only if it is field of kernel object - if (MemberExprBases.size() == 1) { + ArraySubscriptExpr *ArrayRef = + dyn_cast(MemberExprBases.back()); + // Perform initialization only if decomposed from array + if (ArrayRef || MemberExprBases.size() == 2) { InitializedEntity Entity = InitializedEntity::InitializeMember(FD, &VarEntity); // Initialize with the default constructor. @@ -1427,9 +1501,10 @@ class SyclKernelBodyCreator bool handleSyclStreamType(FieldDecl *FD, QualType Ty) final { const auto *StreamDecl = Ty->getAsCXXRecordDecl(); createExprForStructOrScalar(FD); - createSpecialMethodCall(StreamDecl, MemberExprBases.back(), InitMethodName, - FD); - createSpecialMethodCall(StreamDecl, MemberExprBases.back(), + size_t NumBases = MemberExprBases.size(); + createSpecialMethodCall(StreamDecl, MemberExprBases[NumBases - 2], + InitMethodName, FD); + createSpecialMethodCall(StreamDecl, MemberExprBases[NumBases - 2], FinalizeMethodName, FD); return true; } @@ -1445,27 +1520,70 @@ class SyclKernelBodyCreator return true; } + // FIXME Remove this function when structs are replaced by their fields bool handleStructType(FieldDecl *FD, QualType FieldTy) final { createExprForStructOrScalar(FD); return true; } bool handleScalarType(FieldDecl *FD, QualType FieldTy) final { - createExprForStructOrScalar(FD); + if (dyn_cast(MemberExprBases.back())) + createExprForScalarElement(FD); + else + createExprForStructOrScalar(FD); return true; } - void enterStruct(const CXXRecordDecl *, FieldDecl *FD) final { - MemberExprBases.push_back(BuildMemberExpr(MemberExprBases.back(), FD)); + bool enterField(const CXXRecordDecl *RD, FieldDecl *FD) final { + if (!FD->getType()->isReferenceType()) + MemberExprBases.push_back(BuildMemberExpr(MemberExprBases.back(), FD)); + return true; } - void leaveStruct(const CXXRecordDecl *, FieldDecl *FD) final { + bool leaveField(const CXXRecordDecl *, FieldDecl *FD) final { + if (!FD->getType()->isReferenceType()) + MemberExprBases.pop_back(); + return true; + } + + bool enterArray() final { + Expr *ArrayBase = MemberExprBases.back(); + ExprResult IndexExpr = SemaRef.ActOnIntegerConstant(SourceLocation(), 0); + ExprResult ElementBase = SemaRef.CreateBuiltinArraySubscriptExpr( + ArrayBase, SourceLocation(), IndexExpr.get(), SourceLocation()); + MemberExprBases.push_back(ElementBase.get()); + ArrayIndex = 0; + return true; + } + + bool nextElement(QualType ET) final { + ArraySubscriptExpr *LastArrayRef = + dyn_cast(MemberExprBases.back()); MemberExprBases.pop_back(); + Expr *LastIdx = LastArrayRef->getIdx(); + llvm::APSInt Result; + SemaRef.VerifyIntegerConstantExpression(LastIdx, &Result); + Expr *ArrayBase = MemberExprBases.back(); + ExprResult IndexExpr = SemaRef.ActOnIntegerConstant( + SourceLocation(), Result.getExtValue() + 1); + ExprResult ElementBase = SemaRef.CreateBuiltinArraySubscriptExpr( + ArrayBase, SourceLocation(), IndexExpr.get(), SourceLocation()); + MemberExprBases.push_back(ElementBase.get()); + return true; + } + + bool leaveArray(FieldDecl *FD, QualType, int64_t Count) final { + addArrayInit(FD, Count); + MemberExprBases.pop_back(); + return true; } - using SyclKernelFieldHandler::enterStruct; + using SyclKernelFieldHandler::enterArray; + using SyclKernelFieldHandler::enterField; + using SyclKernelFieldHandler::handleScalarType; using SyclKernelFieldHandler::handleSyclSamplerType; - using SyclKernelFieldHandler::leaveStruct; + using SyclKernelFieldHandler::leaveArray; + using SyclKernelFieldHandler::leaveField; }; class SyclKernelIntHeaderCreator @@ -1476,23 +1594,16 @@ class SyclKernelIntHeaderCreator const CXXRecordDecl *CurStruct = nullptr; int64_t CurOffset = 0; - uint64_t getOffset(const CXXRecordDecl *RD) const { - assert(CurOffset && - "Cannot have a base class without setting the active struct"); - const ASTRecordLayout &Layout = - SemaRef.getASTContext().getASTRecordLayout(CurStruct); - return CurOffset + Layout.getBaseClassOffset(RD).getQuantity(); - } - uint64_t getOffset(const FieldDecl *FD) const { - return CurOffset + SemaRef.getASTContext().getFieldOffset(FD) / 8; - } - - void addParam(const FieldDecl *FD, QualType FieldTy, + void addParam(const FieldDecl *FD, QualType ArgTy, SYCLIntegrationHeader::kernel_param_kind_t Kind) { - uint64_t Size = - SemaRef.getASTContext().getTypeSizeInChars(FieldTy).getQuantity(); + uint64_t Size; + const ConstantArrayType *CAT = + SemaRef.getASTContext().getAsConstantArrayType(ArgTy); + if (CAT) + ArgTy = CAT->getElementType(); + Size = SemaRef.getASTContext().getTypeSizeInChars(ArgTy).getQuantity(); Header.addParamDesc(Kind, static_cast(Size), - static_cast(getOffset(FD))); + static_cast(CurOffset)); } public: @@ -1513,8 +1624,7 @@ class SyclKernelIntHeaderCreator int Dims = static_cast( AccTy->getTemplateArgs()[1].getAsIntegral().getExtValue()); int Info = getAccessTarget(AccTy) | (Dims << 11); - Header.addParamDesc(SYCLIntegrationHeader::kind_accessor, Info, - getOffset(BC.getType()->getAsCXXRecordDecl())); + Header.addParamDesc(SYCLIntegrationHeader::kind_accessor, Info, CurOffset); return true; } @@ -1526,8 +1636,7 @@ class SyclKernelIntHeaderCreator int Dims = static_cast( AccTy->getTemplateArgs()[1].getAsIntegral().getExtValue()); int Info = getAccessTarget(AccTy) | (Dims << 11); - Header.addParamDesc(SYCLIntegrationHeader::kind_accessor, Info, - getOffset(FD)); + Header.addParamDesc(SYCLIntegrationHeader::kind_accessor, Info, CurOffset); return true; } @@ -1568,10 +1677,13 @@ class SyclKernelIntHeaderCreator addParam(FD, FieldTy, SYCLIntegrationHeader::kind_pointer); return true; } + + // FIXME Remove this function when structs are replaced by their fields bool handleStructType(FieldDecl *FD, QualType FieldTy) final { addParam(FD, FieldTy, SYCLIntegrationHeader::kind_std_layout); return true; } + bool handleScalarType(FieldDecl *FD, QualType FieldTy) final { addParam(FD, FieldTy, SYCLIntegrationHeader::kind_std_layout); return true; @@ -1581,6 +1693,7 @@ class SyclKernelIntHeaderCreator addParam(FD, FieldTy, SYCLIntegrationHeader::kind_std_layout); return true; } + bool handleSyclStreamType(const CXXBaseSpecifier &BC, QualType FieldTy) final { // FIXME SYCL stream should be usable as a base type @@ -1588,33 +1701,50 @@ class SyclKernelIntHeaderCreator return true; } - // Keep track of the current struct offset. - void enterStruct(const CXXRecordDecl *RD, FieldDecl *FD) final { - CurStruct = FD->getType()->getAsCXXRecordDecl(); + bool enterField(const CXXRecordDecl *RD, FieldDecl *FD) final { CurOffset += SemaRef.getASTContext().getFieldOffset(FD) / 8; + return true; } - void leaveStruct(const CXXRecordDecl *RD, FieldDecl *FD) final { - CurStruct = RD; + bool leaveField(const CXXRecordDecl *, FieldDecl *FD) final { CurOffset -= SemaRef.getASTContext().getFieldOffset(FD) / 8; + return true; } - void enterStruct(const CXXRecordDecl *RD, const CXXBaseSpecifier &BS) final { - CurStruct = BS.getType()->getAsCXXRecordDecl(); + bool enterField(const CXXRecordDecl *RD, const CXXBaseSpecifier &BS) final { const ASTRecordLayout &Layout = SemaRef.getASTContext().getASTRecordLayout(RD); CurOffset += Layout.getBaseClassOffset(BS.getType()->getAsCXXRecordDecl()) .getQuantity(); + return true; } - void leaveStruct(const CXXRecordDecl *RD, const CXXBaseSpecifier &BS) final { - CurStruct = RD; + bool leaveField(const CXXRecordDecl *RD, const CXXBaseSpecifier &BS) final { const ASTRecordLayout &Layout = SemaRef.getASTContext().getASTRecordLayout(RD); CurOffset -= Layout.getBaseClassOffset(BS.getType()->getAsCXXRecordDecl()) .getQuantity(); + return true; } + + bool nextElement(QualType ET) final { + CurOffset += SemaRef.getASTContext().getTypeSizeInChars(ET).getQuantity(); + return true; + } + + bool leaveArray(FieldDecl *, QualType ET, int64_t Count) final { + int64_t ArraySize = + SemaRef.getASTContext().getTypeSizeInChars(ET).getQuantity(); + if (!ET->isArrayType()) { + ArraySize *= Count; + } + CurOffset -= ArraySize; + return true; + } + + using SyclKernelFieldHandler::handleScalarType; using SyclKernelFieldHandler::handleSyclSamplerType; + using SyclKernelFieldHandler::leaveArray; }; } // namespace diff --git a/clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp b/clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp new file mode 100755 index 0000000000000..8c2cfb2a1bd8b --- /dev/null +++ b/clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp @@ -0,0 +1,52 @@ +// RUN: %clang_cc1 -I %S/Inputs -fsycl -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -fsycl-int-header=%t.h %s -fsyntax-only +// RUN: FileCheck -input-file=%t.h %s + +// This test checks the integration header generated when +// the kernel argument is an Accessor array. + +// CHECK: #include + +// CHECK: class kernel_A; + +// CHECK: __SYCL_INLINE_NAMESPACE(cl) { +// CHECK-NEXT: namespace sycl { +// CHECK-NEXT: namespace detail { + +// CHECK: static constexpr +// CHECK-NEXT: const char* const kernel_names[] = { +// CHECK-NEXT: "_ZTSZ4mainE8kernel_A" +// CHECK-NEXT: }; + +// CHECK: static constexpr +// CHECK-NEXT: const kernel_param_desc_t kernel_signatures[] = { +// CHECK-NEXT: //--- _ZTSZ4mainE8kernel_A +// CHECK-NEXT: { kernel_param_kind_t::kind_accessor, 4062, 0 }, +// CHECK-NEXT: { kernel_param_kind_t::kind_accessor, 4062, 12 }, +// CHECK-EMPTY: +// CHECK-NEXT: }; + +// CHECK: static constexpr +// CHECK-NEXT: const unsigned kernel_signature_start[] = { +// CHECK-NEXT: 0 // _ZTSZ4mainE8kernel_A +// CHECK-NEXT: }; + +// CHECK: template <> struct KernelInfo { + +#include + +using namespace cl::sycl; + +template +__attribute__((sycl_kernel)) void a_kernel(Func kernelFunc) { + kernelFunc(); +} + +int main() { + + using Accessor = + accessor; + + Accessor acc[2]; + + a_kernel([=]() { acc[1].use(); }); +} diff --git a/clang/test/CodeGenSYCL/kernel-param-acc-array.cpp b/clang/test/CodeGenSYCL/kernel-param-acc-array.cpp new file mode 100644 index 0000000000000..ec8ac8bc01f5f --- /dev/null +++ b/clang/test/CodeGenSYCL/kernel-param-acc-array.cpp @@ -0,0 +1,78 @@ +// RUN: %clang_cc1 -fsycl -fsycl-is-device -I %S/Inputs -triple spir64-unknown-unknown-sycldevice -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s + +// This test checks a kernel argument that is an Accessor array + +#include + +using namespace cl::sycl; + +template +__attribute__((sycl_kernel)) void a_kernel(Func kernelFunc) { + kernelFunc(); +} + +int main() { + + using Accessor = + accessor; + Accessor acc[2]; + + a_kernel( + [=]() { + acc[1].use(); + }); +} + +// Check kernel_A parameters +// CHECK: define spir_kernel void @{{.*}}kernel_A +// CHECK-SAME: i32 addrspace(1)* [[MEM_ARG1:%[a-zA-Z0-9_]+]], +// CHECK-SAME: %"struct.{{.*}}.cl::sycl::range"* byval{{.*}}align 4 [[ACC_RANGE1:%[a-zA-Z0-9_]+_1]], +// CHECK-SAME: %"struct.{{.*}}.cl::sycl::range"* byval{{.*}}align 4 [[MEM_RANGE1:%[a-zA-Z0-9_]+_2]], +// CHECK-SAME: %"struct.{{.*}}.cl::sycl::id"* byval{{.*}}align 4 [[OFFSET1:%[a-zA-Z0-9_]+_3]], +// CHECK-SAME: i32 addrspace(1)* [[MEM_ARG2:%[a-zA-Z0-9_]+_4]], +// CHECK-SAME: %"struct.{{.*}}.cl::sycl::range"* byval{{.*}}align 4 [[ACC_RANGE2:%[a-zA-Z0-9_]+_6]], +// CHECK-SAME: %"struct.{{.*}}.cl::sycl::range"* byval{{.*}}align 4 [[MEM_RANGE2:%[a-zA-Z0-9_]+_7]], +// CHECK-SAME: %"struct.{{.*}}.cl::sycl::id"* byval{{.*}}align 4 [[OFFSET2:%[a-zA-Z0-9_]+_8]]) + +// CHECK alloca for pointer arguments +// CHECK: [[MEM_ARG1:%[a-zA-Z0-9_.]+]] = alloca i32 addrspace(1)*, align 8 +// CHECK: [[MEM_ARG2:%[a-zA-Z0-9_.]+]] = alloca i32 addrspace(1)*, align 8 + +// CHECK lambda object alloca +// CHECK: [[LOCAL_OBJECT:%0]] = alloca %"class.{{.*}}.anon", align 4 + +// CHECK allocas for ranges +// CHECK: [[ACC_RANGE1:%[a-zA-Z0-9_.]+]] = alloca %"struct.{{.*}}.cl::sycl::range" +// CHECK: [[MEM_RANGE1:%[a-zA-Z0-9_.]+]] = alloca %"struct.{{.*}}.cl::sycl::range" +// CHECK: [[OFFSET1:%[a-zA-Z0-9_.]+]] = alloca %"struct.{{.*}}.cl::sycl::id" +// CHECK: [[ACC_RANGE2:%[a-zA-Z0-9_.]+]] = alloca %"struct.{{.*}}.cl::sycl::range" +// CHECK: [[MEM_RANGE2:%[a-zA-Z0-9_.]+]] = alloca %"struct.{{.*}}.cl::sycl::range" +// CHECK: [[OFFSET2:%[a-zA-Z0-9_.]+]] = alloca %"struct.{{.*}}.cl::sycl::id" + +// CHECK accessor array default inits +// CHECK: [[ACCESSOR_ARRAY1:%[a-zA-Z0-9_]+]] = getelementptr inbounds %"class.{{.*}}.anon", %"class.{{.*}}.anon"* [[LOCAL_OBJECT]], i32 0, i32 0 +// CHECK: [[BEGIN:%[a-zA-Z0-9._]*]] = getelementptr inbounds [2 x [[ACCESSOR:.*]]], [2 x [[ACCESSOR]]]* [[ACCESSOR_ARRAY1]], i64 0, i64 0 +// CHECK: [[END:%[a-zA-Z0-9._]*]] = getelementptr inbounds [[ACCESSOR:.*]], [[ACCESSOR]]* [[BEGIN]], i64 2 +// CHECK: [[NEXT0:%[a-zA-Z0-9._]*]] = getelementptr inbounds [[ACCESSOR]], [[ACCESSOR]]* {{.*}}, i64 1 +// CHECK: [[ELEMENT:%[a-zA-Z0-9._]*]] = getelementptr inbounds [[ACCESSOR]], [[ACCESSOR]]* {{.*}}, i64 1 +// CHECK: [[ELEMENT:%[a-zA-Z0-9._]*]] = getelementptr inbounds [[ACCESSOR]], [[ACCESSOR]]* {{.*}}, i64 2 +// CHECK: [[NEXT1:%[a-zA-Z0-9._]*]] = getelementptr inbounds [[ACCESSOR]], [[ACCESSOR]]* {{.*}}, i64 1 + +// CHECK: [[ACCESSOR_ARRAY2:%[a-zA-Z0-9_]+]] = getelementptr inbounds %"class.{{.*}}.anon", %"class.{{.*}}.anon"* [[LOCAL_OBJECT]], i32 0, i32 0 +// CHECK: [[INDEX:%[a-zA-Z0-9._]*]] = getelementptr inbounds [2 x [[ACCESSOR]]], [2 x [[ACCESSOR]]]* [[ACCESSOR_ARRAY2]], i64 0, i64 0 + +// CHECK load from kernel pointer argument alloca +// CHECK: [[MEM_LOAD1:%[a-zA-Z0-9_]+]] = load i32 addrspace(1)*, i32 addrspace(1)** [[MEM_ARG1]] + +// CHECK: [[ACC_CAST1:%[0-9]+]] = addrspacecast [[ACCESSOR]]* {{.*}} to [[ACCESSOR]] addrspace(4)* + +// CHECK acc[0] __init method call +// CHECK: call spir_func void @{{.*}}__init{{.*}}(%"class.{{.*}}.cl::sycl::accessor" addrspace(4)* [[ACC_CAST1]], i32 addrspace(1)* [[MEM_LOAD1]], %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[ACC_RANGE1]], %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[MEM_RANGE1]], %"struct.{{.*}}.cl::sycl::id"* byval({{.*}}) align 4 [[OFFSET1]]) + +// CHECK load from kernel pointer argument alloca +// CHECK: [[MEM_LOAD2:%[a-zA-Z0-9_]+]] = load i32 addrspace(1)*, i32 addrspace(1)** [[MEM_ARG2]] + +// CHECK: [[ACC_CAST2:%[0-9]+]] = addrspacecast [[ACCESSOR]]* {{.*}} to [[ACCESSOR]] addrspace(4)* + +// CHECK acc[1] __init method call +// CHECK: call spir_func void @{{.*}}__init{{.*}}(%"class.{{.*}}.cl::sycl::accessor" addrspace(4)* [[ACC_CAST2]], i32 addrspace(1)* [[MEM_LOAD2]], %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[ACC_RANGE2]], %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[MEM_RANGE2]], %"struct.{{.*}}.cl::sycl::id"* byval({{.*}}) align 4 [[OFFSET2]]) diff --git a/clang/test/CodeGenSYCL/kernel-param-member-acc-array-ih.cpp b/clang/test/CodeGenSYCL/kernel-param-member-acc-array-ih.cpp new file mode 100644 index 0000000000000..141191219b4dc --- /dev/null +++ b/clang/test/CodeGenSYCL/kernel-param-member-acc-array-ih.cpp @@ -0,0 +1,60 @@ +// RUN: %clang_cc1 -I %S/Inputs -fsycl -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -fsycl-int-header=%t.h %s -fsyntax-only +// RUN: FileCheck -input-file=%t.h %s +// XFAIL for now due to : https://github.com/intel/llvm/issues/2018 +// XFAIL: * + +// This test checks the integration header when kernel argument +// is a struct containing an Accessor array. + +// CHECK: #include + +// CHECK: class kernel_C; + +// CHECK: __SYCL_INLINE_NAMESPACE(cl) { +// CHECK-NEXT: namespace sycl { +// CHECK-NEXT: namespace detail { + +// CHECK: static constexpr +// CHECK-NEXT: const char* const kernel_names[] = { +// CHECK-NEXT: "_ZTSZ4mainE8kernel_C" +// CHECK-NEXT: }; + +// CHECK: static constexpr +// CHECK-NEXT: const kernel_param_desc_t kernel_signatures[] = { +// CHECK-NEXT: //--- _ZTSZ4mainE8kernel_C +// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 24, 0 }, +// CHECK-NEXT: { kernel_param_kind_t::kind_accessor, 4062, 0 }, +// CHECK-NEXT: { kernel_param_kind_t::kind_accessor, 4062, 12 }, +// CHECK-EMPTY: +// CHECK-NEXT: }; + +// CHECK: static constexpr +// CHECK-NEXT: const unsigned kernel_signature_start[] = { +// CHECK-NEXT: 0 // _ZTSZ4mainE8kernel_C +// CHECK-NEXT: }; + +// CHECK: template <> struct KernelInfo { + +#include + +using namespace cl::sycl; + +template +__attribute__((sycl_kernel)) void a_kernel(Func kernelFunc) { + kernelFunc(); +} + +int main() { + + using Accessor = + accessor; + + struct struct_acc_t { + Accessor member_acc[2]; + } struct_acc; + + a_kernel( + [=]() { + struct_acc.member_acc[1].use(); + }); +} diff --git a/clang/test/CodeGenSYCL/kernel-param-member-acc-array.cpp b/clang/test/CodeGenSYCL/kernel-param-member-acc-array.cpp new file mode 100644 index 0000000000000..ae476edf08c2e --- /dev/null +++ b/clang/test/CodeGenSYCL/kernel-param-member-acc-array.cpp @@ -0,0 +1,85 @@ +// RUN: %clang_cc1 -fsycl -fsycl-is-device -I %S/Inputs -fsycl-int-header=%t.h -triple spir64-unknown-unknown-sycldevice -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s +// XFAIL: * + +// This test checks a kernel with struct parameter that contains an Accessor array. + +#include + +using namespace cl::sycl; + +template +__attribute__((sycl_kernel)) void a_kernel(Func kernelFunc) { + kernelFunc(); +} + +int main() { + + using Accessor = + accessor; + + struct struct_acc_t { + Accessor member_acc[2]; + } struct_acc; + + a_kernel( + [=]() { + struct_acc.member_acc[1].use(); + }); +} + +// CHECK kernel_C parameters +// CHECK: define spir_kernel void @{{.*}}kernel_C +// CHECK-SAME: %struct.{{.*}}.struct_acc_t* byval(%struct.{{.*}}.struct_acc_t) align 4 [[STRUCT:%[a-zA-Z0-9_]+]], +// CHECK-SAME: i32 addrspace(1)* [[MEM_ARG1:%[a-zA-Z0-9_]+]], +// CHECK-SAME: %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[ACC_RANGE1:%[a-zA-Z0-9_]+1]], +// CHECK-SAME: %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[MEM_RANGE1:%[a-zA-Z0-9_]+2]], +// CHECK-SAME: %"struct.{{.*}}.cl::sycl::id"* byval({{.*}}) align 4 [[OFFSET1:%[a-zA-Z0-9_]+3]], +// CHECK-SAME: i32 addrspace(1)* [[MEM_ARG2:%[a-zA-Z0-9_]+4]], +// CHECK-SAME: %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[ACC_RANGE2:%[a-zA-Z0-9_]+6]], +// CHECK-SAME: %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[MEM_RANGE2:%[a-zA-Z0-9_]+7]], +// CHECK-SAME: %"struct.{{.*}}.cl::sycl::id"* byval({{.*}}) align 4 [[OFFSET2:%[a-zA-Z0-9_]+8]]) + +// Check alloca for pointer arguments +// CHECK: [[MEM_ARG1]].addr{{[0-9]*}} = alloca i32 addrspace(1)*, align 8 +// CHECK: [[MEM_ARG1]].addr{{[0-9]*}} = alloca i32 addrspace(1)*, align 8 + +// Check lambda object alloca +// CHECK: [[LOCAL_OBJECT:%0]] = alloca %"class.{{.*}}.anon", align 4 + +// Check allocas for ranges +// CHECK: [[ACC_RANGE1:%[a-zA-Z0-9_.]+]] = alloca %"struct.{{.*}}.cl::sycl::range" +// CHECK: [[MEM_RANGE1:%[a-zA-Z0-9_.]+]] = alloca %"struct.{{.*}}.cl::sycl::range" +// CHECK: [[OFFSET1:%[a-zA-Z0-9_.]+]] = alloca %"struct.{{.*}}.cl::sycl::id" +// CHECK: [[ACC_RANGE2:%[a-zA-Z0-9_.]+]] = alloca %"struct.{{.*}}.cl::sycl::range" +// CHECK: [[MEM_RANGE2:%[a-zA-Z0-9_.]+]] = alloca %"struct.{{.*}}.cl::sycl::range" +// CHECK: [[OFFSET2:%[a-zA-Z0-9_.]+]] = alloca %"struct.{{.*}}.cl::sycl::id" + +// Check init of local struct +// CHECK: [[L_STRUCT_ADDR:%[a-zA-Z0-9_]+]] = getelementptr inbounds %"class.{{.*}}.anon", %"class.{{.*}}.anon"* [[LOCAL_OBJECT]], i32 0, i32 0 +// CHECK: [[MEMCPY_DST:%[0-9a-zA-Z_]+]] = bitcast %struct.{{.*}}struct_acc_t* [[L_STRUCT_ADDR]] to i8* +// CHECK: [[MEMCPY_SRC:%[0-9a-zA-Z_]+]] = bitcast %struct.{{.*}}struct_acc_t* %{{[0-9a-zA-Z_]+}} to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[MEMCPY_DST]], i8* align 4 [[MEMCPY_SRC]], i64 24, i1 false) + +// Check accessor array GEP for member_acc[0] +// CHECK: [[ACCESSOR_ARRAY1:%[a-zA-Z0-9_]+]] = getelementptr inbounds %"class.{{.*}}.anon", %"class.{{.*}}.anon"* [[LOCAL_OBJECT]], i32 0, i32 0 +// CHECK: [[MEMBER1:%[a-zA-Z_]+]] = getelementptr inbounds %struct.{{.*}}.struct_acc_t, %struct.{{.*}}.struct_acc_t* [[ACCESSOR_ARRAY1]], i32 0, i32 0 +// CHECK: [[Z0:%[a-zA-Z0-9_]*]] = getelementptr inbounds [2 x %"class.{{.*}}.cl::sycl::accessor"], [2 x %"class.{{.*}}.cl::sycl::accessor"]* [[MEMBER1]], i64 0, i64 0 + +// Check load from kernel pointer argument alloca +// CHECK: [[MEM_LOAD1:%[a-zA-Z0-9_]+]] = load i32 addrspace(1)*, i32 addrspace(1)** [[MEM_ARG1]].addr{{[0-9]*}} + +// Check acc[0] __init method call +// CHECK: [[ACC_CAST1:%[0-9]+]] = addrspacecast %"class{{.*}}accessor"* [[Z0]] to %"class{{.*}}accessor" addrspace(4)* +// CHECK: call spir_func void @{{.*}}__init{{.*}}(%"class.{{.*}}.cl::sycl::accessor" addrspace(4)* [[ACC_CAST1]], i32 addrspace(1)* [[MEM_LOAD1]], %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[ACC_RANGE1]], %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[MEM_RANGE1]], %"struct.{{.*}}.cl::sycl::id"* byval({{.*}}) align 4 [[OFFSET1]]) + +// Check accessor array GEP for member_acc[1] +// CHECK: [[ACCESSOR_ARRAY2:%[a-zA-Z0-9_]+]] = getelementptr inbounds %"class.{{.*}}.anon", %"class.{{.*}}.anon"* [[LOCAL_OBJECT]], i32 0, i32 0 +// CHECK: [[MEMBER2:%[a-zA-Z0-9_]+]] = getelementptr inbounds %struct.{{.*}}.struct_acc_t, %struct.{{.*}}.struct_acc_t* [[ACCESSOR_ARRAY2]], i32 0, i32 0 +// CHECK: [[Z1:%[a-zA-Z0-9_]*]] = getelementptr inbounds [2 x %"class.{{.*}}.cl::sycl::accessor"], [2 x %"class.{{.*}}.cl::sycl::accessor"]* [[MEMBER2]], i64 0, i64 1 + +// Check load from kernel pointer argument alloca +// CHECK: [[MEM_LOAD2:%[a-zA-Z0-9_]+]] = load i32 addrspace(1)*, i32 addrspace(1)** [[MEM_ARG1]].addr{{[0-9]*}} + +// Check acc[1] __init method call +// CHECK: [[ACC_CAST2:%[0-9]+]] = addrspacecast %"class{{.*}}accessor"* [[Z1]] to %"class{{.*}}accessor" addrspace(4)* +// CHECK: call spir_func void @{{.*}}__init{{.*}}(%"class.{{.*}}.cl::sycl::accessor" addrspace(4)* [[ACC_CAST2]], i32 addrspace(1)* [[MEM_LOAD2]], %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[ACC_RANGE2]], %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[MEM_RANGE2]], %"struct.{{.*}}.cl::sycl::id"* byval({{.*}}) align 4 [[OFFSET2]]) diff --git a/clang/test/CodeGenSYCL/kernel-param-pod-array-ih.cpp b/clang/test/CodeGenSYCL/kernel-param-pod-array-ih.cpp new file mode 100755 index 0000000000000..49fd34d3206e5 --- /dev/null +++ b/clang/test/CodeGenSYCL/kernel-param-pod-array-ih.cpp @@ -0,0 +1,55 @@ +// RUN: %clang_cc1 -I %S/Inputs -fsycl -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -fsycl-int-header=%t.h %s -fsyntax-only +// RUN: FileCheck -input-file=%t.h %s + +// This test checks the integration header generated for a kernel +// with an argument that is a POD array. + +// CHECK: #include + +// CHECK: class kernel_B; + +// CHECK: __SYCL_INLINE_NAMESPACE(cl) { +// CHECK-NEXT: namespace sycl { +// CHECK-NEXT: namespace detail { + +// CHECK: static constexpr +// CHECK-NEXT: const char* const kernel_names[] = { +// CHECK-NEXT: "_ZTSZ4mainE8kernel_B" +// CHECK-NEXT: }; + +// CHECK: static constexpr +// CHECK-NEXT: const kernel_param_desc_t kernel_signatures[] = { +// CHECK-NEXT: //--- _ZTSZ4mainE8kernel_B +// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 0 }, +// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 4 }, +// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 8 }, +// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 12 }, +// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 16 }, +// CHECK-EMPTY: +// CHECK-NEXT: }; + +// CHECK: static constexpr +// CHECK-NEXT: const unsigned kernel_signature_start[] = { +// CHECK-NEXT: 0 // _ZTSZ4mainE8kernel_B +// CHECK-NEXT: }; + +// CHECK: template <> struct KernelInfo { + +#include + +using namespace cl::sycl; + +template +__attribute__((sycl_kernel)) void a_kernel(Func kernelFunc) { + kernelFunc(); +} + +int main() { + + int a[5]; + + a_kernel( + [=]() { + int local = a[3]; + }); +} diff --git a/clang/test/CodeGenSYCL/kernel-param-pod-array.cpp b/clang/test/CodeGenSYCL/kernel-param-pod-array.cpp new file mode 100755 index 0000000000000..9a35239ad1fb8 --- /dev/null +++ b/clang/test/CodeGenSYCL/kernel-param-pod-array.cpp @@ -0,0 +1,43 @@ +// RUN: %clang_cc1 -fsycl -fsycl-is-device -I %S/Inputs -triple spir64-unknown-unknown-sycldevice -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s + +// This test checks a kernel with an argument that is a POD array. + +#include + +using namespace cl::sycl; + +template +__attribute__((sycl_kernel)) void a_kernel(Func kernelFunc) { + kernelFunc(); +} + +int main() { + + int a[2]; + + a_kernel( + [=]() { + int local = a[1]; + }); +} + +// Check kernel_B parameters +// CHECK: define spir_kernel void @{{.*}}kernel_B +// CHECK-SAME: i32 [[ELEM_ARG0:%[a-zA-Z0-9_]+]], +// CHECK-SAME: i32 [[ELEM_ARG1:%[a-zA-Z_]+_[0-9]+]]) + +// Check local lambda object alloca +// CHECK: [[LOCAL_OBJECT:%[0-9]+]] = alloca %"class.{{.*}}.anon", align 4 + +// Check local variables created for parameters +// CHECK: store i32 [[ELEM_ARG0]], i32* [[ELEM_L0:%[a-zA-Z_]+.addr]], align 4 +// CHECK: store i32 [[ELEM_ARG1]], i32* [[ELEM_L1:%[a-zA-Z_]+.addr[0-9]*]], align 4 + +// Check init of local array +// CHECK: [[ARRAY:%[0-9]*]] = getelementptr inbounds %"class.{{.*}}.anon", %"class.{{.*}}.anon"* [[LOCAL_OBJECT]], i32 0, i32 0 +// CHECK: [[ARRAY_BEGIN:%[a-zA-Z_.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* [[ARRAY]], i64 0, i64 0 +// CHECK: [[ARRAY0:%[0-9]*]] = load i32, i32* [[ELEM_L0]], align 4 +// CHECK: store i32 [[ARRAY0]], i32* [[ARRAY_BEGIN]], align 4 +// CHECK: [[ARRAY_ELEMENT:%[a-zA-Z_.]+]] = getelementptr inbounds i32, i32* %arrayinit.begin, i64 1 +// CHECK: [[ARRAY1:%[0-9]*]] = load i32, i32* [[ELEM_L1]], align 4 +// CHECK: store i32 [[ARRAY1]], i32* [[ARRAY_ELEMENT]], align 4 \ No newline at end of file diff --git a/clang/test/SemaSYCL/array-kernel-param-neg.cpp b/clang/test/SemaSYCL/array-kernel-param-neg.cpp new file mode 100755 index 0000000000000..0618014c9fb10 --- /dev/null +++ b/clang/test/SemaSYCL/array-kernel-param-neg.cpp @@ -0,0 +1,43 @@ +// RUN: %clang_cc1 -fsycl -fsycl-is-device -fcxx-exceptions -verify -fsyntax-only %s + +// This test checks if compiler reports compilation error on an attempt to pass +// an array of non-trivially copyable structs as SYCL kernel parameter or +// a non-constant size array. + +struct B { + int i; + B(int _i) : i(_i) {} + B(const B &x) : i(x.i) {} +}; + +struct D { + int i; + ~D(); +}; + +class E { + // expected-error@+1 {{kernel parameter is not a constant size array}} + int i[]; + +public: + int operator()() { return i[0]; } +}; + +template +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { + kernelFunc(); +} + +void test() { + B nsl1[4] = {1, 2, 3, 4}; + D nsl2[5]; + E es; + kernel_single_task([=] { + // expected-error@+1 {{kernel parameter has non-trivially copy constructible class/struct type}} + int b = nsl1[2].i; + // expected-error@+1 {{kernel parameter has non-trivially destructible class/struct type}} + int d = nsl2[4].i; + }); + + kernel_single_task(es); +} diff --git a/clang/test/SemaSYCL/array-kernel-param.cpp b/clang/test/SemaSYCL/array-kernel-param.cpp new file mode 100755 index 0000000000000..c8bdb390467a1 --- /dev/null +++ b/clang/test/SemaSYCL/array-kernel-param.cpp @@ -0,0 +1,101 @@ +// RUN: %clang_cc1 -I %S/Inputs -fsycl -fsycl-is-device -ast-dump %s | FileCheck %s + +// This test checks that compiler generates correct kernel arguments for +// arrays, Accessor arrays, and structs containing Accessors. + +#include + +using namespace cl::sycl; + +template +__attribute__((sycl_kernel)) void a_kernel(Func kernelFunc) { + kernelFunc(); +} + +int main() { + + using Accessor = + accessor; + + Accessor acc[2]; + int a[2]; + struct struct_acc_t { + Accessor member_acc[4]; + } struct_acc; + + a_kernel( + [=]() { + acc[1].use(); + }); + + a_kernel( + [=]() { + int local = a[1]; + }); + + a_kernel( + [=]() { + struct_acc.member_acc[2].use(); + }); +} + +// Check kernel_A parameters +// CHECK: FunctionDecl {{.*}}kernel_A{{.*}} 'void (__global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>, __global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>)' +// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_ '__global int *' +// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_ 'cl::sycl::range<1>' +// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_ 'cl::sycl::range<1>' +// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_ 'cl::sycl::id<1>' +// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_ '__global int *' +// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_ 'cl::sycl::range<1>' +// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_ 'cl::sycl::range<1>' +// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_ 'cl::sycl::id<1>' +// CHECK: CXXMemberCallExpr {{.*}} 'void' +// CHECK-NEXT: MemberExpr {{.*}}__init +// CHECK: CXXMemberCallExpr {{.*}} 'void' +// CHECK-NEXT: MemberExpr {{.*}}__init + +// Check kernel_B parameters +// CHECK: FunctionDecl {{.*}}kernel_B{{.*}} 'void (int, int)' +// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_ 'int' +// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_ 'int' +// Check kernel_B inits +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: DeclStmt +// CHECK-NEXT: VarDecl {{.*}} cinit +// CHECK-NEXT: InitListExpr +// CHECK-NEXT: InitListExpr {{.*}} 'int [2]' +// CHECK: ImplicitCastExpr +// CHECK: DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} '_arg_' 'int' +// CHECK: ImplicitCastExpr +// CHECK: DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} '_arg_' 'int' + +// Correct and enable after struct members are extracted into separate parameters +// C HECK kernel_C parameters +// C HECK: FunctionDecl {{.*}}kernel_C{{.*}} 'void (struct {{.*}}, __global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>, __global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>, __global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>, __global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>)' +// C HECK-NEXT: ParmVarDecl {{.*}} 'struct {{.*}}' +// C HECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc '__global int *' +// C HECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::range<1>' +// C HECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::range<1>' +// C HECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::id<1>' +// C HECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc '__global int *' +// C HECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::range<1>' +// C HECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::range<1>' +// C HECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::id<1>' +// C HECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc '__global int *' +// C HECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::range<1>' +// C HECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::range<1>' +// C HECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::id<1>' +// C HECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc '__global int *' +// C HECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::range<1>' +// C HECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::range<1>' +// C HECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::id<1>' + +// C HECK that four accessor init functions are called +// C HECK: CXXMemberCallExpr {{.*}} 'void' +// C HECK-NEXT: MemberExpr {{.*}}__init +// C HECK: CXXMemberCallExpr {{.*}} 'void' +// C HECK-NEXT: MemberExpr {{.*}}__init +// C HECK: CXXMemberCallExpr {{.*}} 'void' +// C HECK-NEXT: MemberExpr {{.*}}__init +// C HECK: CXXMemberCallExpr {{.*}} 'void' +// C HECK-NEXT: MemberExpr {{.*}}__init diff --git a/sycl/doc/CompilerAndRuntimeDesign.md b/sycl/doc/CompilerAndRuntimeDesign.md index 18841a07c0b6c..4fae3b6fb4880 100644 --- a/sycl/doc/CompilerAndRuntimeDesign.md +++ b/sycl/doc/CompilerAndRuntimeDesign.md @@ -168,7 +168,8 @@ __kernel KernelName(global int* a) { ``` OpenCL kernel function is generated by the compiler inside the Sema using AST -nodes. +nodes. Additional details of kernel parameter passing may be found in the document +[SYCL Kernel Parameter Handling and Array Support](KernelParameterPassing.md) . ### SYCL support in the driver diff --git a/sycl/doc/KernelParameterPassing.md b/sycl/doc/KernelParameterPassing.md new file mode 100755 index 0000000000000..fc71e4b173851 --- /dev/null +++ b/sycl/doc/KernelParameterPassing.md @@ -0,0 +1,428 @@ +

SYCL Kernel Parameter Handling and Array Support

+ +

Introduction

+ +This document describes how parameters of SYCL kernels are passed +from host to device. Support for arrays as kernel parameters was added +later and aspects of that design are covered in more detail. +The special treatment of arrays of `sycl::accessor` objects is also discussed. +Array support covers these cases: + +1. arrays of standard-layout type +2. arrays of accessors +3. arrays of structs that contain accessor arrays or accessor fields + +The motivation for allowing arrays as kernel parameters is to +bring consistency to the treatment of arrays. +In C++ a lambda function is allowed to access an element of an array +defined outside the lambda. The compiler captures the entire array +by value. Note that this behavior is limited to implicit +capture of the array by value. If the array name were in +the capture list then the base address of the array would be captured +and not the entire array. + +A user would expect the same mode of array capture in a SYCL kernel +lambda object as in any other lambda object. + +The first few sections describe the overall design. +The last three sections provide additional details of array support. +The implementation of this design is confined to four classes in the +file `SemaSYCL.cpp`. +

A SYCL Kernel

+ +The SYCL constructs `single_task`, `parallel_for`, and +`parallel_for_work_group` each take a function object or a lambda function + as one of their arguments. The code within the function object or +lambda function is executed on the device. +To enable execution of the kernel on OpenCL devices, the lambda/function object +is converted into the format of an OpenCL kernel. + +

SYCL Kernel Code Generation

+ +Consider a source code example that captures an int, a struct and an accessor +by value: + +```C++ +constexpr size_t c_num_items = 10; +range<1> num_items{c_num_items}; // range<1>(num_items) + +int main() +{ + int output[c_num_items]; + queue myQueue; + + int i = 55; + struct S { + int m; + } s = { 66 }; + auto outBuf = buffer(&output[0], num_items); + + myQueue.submit([&](handler &cgh) { + auto outAcc = outBuf.get_access(cgh); + cgh.parallel_for(num_items, [=](cl::sycl::id<1> index) { + outAcc[index] = i + s.m; + }); + }); + + return 0; +} +``` + +The input to the code generation routines is a function object that represents +the kernel. In pseudo-code: + +```C++ +struct Capture { + sycl::accessor outAcc; + int i; + struct S s; + () { + outAcc[index] = i + s.m; + } +} +``` + +The compiler-generated code for a call to such a lambda function would look like this: +```C++ +()(struct Capture* this); +``` + +When offloading the kernel to a device, the lambda/function object's +function operator cannot be directly called with a capture object address. +Instead, the code generated for the device is in the form of a +“kernel caller” and a “kernel callee”. +The callee is a clone of the SYCL kernel object. +The caller is generated in the form of an OpenCL kernel function. +It receives the lambda capture object in pieces, assembles the pieces +into the original lambda capture object and then calls the callee: + +```C++ +spir_kernel void caller( + __global int* AccData, // arg1 of accessor init function + range<1> AccR1, // arg2 of accessor init function + range<1> AccR2, // arg3 of accessor init function + id<1> I, // arg4 of accessor init function + int i, + struct S s +) +{ + // Local capture object + struct Capture local; + + // Reassemble capture object from parts + local.i = i; + local.s = s; + // Call accessor’s init function + sycl::accessor::init(&local.outAcc, AccData, AccR1, AccR2, I); + + // Call the kernel body + callee(&local, id<1> wi); +} + +spir_func void callee(struct Capture* this, id<1> wi) +{ +} +``` + +As may be observed from the example above, standard-layout lambda capture +components are passed by value to the device as separate parameters. +This includes scalars, pointers, and standard-layout structs. +Certain object types defined by the SYCL standard, such as +`sycl::accessor` and `sycl::sampler` although standard-layout, cannot be +simply copied from host to device. Their layout on the device may be different +from that on the host. Some host fields may be absent on the device, +other host fields replaced with device-specific fields and +the host data pointer field must be translated to an OpenCL +or L0 memory object before it can be passed as a kernel parameter. +To enable all of this, the parameters of the `sycl::accessor` +and `sycl::sampler` init functions are transfered from +host to device separately. The values received on the device +are passed to the `init` functions executed on the device, +which results in the reassembly of the SYCL object in a form usable on the device. + +There is one other aspect of code generation. An “integration header” +is generated for use during host compilation. +This header file contains entries for each kernel. +Among the items it defines is a table of sizes and offsets of the +kernel parameters. +For the source example above the integration header contains the +following snippet: + +```C++ +// array representing signatures of all kernels defined in the +// corresponding source +static constexpr +const kernel_param_desc_t kernel_signatures[] = { + //--- _ZTSZZ4mainENKUlRN2cl4sycl7handlerEE19->18clES2_E6Worker + { kernel_param_kind_t::kind_accessor, 4062, 0 }, + { kernel_param_kind_t::kind_std_layout, 4, 32 }, + { kernel_param_kind_t::kind_std_layout, 4, 36 }, +}; +``` + +Each entry in the kernel_signatures table is a `kernel_param_desc_t` +object which contains three values: +1) an encoding of the type of capture object member +2) a field that encodes additional properties, and +3) an offset within the lambda object where the value of that kernel argument is placed + +The previous sections described how kernel arguments are handled today. +The next three sections describe support for arrays. + +

Fix 1: Kernel Arguments that are Standard-Layout Arrays

+ +As described earlier, each variable captured by a lambda that comprises a +SYCL kernel becomes a parameter of the kernel caller function. +For arrays, simply allowing them through would result in a +function parameter of array type. This is not supported in C++. +Therefore, the array needing capture is decomposed into its elements for +the purposes of passing to the device. Each array element is passed as a +separate parameter. The array elements received on the device +are copied into the array within the local capture object. + +

Source code fragment:

+ +```C++ + constexpr int num_items = 2; + int array[num_items]; + int output[num_items]; + + auto outBuf = buffer(&output[0], num_items); + + myQueue.submit([&](handler &cgh) { + auto outAcc = outBuf.get_access(cgh); + cgh.parallel_for(num_items, [=](cl::sycl::id<1> index) { + outAcc[index] = array[index.get(0)]; + }); + }); +``` + +

Integration header produced:

+ +```C++ +static constexpr +const kernel_param_desc_t kernel_signatures[] = { + //--- _ZTSZZ1fRN2cl4sycl5queueEENK3$_0clERNS0_7handlerEE6Worker + { kernel_param_kind_t::kind_accessor, 4062, 0 }, + { kernel_param_kind_t::kind_std_layout, 4, 32 }, + { kernel_param_kind_t::kind_std_layout, 4, 36 }, + +}; + +``` + +

The changes to device code made to support this extension, in pseudo-code:

+ +```C++ +struct Capture { + sycl::accessor outAcc; + int array[num_items]; + () { + // Body + } +} + +spir_kernel void caller( + __global int* AccData, // arg1 of accessor init function + range<1> AccR1, // arg2 of accessor init function + range<1> AccR2, // arg3 of accessor init function + id<1> I, // arg4 of accessor init function + int p_array_0; // Pass array element 0 + int p_array_1; // Pass array element 1 +) +{ + // Local capture object + struct Capture local; + + // Reassemble capture object from parts + // Initialize array using existing clang Initialization mechanisms + local.array[0] = p_array_0; + local.array[1] = p_array_1; + // Call accessor’s init function + sycl::accessor::init(&local.outAcc, AccData, AccR1, AccR2, I); + + callee(&local, id<1> wi); +} +``` + +

Fix 2: Kernel Arguments that are Arrays of Accessors

+ +Arrays of accessors are supported in a manner similar to that of a plain +accessor. For each accessor array element, the four values required to +call its init function are passed as separate arguments to the kernel. +Reassembly within the kernel caller is done by calling the `init` functions +of each accessor array element in ascending index value. + +

Source code fragment:

+ +```C++ + myQueue.submit([&](handler &cgh) { + using Accessor = + accessor; + Accessor inAcc[2] = {in_buffer1.get_access(cgh), + in_buffer2.get_access(cgh)}; + auto outAcc = out_buffer.get_access(cgh); + + cgh.parallel_for(num_items, [=](cl::sycl::id<1> index) { + outAcc[index] = inAcc[0][index] + inAcc[1][index]; + }); + }); +``` + +

Integration header:

+ +```C++ +static constexpr +const kernel_param_desc_t kernel_signatures[] = { + //--- _ZTSZZ4mainENKUlRN2cl4sycl7handlerEE20->18clES2_E6Worker + { kernel_param_kind_t::kind_accessor, 4062, 0 }, + { kernel_param_kind_t::kind_accessor, 4062, 32 }, + { kernel_param_kind_t::kind_accessor, 4062, 64 }, +}; +``` + +

Device code generated in pseudo-code form:

+ +```C++ +struct Capture { + sycl::accessor outAcc; + sycl::accessor inAcc[2]; + () { + // Body + } +} + +spir_kernel void caller( + __global int* outAccData, // args of OutAcc + range<1> outAccR1, + range<1> outAccR2, + id<1> outI, + __global int* inAccData_0, // args of inAcc[0] + range<1> inAccR1_0, + range<1> inAccR2_0, + id<1> inI_0, + __global int* inAccData_1, // args of inAcc[1] + range<1> inAccR1_1, + range<1> inAccR2_1, + id<1> inI_1, +) +{ + // Local capture object + struct Capture local; + + // Reassemble capture object from parts + // Call outAcc accessor’s init function + sycl::accessor::init(&local.outAcc, outAccData, outAccR1, outAccR2, outI); + + // Call inAcc[0] accessor’s init function + sycl::accessor::init(&local.inAcc[0], inAccData_0, inAccR1_0, inAccR2_0, inI_0); + + // Call inAcc[1] accessor’s init function + sycl::accessor::init(&local.inAcc[1], inAccData_1, inAccR1_1, inAccR2_1, inI_1); + + callee(&local, id<1> wi); +} +``` + +

Fix 3: Accessor Arrays within Structs

+ +Kernel parameters that are structs are traversed member +by member, recursively, to enumerate member structs that are one of +the SYCL special types: `sycl::accessor` and `sycl::sampler`. +The arguments of the `init` functions of each special struct encountered +in the traversal are added as separate arguments to the kernel. +Support for arrays containing SYCL special types +builds upon the support for single accessors within structs. +Each element of such arrays is treated as +an individual object, and the arguments of its init function +are added to the kernel arguments in sequence. +Within the kernel caller function, the lambda object is reassembled +in a manner similar to other instances of accessor arrays. + + +

Source code fragment:

+ +```C++ + myQueue.submit([&](handler &cgh) { + using Accessor = + accessor; + struct S { + int m; + sycl::accessor inAcc[2]; + } s = { 55, + {in_buffer1.get_access(cgh), + in_buffer2.get_access(cgh)} + }; + auto outAcc = out_buffer.get_access(cgh); + + cgh.parallel_for(num_items, [=](cl::sycl::id<1> index) { + outAcc[index] = s.m + s.inAcc[0][index] + s.inAcc[1][index]; + }); +}); +``` + +

Integration header:

+ +```C++ +static constexpr +const kernel_param_desc_t kernel_signatures[] = { + //--- _ZTSZZ4mainENKUlRN2cl4sycl7handlerEE20->18clES2_E6Worker + { kernel_param_kind_t::kind_accessor, 4062, 0 }, + { kernel_param_kind_t::kind_std_layout, 72, 32 }, + { kernel_param_kind_t::kind_accessor, 4062, 40 }, + { kernel_param_kind_t::kind_accessor, 4062, 72 }, + +}; +``` + +

Device code generated in pseudo-code form:

+ +```C++ +struct Capture { + sycl::accessor outAcc; + struct S s; + () { + // Body + } +} + +spir_kernel void caller( + __global int* outAccData, // args of OutAcc + range<1> outAccR1, + range<1> outAccR2, + id<1> outI, + struct S s, // the struct S + __global int* inAccData_0, // args of s.inAcc[0] + range<1> inAccR1_0, + range<1> inAccR2_0, + id<1> inI_0, + __global int* inAccData_1, // args of s.inAcc[1] + range<1> inAccR1_1, + range<1> inAccR2_1, + id<1> inI_1, +) +{ + // Local capture object + struct Capture local; + + // Reassemble capture object from parts + + // 1. Copy struct argument contents to local copy + local.s = s; + + // 2. Initialize accessors by calling init functions + // 2a. Call outAcc accessor’s init function + sycl::accessor::init( + &local.outAcc, outAccData, outAccR1, outAccR2, outI); + + // 2b. Call s.inAcc[0] accessor’s init function + sycl::accessor::init( + &local.s.inAcc[0], inAccData_0, inAccR1_0, inAccR2_0, inI_0); + + // 2c. Call s.inAcc[1] accessor’s init function + sycl::accessor::init( + &local.s.inAcc[1], inAccData_1, inAccR1_1, inAccR2_1, inI_1); + + callee(&local, id<1> wi); +} +``` diff --git a/sycl/test/array_param/array-kernel-param-nested-run.cpp b/sycl/test/array_param/array-kernel-param-nested-run.cpp new file mode 100755 index 0000000000000..28b9469cda89a --- /dev/null +++ b/sycl/test/array_param/array-kernel-param-nested-run.cpp @@ -0,0 +1,135 @@ +// This test checks kernel execution with array parameters inside structs. + +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: env SYCL_DEVICE_TYPE=HOST %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out +// XFAIL: * + +#include +#include + +using namespace cl::sycl; + +constexpr size_t c_num_items = 100; +range<1> num_items{c_num_items}; // range<1>(num_items) + +// Change if tests are added/removed +static int testCount = 1; +static int passCount; + +template +static bool verify_1D(const char *name, int X, T A, T A_ref) { + int ErrCnt = 0; + + for (int i = 0; i < X; i++) { + if (A_ref[i] != A[i]) { + if (++ErrCnt < 10) { + std::cout << name << " mismatch at " << i << ". Expected " << A_ref[i] + << " result is " << A[i] << "\n"; + } + } + } + + if (ErrCnt == 0) { + return true; + } + std::cout << " Failed. Failure rate: " << ErrCnt << "/" << X << "(" + << ErrCnt / (float)X * 100.f << "%)\n"; + return false; +} + +template +void init(T &A, int value, int increment) { + for (int i = 0; i < c_num_items; i++) { + A[i] = value; + value += increment; + } +} + +bool test_accessor_array_in_struct(queue &myQueue) { + std::array input1; + std::array input2; + std::array output; + std::array ref; + init(input1, 1, 1); + init(input2, 22, 1); + init(ref, 35, 2); + + auto in_buffer1 = buffer(input1.data(), num_items); + auto in_buffer2 = buffer(input2.data(), num_items); + auto out_buffer = buffer(output.data(), num_items); + + myQueue.submit([&](handler &cgh) { + using Accessor = + accessor; + struct S { + int w; + int x; + Accessor a[2]; + int y; + int z; + } S = { + 3, 3, {in_buffer1.get_access(cgh), in_buffer2.get_access(cgh)}, 7, 7}; + auto output_accessor = out_buffer.get_access(cgh); + + cgh.parallel_for(num_items, [=](cl::sycl::id<1> index) { + S.a[0][index]++; + S.a[1][index]++; + output_accessor[index] = S.a[0][index] + S.a[1][index] + S.x + S.y; + }); + }); + const auto HostAccessor = out_buffer.get_access(); + + return verify_1D("Accessor array in struct", c_num_items, output, ref); +} + +bool run_tests() { + queue Q([](exception_list L) { + for (auto ep : L) { + try { + std::rethrow_exception(ep); + } catch (std::exception &E) { + std::cout << "*** std exception caught:\n"; + std::cout << E.what(); + } catch (cl::sycl::exception const &E1) { + std::cout << "*** SYCL exception caught:\n"; + std::cout << E1.what(); + } + } + }); + + passCount = 0; + if (test_accessor_array_in_struct(Q)) { + ++passCount; + } + + auto D = Q.get_device(); + const char *devType = D.is_host() ? "Host" : D.is_cpu() ? "CPU" : "GPU"; + std::cout << passCount << " of " << testCount << " tests passed on " + << devType << "\n"; + + return (testCount == passCount); +} + +int main(int argc, char *argv[]) { + bool passed = true; + default_selector selector{}; + auto D = selector.select_device(); + const char *devType = D.is_host() ? "Host" : D.is_cpu() ? "CPU" : "GPU"; + std::cout << "Running on device " << devType << " (" + << D.get_info() << ")\n"; + try { + passed &= run_tests(); + } catch (exception e) { + std::cout << e.what(); + } + + if (!passed) { + std::cout << "FAILED\n"; + return 1; + } + std::cout << "PASSED\n"; + return 0; +} diff --git a/sycl/test/array_param/array-kernel-param-run.cpp b/sycl/test/array_param/array-kernel-param-run.cpp new file mode 100755 index 0000000000000..5ed29a410997e --- /dev/null +++ b/sycl/test/array_param/array-kernel-param-run.cpp @@ -0,0 +1,222 @@ +// This test checks kernel execution with array kernel parameters. + +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: env SYCL_DEVICE_TYPE=HOST %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +#include +#include + +using namespace cl::sycl; + +constexpr size_t c_num_items = 100; +range<1> num_items{c_num_items}; // range<1>(num_items) + +// Change if tests are added/removed +static int testCount = 4; +static int passCount; + +template +static bool verify_1D(const char *name, int X, T A, T A_ref) { + int ErrCnt = 0; + + for (int i = 0; i < X; i++) { + if (A_ref[i] != A[i]) { + if (++ErrCnt < 10) { + std::cout << name << " mismatch at " << i << ". Expected " << A_ref[i] + << " result is " << A[i] << "\n"; + } + } + } + + if (ErrCnt == 0) { + return true; + } + std::cout << " Failed. Failure rate: " << ErrCnt << "/" << X << "(" + << ErrCnt / (float)X * 100.f << "%)\n"; + return false; +} + +template +void init(T &A, int value, int increment) { + for (int i = 0; i < c_num_items; i++) { + A[i] = value; + value += increment; + } +} + +bool test_one_array(queue &myQueue) { + int input1[c_num_items]; + int output[c_num_items]; + int ref[c_num_items]; + init(input1, 1, 1); + init(output, 51, 1); + init(ref, 2, 1); + + auto out_buffer = buffer(&output[0], num_items); + + myQueue.submit([&](handler &cgh) { + auto output_accessor = out_buffer.get_access(cgh); + + cgh.parallel_for(num_items, [=](cl::sycl::id<1> index) { + output_accessor[index] = input1[index] + 1; + }); + }); + const auto HostAccessor = + out_buffer.get_access(); + + return verify_1D("One array", c_num_items, output, ref); +} + +bool test_two_arrays(queue &myQueue) { + int input1[c_num_items]; + int input2[c_num_items]; + int output[c_num_items]; + int ref[c_num_items]; + init(input1, 1, 1); + init(input2, 22, 1); + init(ref, 23, 2); + + auto out_buffer = buffer(&output[0], num_items); + + myQueue.submit([&](handler &cgh) { + auto output_accessor = out_buffer.get_access(cgh); + + cgh.parallel_for(num_items, [=](cl::sycl::id<1> index) { + output_accessor[index] = input1[index] + input2[index]; + }); + }); + const auto HostAccessor = + out_buffer.get_access(); + + return verify_1D("Two arrays", c_num_items, output, ref); +} + +bool test_accessor_arrays_1(queue &myQueue) { + std::array input1; + std::array input2; + int input3[c_num_items]; + int input4[c_num_items]; + std::array ref; + init(input1, 1, 1); + init(input2, 22, 1); + init(input3, 5, 1); + init(input4, -7, 1); + init(ref, 22, 3); + + auto in_buffer1 = buffer(input1.data(), num_items); + auto in_buffer2 = buffer(input2.data(), num_items); + + myQueue.submit([&](handler &cgh) { + using Accessor = accessor; + Accessor a[2] = { + in_buffer1.get_access(cgh), + in_buffer2.get_access(cgh), + }; + + cgh.parallel_for( + num_items, [=](cl::sycl::id<1> index) { + a[0][index] = a[1][index] + input3[index] + input4[index] + 2; + }); + }); + const auto HostAccessor = + in_buffer1.get_access(); + + return verify_1D>("Accessor arrays 1", + c_num_items, input1, ref); +} + +bool test_accessor_arrays_2(queue &myQueue) { + std::array input1; + std::array input2; + std::array output; + std::array ref; + init(input1, 1, 1); + init(input2, 22, 1); + init(ref, 23, 2); + + auto in_buffer1 = buffer(input1.data(), num_items); + auto in_buffer2 = buffer(input2.data(), num_items); + auto out_buffer = buffer(output.data(), num_items); + + myQueue.submit([&](handler &cgh) { + using Accessor = accessor; + Accessor a[4] = {in_buffer1.get_access(cgh), + in_buffer2.get_access(cgh), + in_buffer1.get_access(cgh), + in_buffer2.get_access(cgh)}; + auto output_accessor = out_buffer.get_access(cgh); + + cgh.parallel_for( + num_items, [=](cl::sycl::id<1> index) { + output_accessor[index] = a[0][index] + a[3][index]; + }); + }); + const auto HostAccessor = + out_buffer.get_access(); + + return verify_1D>("Accessor arrays 2", + c_num_items, output, ref); +} + +bool run_tests() { + queue Q([](exception_list L) { + for (auto ep : L) { + try { + std::rethrow_exception(ep); + } catch (std::exception &E) { + std::cout << "*** std exception caught:\n"; + std::cout << E.what(); + } catch (cl::sycl::exception const &E1) { + std::cout << "*** SYCL exception caught:\n"; + std::cout << E1.what(); + } + } + }); + + passCount = 0; + if (test_one_array(Q)) { + ++passCount; + } + if (test_two_arrays(Q)) { + ++passCount; + } + if (test_accessor_arrays_1(Q)) { + ++passCount; + } + if (test_accessor_arrays_2(Q)) { + ++passCount; + } + + auto D = Q.get_device(); + const char *devType = D.is_host() ? "Host" : D.is_cpu() ? "CPU" : "GPU"; + std::cout << passCount << " of " << testCount << " tests passed on " + << devType << "\n"; + + return (testCount == passCount); +} + +int main(int argc, char *argv[]) { + bool passed = true; + default_selector selector{}; + auto D = selector.select_device(); + const char *devType = D.is_host() ? "Host" : D.is_cpu() ? "CPU" : "GPU"; + std::cout << "Running on device " << devType << " (" + << D.get_info() << ")\n"; + try { + passed &= run_tests(); + } catch (exception e) { + std::cout << e.what(); + } + + if (!passed) { + std::cout << "FAILED\n"; + return 1; + } + std::cout << "PASSED\n"; + return 0; +}