From 8bed533f01cb5c9f6635585fe92042f1a4016368 Mon Sep 17 00:00:00 2001 From: Viktoria Maximova Date: Tue, 21 Jan 2020 19:28:56 +0300 Subject: [PATCH 1/4] [SYCL][FPGA] Enable uses_global_work_offset attribute (#1010) `uses_global_work_offset` attribute applies to a device function/lambda function or function call operator. Valid values are 0 and 1. If value >1 is specified as an argument, we warn user that it will be adjusted to 1. As attribute is not ignored in this case and it does not match any existing diagnostic group, the new `AdjustedAttributes` group was added. Signed-off-by: Viktoria Maksimova --- clang/include/clang/Basic/Attr.td | 9 ++++ clang/include/clang/Basic/AttrDocs.td | 10 ++++ .../include/clang/Basic/AttributeCommonInfo.h | 3 +- clang/include/clang/Basic/DiagnosticGroups.td | 4 +- .../clang/Basic/DiagnosticSemaKinds.td | 3 ++ clang/lib/CodeGen/CodeGenFunction.cpp | 11 +++++ clang/lib/Sema/SemaDeclAttr.cpp | 27 ++++++++++ clang/lib/Sema/SemaSYCL.cpp | 11 ++++- .../intel-fpga-uses-global-work-offset.cpp | 28 +++++++++++ .../intel-fpga-uses-global-work-offset.cpp | 49 +++++++++++++++++++ 10 files changed, 152 insertions(+), 3 deletions(-) create mode 100644 clang/test/CodeGenSYCL/intel-fpga-uses-global-work-offset.cpp create mode 100644 clang/test/SemaSYCL/intel-fpga-uses-global-work-offset.cpp diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 4adcd69f0a98f..49c05bfd46c35 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1144,6 +1144,15 @@ def SYCLIntelMaxGlobalWorkDim : InheritableAttr { let PragmaAttributeSupport = 0; } +def SYCLIntelUsesGlobalWorkOffset : InheritableAttr { + let Spellings = [CXX11<"intelfpga","uses_global_work_offset">]; + let Args = [BoolArgument<"Enabled">]; + let LangOpts = [SYCLIsDevice, SYCLIsHost]; + let Subjects = SubjectList<[Function], ErrorDiag>; + let Documentation = [SYCLIntelUsesGlobalWorkOffsetDocs]; + let PragmaAttributeSupport = 0; +} + def C11NoReturn : InheritableAttr { let Spellings = [Keyword<"_Noreturn">]; let Subjects = SubjectList<[Function], ErrorDiag>; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index af0b0e3a457cc..b311b8cc42a43 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -2009,6 +2009,16 @@ device kernel, the attribute is ignored and it is not propagated to a kernel. }]; } +def SYCLIntelUsesGlobalWorkOffsetDocs : Documentation { + let Category = DocCatFunction; + let Heading = "uses_global_work_offset (IntelFPGA)"; + let Content = [{ +Applies to a device function/lambda function or function call operator (of a +function object). If 0, compiler doesn't use the global work offset values for +the device function. Valid values are 0 and 1. + }]; +} + def SYCLFPGAPipeDocs : Documentation { let Category = DocCatStmt; let Heading = "pipe (read_only, write_only)"; diff --git a/clang/include/clang/Basic/AttributeCommonInfo.h b/clang/include/clang/Basic/AttributeCommonInfo.h index 1ffcb1474b6d4..f2ea62e9f9fd2 100644 --- a/clang/include/clang/Basic/AttributeCommonInfo.h +++ b/clang/include/clang/Basic/AttributeCommonInfo.h @@ -158,7 +158,8 @@ class AttributeCommonInfo { (ParsedAttr == AT_IntelReqdSubGroupSize && isCXX11Attribute()) || ParsedAttr == AT_SYCLIntelNumSimdWorkItems || ParsedAttr == AT_SYCLIntelMaxWorkGroupSize || - ParsedAttr == AT_SYCLIntelMaxGlobalWorkDim) + ParsedAttr == AT_SYCLIntelMaxGlobalWorkDim || + ParsedAttr == AT_SYCLIntelUsesGlobalWorkOffset) return true; return false; diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 8b4c18a5c4f9c..b5d3dd3e3b72f 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -637,8 +637,10 @@ def NSReturnsMismatch : DiagGroup<"nsreturns-mismatch">; def IndependentClassAttribute : DiagGroup<"IndependentClass-attribute">; def UnknownAttributes : DiagGroup<"unknown-attributes">; def IgnoredAttributes : DiagGroup<"ignored-attributes">; +def AdjustedAttributes : DiagGroup<"adjusted-attributes">; def Attributes : DiagGroup<"attributes", [UnknownAttributes, - IgnoredAttributes]>; + IgnoredAttributes, + AdjustedAttributes]>; def UnknownSanitizers : DiagGroup<"unknown-sanitizers">; def UnnamedTypeTemplateArgs : DiagGroup<"unnamed-type-template-args", [CXX98CompatUnnamedTypeTemplateArgs]>; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index f82cdadd9ff47..86bb123137432 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -10324,6 +10324,9 @@ def err_sycl_x_y_z_arguments_must_be_one : Error< "%0 X-, Y- and Z- sizes must be 1 when %1 attribute is used with value 0">; def err_intel_attribute_argument_is_not_in_range: Error< "The value of %0 attribute must be in range from 0 to 3">; +def warn_boolean_attribute_argument_is_not_valid: Warning< + "The value of %0 attribute should be 0 or 1. Adjusted to 1">, + InGroup; def err_sycl_attibute_cannot_be_applied_here : Error<"%0 attribute cannot be applied to a " "%select{static function or function in an anonymous namespace" diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 612594c28bc14..3e52589f7c636 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -668,6 +668,17 @@ void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD, Fn->setMetadata("max_global_work_dim", llvm::MDNode::get(Context, AttrMDArgs)); } + + if (const SYCLIntelUsesGlobalWorkOffsetAttr *A = + FD->getAttr()) { + bool IsEnabled = A->getEnabled(); + if (!IsEnabled) { + llvm::Metadata *AttrMDArgs[] = { + llvm::ConstantAsMetadata::get(Builder.getInt32(IsEnabled))}; + Fn->setMetadata("uses_global_work_offset", + llvm::MDNode::get(Context, AttrMDArgs)); + } + } } /// Determine whether the function F ends with a return stmt. diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 855469894821f..733ec51120a33 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -5166,6 +5166,26 @@ static bool checkForDuplicateAttribute(Sema &S, Decl *D, return false; } +static void handleUsesGlobalWorkOffsetAttr(Sema &S, Decl *D, + const ParsedAttr &Attr) { + if (S.LangOpts.SYCLIsHost) + return; + + checkForDuplicateAttribute(S, D, Attr); + + uint32_t Enabled; + const Expr *E = Attr.getArgAsExpr(0); + if (!checkUInt32Argument(S, Attr, E, Enabled, 0, + /*StrictlyUnsigned=*/true)) + return; + if (Enabled > 1) + S.Diag(Attr.getLoc(), diag::warn_boolean_attribute_argument_is_not_valid) + << Attr; + + D->addAttr(::new (S.Context) + SYCLIntelUsesGlobalWorkOffsetAttr(S.Context, Attr, Enabled)); +} + /// Handle the [[intelfpga::doublepump]] and [[intelfpga::singlepump]] attributes. /// One but not both can be specified /// Both are incompatible with the __register__ attribute. @@ -7599,6 +7619,9 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, case ParsedAttr::AT_SYCLIntelMaxGlobalWorkDim: handleMaxGlobalWorkDimAttr(S, D, AL); break; + case ParsedAttr::AT_SYCLIntelUsesGlobalWorkOffset: + handleUsesGlobalWorkOffsetAttr(S, D, AL); + break; case ParsedAttr::AT_VecTypeHint: handleVecTypeHint(S, D, AL); break; @@ -8082,6 +8105,10 @@ void Sema::ProcessDeclAttributeList(Scope *S, Decl *D, } else if (const auto *A = D->getAttr()) { Diag(D->getLocation(), diag::err_opencl_kernel_attr) << A; D->setInvalidDecl(); + } else if (const auto *A = + D->getAttr()) { + Diag(D->getLocation(), diag::err_opencl_kernel_attr) << A; + D->setInvalidDecl(); } else if (const auto *A = D->getAttr()) { Diag(D->getLocation(), diag::err_opencl_kernel_attr) << A; D->setInvalidDecl(); diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index ea78f26bde711..c571d954db8d1 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -469,6 +469,14 @@ class MarkDeviceFunction : public RecursiveASTVisitor { FD->dropAttr(); } } + if (auto *A = FD->getAttr()) { + if (ParentFD == SYCLKernel) { + Attrs.insert(A); + } else { + SemaRef.Diag(A->getLocation(), diag::warn_attribute_ignored) << A; + FD->dropAttr(); + } + } // TODO: vec_len_hint should be handled here @@ -1359,7 +1367,8 @@ void Sema::MarkDevice(void) { case attr::Kind::SYCLIntelKernelArgsRestrict: case attr::Kind::SYCLIntelNumSimdWorkItems: case attr::Kind::SYCLIntelMaxGlobalWorkDim: - case attr::Kind::SYCLIntelMaxWorkGroupSize: { + case attr::Kind::SYCLIntelMaxWorkGroupSize: + case attr::Kind::SYCLIntelUsesGlobalWorkOffset: { SYCLKernel->addAttr(A); break; } diff --git a/clang/test/CodeGenSYCL/intel-fpga-uses-global-work-offset.cpp b/clang/test/CodeGenSYCL/intel-fpga-uses-global-work-offset.cpp new file mode 100644 index 0000000000000..2107f214df8be --- /dev/null +++ b/clang/test/CodeGenSYCL/intel-fpga-uses-global-work-offset.cpp @@ -0,0 +1,28 @@ +// RUN: %clang_cc1 -triple spir64-unknown-unknown-sycldevice -disable-llvm-passes -fsycl-is-device -emit-llvm -o - %s | FileCheck %s + +class Foo { +public: + [[intelfpga::uses_global_work_offset(0)]] void operator()() {} +}; + +template +__attribute__((sycl_kernel)) void kernel(Func kernelFunc) { + kernelFunc(); +} + +void bar() { + Foo boo; + kernel(boo); + + kernel( + []() [[intelfpga::uses_global_work_offset(0)]]{}); + + kernel( + []() [[intelfpga::uses_global_work_offset(1)]]{}); +} + +// CHECK: define spir_kernel void @{{.*}}kernel_name1() {{.*}} !uses_global_work_offset ![[NUM5:[0-9]+]] +// CHECK: define spir_kernel void @{{.*}}kernel_name2() {{.*}} !uses_global_work_offset ![[NUM5]] +// CHECK: define spir_kernel void @{{.*}}kernel_name3() {{.*}} ![[NUM4:[0-9]+]] +// CHECK-NOT: ![[NUM4]] = !{i32 1} +// CHECK: ![[NUM5]] = !{i32 0} diff --git a/clang/test/SemaSYCL/intel-fpga-uses-global-work-offset.cpp b/clang/test/SemaSYCL/intel-fpga-uses-global-work-offset.cpp new file mode 100644 index 0000000000000..f4063a8999dd2 --- /dev/null +++ b/clang/test/SemaSYCL/intel-fpga-uses-global-work-offset.cpp @@ -0,0 +1,49 @@ +// RUN: %clang_cc1 -Wno-return-type -fsycl-is-device -fcxx-exceptions -fsyntax-only -ast-dump -verify -pedantic %s | FileCheck %s + +struct FuncObj { + [[intelfpga::uses_global_work_offset(1)]] void operator()() {} +}; + +template +void kernel(Func kernelFunc) { + kernelFunc(); +} + +int main() { + // CHECK: SYCLIntelUsesGlobalWorkOffsetAttr{{.*}}Enabled + kernel([]() { + FuncObj(); + }); + + // CHECK: SYCLIntelUsesGlobalWorkOffsetAttr + // CHECK-NOT: Enabled + kernel( + []() [[intelfpga::uses_global_work_offset(0)]]{}); + + // CHECK: SYCLIntelUsesGlobalWorkOffsetAttr{{.*}}Enabled + // expected-warning@+2{{'uses_global_work_offset' attribute should be 0 or 1. Adjusted to 1}} + kernel( + []() [[intelfpga::uses_global_work_offset(42)]]{}); + + // expected-error@+2{{'uses_global_work_offset' attribute requires a non-negative integral compile time constant expression}} + kernel( + []() [[intelfpga::uses_global_work_offset(-1)]]{}); + + // expected-error@+2{{'uses_global_work_offset' attribute requires parameter 0 to be an integer constant}} + kernel( + []() [[intelfpga::uses_global_work_offset("foo")]]{}); + + kernel([]() { + // expected-error@+1{{'uses_global_work_offset' attribute only applies to functions}} + [[intelfpga::uses_global_work_offset(1)]] int a; + }); + + // CHECK: SYCLIntelUsesGlobalWorkOffsetAttr{{.*}} + // CHECK-NOT: Enabled + // CHECK: SYCLIntelUsesGlobalWorkOffsetAttr{{.*}}Enabled + // expected-warning@+2{{attribute 'uses_global_work_offset' is already applied}} + kernel( + []() [[intelfpga::uses_global_work_offset(0), intelfpga::uses_global_work_offset(1)]]{}); + + return 0; +} From 659efdf8bd2694ac5c880e4bbc4c4e59ba53c50d Mon Sep 17 00:00:00 2001 From: Mariya Podchishchaeva Date: Tue, 21 Jan 2020 20:10:20 +0300 Subject: [PATCH 2/4] [SYCL] Diagnose __float128 type usage in device code, fixes and cleanups (#971) The problem is that CheckSYCLCall used to get the caller incorrectly. getCurFunctionDecl gets the wrong thing. It will get the function containing the lambda at definition time, rather than the lambda operator(). Additional changes: Cleaned up unnecessary calls of CheckSYCLCall function. Fixed diagnosing of storage allocation through deferred diagnostics system. Renamed CheckSYCLCall with checkSYCLDeviceFunction since we don't really check calls as CUDA/OpenMP does. Removed unnecessary emitting of diagnostics from SemaSYCL. Signed-off-by: Mariya Podchishchaeva --- clang/include/clang/Sema/Sema.h | 26 +++++++++++++++++++-- clang/lib/Sema/Sema.cpp | 3 +++ clang/lib/Sema/SemaDeclCXX.cpp | 2 +- clang/lib/Sema/SemaExpr.cpp | 14 +++-------- clang/lib/Sema/SemaExprCXX.cpp | 9 ++++--- clang/lib/Sema/SemaOverload.cpp | 6 ----- clang/lib/Sema/SemaSYCL.cpp | 21 ++++++----------- clang/lib/Sema/SemaStmtAsm.cpp | 9 ++++--- clang/lib/Sema/SemaType.cpp | 12 ++++++---- clang/test/SemaSYCL/inline-asm.cpp | 2 +- clang/test/SemaSYCL/restrict-recursion3.cpp | 6 +++-- clang/test/SemaSYCL/restrict-recursion4.cpp | 6 +++-- clang/test/SemaSYCL/sycl-restrict.cpp | 16 +++++++++---- 13 files changed, 75 insertions(+), 57 deletions(-) diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 73194b1d6f471..db702ec62e794 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -12079,11 +12079,33 @@ class Sema final { KernelCallDllimportFunction, KernelCallVariadicFunction }; - DeviceDiagBuilder SYCLDiagIfDeviceCode(SourceLocation Loc, unsigned DiagID); bool isKnownGoodSYCLDecl(const Decl *D); void ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, MangleContext &MC); void MarkDevice(void); - bool CheckSYCLCall(SourceLocation Loc, FunctionDecl *Callee); + + /// Creates a DeviceDiagBuilder that emits the diagnostic if the current + /// context is "used as device code". + /// + /// - If CurLexicalContext is a kernel function or it is known that the + /// function will be emitted for the device, emits the diagnostics + /// immediately. + /// - If CurLexicalContext is a function and we are compiling + /// for the device, but we don't know that this function will be codegen'ed + /// for devive yet, creates a diagnostic which is emitted if and when we + /// realize that the function will be codegen'ed. + /// + /// Example usage: + /// + /// Variables with thread storage duration are not allowed to be used in SYCL + /// device code + /// if (getLangOpts().SYCLIsDevice) + /// SYCLDiagIfDeviceCode(Loc, diag::err_thread_unsupported); + DeviceDiagBuilder SYCLDiagIfDeviceCode(SourceLocation Loc, unsigned DiagID); + + /// Checks if Callee function is a device function and emits + /// diagnostics if it is known that it is a device function, adds this + /// function to the DeviceCallGraph otherwise. + void checkSYCLDeviceFunction(SourceLocation Loc, FunctionDecl *Callee); }; /// RAII object that enters a new expression evaluation context. diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index 8c9714d6e4420..d44fd674f2eb2 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -1610,6 +1610,9 @@ Sema::DeviceDiagBuilder Sema::targetDiag(SourceLocation Loc, unsigned DiagID) { if (getLangOpts().CUDA) return getLangOpts().CUDAIsDevice ? CUDADiagIfDeviceCode(Loc, DiagID) : CUDADiagIfHostCode(Loc, DiagID); + // TODO: analyze which usages of targetDiag could be reused for SYCL. + // if (getLangOpts().SYCLIsDevice) + // return SYCLDiagIfDeviceCode(Loc, DiagID); return DeviceDiagBuilder(DeviceDiagBuilder::K_Immediate, Loc, DiagID, getCurFunctionDecl(), *this); } diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index 34137657a919e..437de8b595b4b 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -14644,7 +14644,7 @@ Sema::BuildCXXConstructExpr(SourceLocation ConstructLoc, QualType DeclInitType, if (getLangOpts().CUDA && !CheckCUDACall(ConstructLoc, Constructor)) return ExprError(); if (getLangOpts().SYCLIsDevice) - CheckSYCLCall(ConstructLoc, Constructor); + checkSYCLDeviceFunction(ConstructLoc, Constructor); return CXXConstructExpr::Create( Context, DeclInitType, ConstructLoc, Constructor, Elidable, diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index ac2a9e1cff08e..a0b4e6193c78d 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -269,7 +269,7 @@ bool Sema::DiagnoseUseOfDecl(NamedDecl *D, ArrayRef Locs, return true; if (getLangOpts().SYCLIsDevice) - CheckSYCLCall(Loc, FD); + checkSYCLDeviceFunction(Loc, FD); } if (auto *MD = dyn_cast(D)) { @@ -15649,7 +15649,7 @@ void Sema::MarkFunctionReferenced(SourceLocation Loc, FunctionDecl *Func, if (getLangOpts().CUDA) CheckCUDACall(Loc, Func); if (getLangOpts().SYCLIsDevice) - CheckSYCLCall(Loc, Func); + checkSYCLDeviceFunction(Loc, Func); // If we need a definition, try to create one. if (NeedDefinition && !Func->getBody()) { @@ -17219,15 +17219,7 @@ namespace { } void VisitCXXNewExpr(CXXNewExpr *E) { - FunctionDecl *FD = E->getOperatorNew(); - if (FD && S.getLangOpts().SYCLIsDevice) { - if (FD->isReplaceableGlobalAllocationFunction()) - S.SYCLDiagIfDeviceCode(E->getExprLoc(), diag::err_sycl_restrict) - << S.KernelAllocateStorage; - else if (FunctionDecl *Def = FD->getDefinition()) - S.CheckSYCLCall(E->getExprLoc(), Def); - } - if (FD) + if (E->getOperatorNew()) S.MarkFunctionReferenced(E->getBeginLoc(), E->getOperatorNew()); if (E->getOperatorDelete()) S.MarkFunctionReferenced(E->getBeginLoc(), E->getOperatorDelete()); diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index ed26411a2d22e..327bc100ce426 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -2171,16 +2171,15 @@ Sema::BuildCXXNew(SourceRange Range, bool UseGlobal, if (DiagnoseUseOfDecl(OperatorNew, StartLoc)) return ExprError(); MarkFunctionReferenced(StartLoc, OperatorNew); - if (getLangOpts().SYCLIsDevice) { - CheckSYCLCall(StartLoc, OperatorNew); - } + if (getLangOpts().SYCLIsDevice && + OperatorNew->isReplaceableGlobalAllocationFunction()) + SYCLDiagIfDeviceCode(StartLoc, diag::err_sycl_restrict) + << KernelAllocateStorage; } if (OperatorDelete) { if (DiagnoseUseOfDecl(OperatorDelete, StartLoc)) return ExprError(); MarkFunctionReferenced(StartLoc, OperatorDelete); - if (getLangOpts().SYCLIsDevice) - CheckSYCLCall(StartLoc, OperatorDelete); } return CXXNewExpr::Create(Context, UseGlobal, OperatorNew, OperatorDelete, diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 5fb59b545176d..fa811ee2bd25a 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -12916,8 +12916,6 @@ Sema::CreateOverloadedUnaryOp(SourceLocation OpLoc, UnaryOperatorKind Opc, FnDecl->getType()->castAs())) return ExprError(); - if (getLangOpts().SYCLIsDevice) - CheckSYCLCall(OpLoc, FnDecl); return MaybeBindToTemporary(TheCall); } else { // We matched a built-in operator. Convert the arguments, then @@ -13270,8 +13268,6 @@ ExprResult Sema::CreateOverloadedBinOp(SourceLocation OpLoc, isa(FnDecl), OpLoc, TheCall->getSourceRange(), VariadicDoesNotApply); - if (getLangOpts().SYCLIsDevice) - CheckSYCLCall(OpLoc, FnDecl); ExprResult R = MaybeBindToTemporary(TheCall); if (R.isInvalid()) return ExprError(); @@ -13633,8 +13629,6 @@ Sema::CreateOverloadedArraySubscriptExpr(SourceLocation LLoc, Method->getType()->castAs())) return ExprError(); - if (getLangOpts().SYCLIsDevice) - CheckSYCLCall(RLoc, FnDecl); return MaybeBindToTemporary(TheCall); } else { // We matched a built-in operator. Convert the arguments, then diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index c571d954db8d1..28560a1667fdc 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -359,10 +359,7 @@ class MarkDeviceFunction : public RecursiveASTVisitor { // new operator and any user-defined overloads that // do not allocate storage are permitted. if (FunctionDecl *FD = E->getOperatorNew()) { - if (FD->isReplaceableGlobalAllocationFunction()) { - SemaRef.Diag(E->getExprLoc(), diag::err_sycl_restrict) - << Sema::KernelAllocateStorage; - } else if (FunctionDecl *Def = FD->getDefinition()) { + if (FunctionDecl *Def = FD->getDefinition()) { if (!Def->hasAttr()) { Def->addAttr(SYCLDeviceAttr::CreateImplicit(SemaRef.Context)); SemaRef.addSyclDeviceDecl(Def); @@ -529,8 +526,7 @@ class MarkDeviceFunction : public RecursiveASTVisitor { if (!CheckSYCLType(Field->getType(), Field->getSourceRange(), Visited)) { if (SemaRef.getLangOpts().SYCLIsDevice) - SemaRef.SYCLDiagIfDeviceCode(Loc.getBegin(), - diag::note_sycl_used_here); + SemaRef.Diag(Loc.getBegin(), diag::note_sycl_used_here); return false; } } @@ -539,8 +535,7 @@ class MarkDeviceFunction : public RecursiveASTVisitor { if (!CheckSYCLType(Field->getType(), Field->getSourceRange(), Visited)) { if (SemaRef.getLangOpts().SYCLIsDevice) - SemaRef.SYCLDiagIfDeviceCode(Loc.getBegin(), - diag::note_sycl_used_here); + SemaRef.Diag(Loc.getBegin(), diag::note_sycl_used_here); return false; } } @@ -1399,8 +1394,7 @@ void Sema::MarkDevice(void) { // Do we know that we will eventually codegen the given function? static bool isKnownEmitted(Sema &S, FunctionDecl *FD) { - if (!FD) - return true; // Seen in LIT testing + assert(FD && "Given function may not be null."); if (FD->hasAttr() || FD->hasAttr()) return true; @@ -1416,16 +1410,16 @@ Sema::DeviceDiagBuilder Sema::SYCLDiagIfDeviceCode(SourceLocation Loc, "Should only be called during SYCL compilation"); FunctionDecl *FD = dyn_cast(getCurLexicalContext()); DeviceDiagBuilder::Kind DiagKind = [this, FD] { - if (ConstructingOpenCLKernel) + if (ConstructingOpenCLKernel || !FD) return DeviceDiagBuilder::K_Nop; - else if (isKnownEmitted(*this, FD)) + if (isKnownEmitted(*this, FD)) return DeviceDiagBuilder::K_ImmediateWithCallStack; return DeviceDiagBuilder::K_Deferred; }(); return DeviceDiagBuilder(DiagKind, Loc, DiagID, FD, *this); } -bool Sema::CheckSYCLCall(SourceLocation Loc, FunctionDecl *Callee) { +void Sema::checkSYCLDeviceFunction(SourceLocation Loc, FunctionDecl *Callee) { assert(Callee && "Callee may not be null."); FunctionDecl *Caller = dyn_cast(getCurLexicalContext()); @@ -1435,7 +1429,6 @@ bool Sema::CheckSYCLCall(SourceLocation Loc, FunctionDecl *Callee) { markKnownEmitted(*this, Caller, Callee, Loc, isKnownEmitted); else if (Caller) DeviceCallGraph[Caller].insert({Callee, Loc}); - return true; } // ----------------------------------------------------------------------------- diff --git a/clang/lib/Sema/SemaStmtAsm.cpp b/clang/lib/Sema/SemaStmtAsm.cpp index 2c8d1c386f14c..10f80d6c4fccd 100644 --- a/clang/lib/Sema/SemaStmtAsm.cpp +++ b/clang/lib/Sema/SemaStmtAsm.cpp @@ -259,12 +259,11 @@ StmtResult Sema::ActOnGCCAsmStmt(SourceLocation AsmLoc, bool IsSimple, // Skip all the checks if we are compiling SYCL device code, but the function // is not marked to be used on device, this code won't be codegen'ed anyway. if (getLangOpts().SYCLIsDevice) { - SYCLDiagIfDeviceCode(AsmLoc, diag::err_sycl_restrict) - << KernelUseAssembly; + SYCLDiagIfDeviceCode(AsmLoc, diag::err_sycl_restrict) << KernelUseAssembly; return new (Context) - GCCAsmStmt(Context, AsmLoc, IsSimple, IsVolatile, NumOutputs, - NumInputs, Names, Constraints, Exprs.data(), AsmString, - NumClobbers, Clobbers, NumLabels, RParenLoc); + GCCAsmStmt(Context, AsmLoc, IsSimple, IsVolatile, NumOutputs, NumInputs, + Names, Constraints, Exprs.data(), AsmString, NumClobbers, + Clobbers, NumLabels, RParenLoc); } FunctionDecl *FD = dyn_cast(getCurLexicalContext()); diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index 732b3e06e8ccd..611c649338257 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -1500,11 +1500,15 @@ static QualType ConvertDeclSpecToType(TypeProcessingState &state) { Result = Context.DoubleTy; break; case DeclSpec::TST_float128: - if (!S.Context.getTargetInfo().hasFloat128Type() && - !S.getLangOpts().SYCLIsDevice && - !(S.getLangOpts().OpenMP && S.getLangOpts().OpenMPIsDevice)) + if (!S.Context.getTargetInfo().hasFloat128Type() && + S.getLangOpts().SYCLIsDevice) + S.SYCLDiagIfDeviceCode(DS.getTypeSpecTypeLoc(), + diag::err_type_unsupported) + << "__float128"; + else if (!S.Context.getTargetInfo().hasFloat128Type() && + !(S.getLangOpts().OpenMP && S.getLangOpts().OpenMPIsDevice)) S.Diag(DS.getTypeSpecTypeLoc(), diag::err_type_unsupported) - << "__float128"; + << "__float128"; Result = Context.Float128Ty; break; case DeclSpec::TST_bool: Result = Context.BoolTy; break; // _Bool or bool diff --git a/clang/test/SemaSYCL/inline-asm.cpp b/clang/test/SemaSYCL/inline-asm.cpp index 4d414e29a9aa7..fc5f0986e7adc 100644 --- a/clang/test/SemaSYCL/inline-asm.cpp +++ b/clang/test/SemaSYCL/inline-asm.cpp @@ -19,7 +19,7 @@ void bar() { #endif // LINUX_ASM } -template +template __attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { // expected-note@+1 {{called by 'kernel_single_task // expected-note@+1 2{{function implemented using recursion declared here}} __attribute__((sycl_kernel)) void kernel_single_task2(Func kernelFunc) { + // expected-note@+1 {{called by 'kernel_single_task2}} kernelFunc(); - // expected-error@+1 2{{SYCL kernel cannot allocate storage}} - int *ip = new int; // expected-error@+1 2{{SYCL kernel cannot call a recursive function}} kernel_single_task2(kernelFunc); } int main() { + // expected-note@+1 {{called by 'operator()'}} kernel_single_task2([]() { usage3( &addInt ); }); return fib(5); } diff --git a/clang/test/SemaSYCL/restrict-recursion4.cpp b/clang/test/SemaSYCL/restrict-recursion4.cpp index 7264f2ccf803d..cad0b9aff7273 100644 --- a/clang/test/SemaSYCL/restrict-recursion4.cpp +++ b/clang/test/SemaSYCL/restrict-recursion4.cpp @@ -18,6 +18,8 @@ void kernel2(void) { using myFuncDef = int(int,int); void usage2(myFuncDef functionPtr) { + // expected-error@+1 {{SYCL kernel cannot allocate storage}} + int *ip = new int; // expected-error@+1 {{SYCL kernel cannot call a recursive function}} kernel2(); } @@ -28,12 +30,12 @@ int addInt(int n, int m) { template __attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { - // expected-error@+1 {{SYCL kernel cannot allocate storage}} - int *ip = new int; + // expected-note@+1 {{called by 'kernel_single_task}} kernelFunc(); } int main() { + // expected-note@+1 {{called by 'operator()'}} kernel_single_task([]() {usage2(&addInt);}); return fib(5); } diff --git a/clang/test/SemaSYCL/sycl-restrict.cpp b/clang/test/SemaSYCL/sycl-restrict.cpp index 083b7f775f2cc..658395e92c3ff 100644 --- a/clang/test/SemaSYCL/sycl-restrict.cpp +++ b/clang/test/SemaSYCL/sycl-restrict.cpp @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -fcxx-exceptions -fsycl-is-device -Wno-return-type -verify -fsyntax-only -std=c++17 %s -// RUN: %clang_cc1 -fcxx-exceptions -fsycl-is-device -fno-sycl-allow-func-ptr -Wno-return-type -verify -fsyntax-only -std=c++17 %s -// RUN: %clang_cc1 -fcxx-exceptions -fsycl-is-device -DALLOW_FP=1 -fsycl-allow-func-ptr -Wno-return-type -verify -fsyntax-only -std=c++17 %s +// RUN: %clang_cc1 -fcxx-exceptions -triple spir64 -fsycl-is-device -Wno-return-type -verify -fsyntax-only -std=c++17 %s +// RUN: %clang_cc1 -fcxx-exceptions -triple spir64 -fsycl-is-device -fno-sycl-allow-func-ptr -Wno-return-type -verify -fsyntax-only -std=c++17 %s +// RUN: %clang_cc1 -fcxx-exceptions -triple spir64 -fsycl-is-device -DALLOW_FP=1 -fsycl-allow-func-ptr -Wno-return-type -verify -fsyntax-only -std=c++17 %s namespace std { @@ -65,6 +65,7 @@ bool isa_B(A *a) { // expected-error@+1 {{SYCL kernel cannot allocate storage}} int *ip = new int; int i; int *p3 = new(&i) int; // no error on placement new + // expected-note@+1 {{called by 'isa_B'}} OverloadedNewDelete *x = new( struct OverloadedNewDelete ); auto y = new struct OverloadedNewDelete [5]; // expected-error@+1 {{SYCL kernel cannot use rtti}} @@ -102,6 +103,7 @@ using myFuncDef = int(int,int); void eh_ok(void) { + __float128 A; try { ; } catch (...) { @@ -138,6 +140,9 @@ void usage(myFuncDef functionPtr) { Check_RTTI_Restriction::kernel1([]() { Check_RTTI_Restriction::A *a; Check_RTTI_Restriction::isa_B(a); }); + + // expected-error@+1 {{__float128 is not supported on this target}} + __float128 A; } namespace ns { @@ -172,9 +177,12 @@ int use2 ( a_type ab, a_type *abp ) { // expected-note@+1 {{called by 'use2'}} eh_not_ok(); Check_RTTI_Restriction:: A *a; + // expected-note@+1 2{{called by 'use2'}} Check_RTTI_Restriction:: isa_B(a); + // expected-note@+1 {{called by 'use2'}} usage(&addInt); Check_User_Operators::Fraction f1(3, 8), f2(1, 2), f3(10, 2); + // expected-note@+1 {{called by 'use2'}} if (f1 == f2) return false; } @@ -183,7 +191,7 @@ __attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); a_type ab; a_type *p; - // expected-note@+1 {{called by 'kernel_single_task'}} + // expected-note@+1 5{{called by 'kernel_single_task'}} use2(ab, p); } From 54dddb4b0e7740836c90048a28311fd579227814 Mon Sep 17 00:00:00 2001 From: JoeyGIntel <56038955+JoeyGIntel@users.noreply.github.com> Date: Wed, 22 Jan 2020 02:53:54 -0500 Subject: [PATCH 3/4] [SYCL] Fix final result saturation in mad_sat host implementation (#1025) Signed-off-by: Joey Genfi --- sycl/source/detail/builtins_integer.cpp | 5 +++-- sycl/test/built-ins/scalar_integer.cpp | 22 ++++++++++++++++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/builtins_integer.cpp b/sycl/source/detail/builtins_integer.cpp index 73ef8e73bce98..7bc5cbcf0bbc6 100644 --- a/sycl/source/detail/builtins_integer.cpp +++ b/sycl/source/detail/builtins_integer.cpp @@ -153,10 +153,11 @@ template inline T __s_long_mad_hi(T a, T b, T c) { template inline T __s_mad_sat(T a, T b, T c) { using UPT = typename d::make_larger::type; UPT mul = UPT(a) * UPT(b); + UPT res = mul + UPT(c); const UPT max = d::max_v(); const UPT min = d::min_v(); - mul = std::min(std::max(mul, min), max); - return __s_add_sat(T(mul), c); + res = std::min(std::max(res, min), max); + return T(res); } template inline T __s_long_mad_sat(T a, T b, T c) { diff --git a/sycl/test/built-ins/scalar_integer.cpp b/sycl/test/built-ins/scalar_integer.cpp index 61d8e88542694..528f4fb18aa07 100644 --- a/sycl/test/built-ins/scalar_integer.cpp +++ b/sycl/test/built-ins/scalar_integer.cpp @@ -287,6 +287,28 @@ int main() { assert(r == 0x7FFFFFFF); } + // mad_sat test two + { + char r(0); + char exp(120); + { + cl::sycl::buffer buf(&r, cl::sycl::range<1>(1)); + cl::sycl::queue q; + q.submit([&](cl::sycl::handler &cgh) { + auto acc = buf.get_access(cgh); + cgh.single_task([=]() { + signed char inputData_0(-17); + signed char inputData_1(-10); + signed char inputData_2(-50); + acc[0] = cl::sycl::mad_sat(inputData_0, inputData_1, inputData_2); + }); + }); + } + assert(r == exp); // Should return the real number of i0*i1+i2 in CPU + // Only fails in vector, but passes in scalar. + + } + // mul_hi { s::cl_int r{ 0 }; From 7a2c8df32437f303ce79a7c06b0dc5679fd0a265 Mon Sep 17 00:00:00 2001 From: Alexey Bader Date: Wed, 22 Jan 2020 15:34:49 +0300 Subject: [PATCH 4/4] [SYCL] Re-use OpenCL address space attributes for SYCL Today we re-use OpenCL parsed attributes, but have separate SYCL address space semantic attributes as current implementation of OpenCL semantics breaks valid C++. This patch enables re-use of OpenCL semantic attributes by allowing conversions between types qualified with OpenCL address spaces and type w/o address space qualifiers. Clang compiler (almost) always adds address space qualifiers in OpenCL mode, so it should not affect OpenCL mode. NOTE: this change also disables implicit conversion between the unqualified types and types qualified with `__attribute__((address_space(N)))`, enabled by one of the previous SYCL patches. Signed-off-by: Alexey Bader --- clang/include/clang/AST/Type.h | 15 ++-- clang/include/clang/Basic/AddressSpaces.h | 8 -- .../clang/Basic/DiagnosticSemaKinds.td | 2 - clang/include/clang/Sema/ParsedAttr.h | 18 ----- clang/lib/AST/ASTContext.cpp | 5 -- clang/lib/AST/TypePrinter.cpp | 4 - clang/lib/Basic/Targets/AMDGPU.cpp | 10 --- clang/lib/Basic/Targets/NVPTX.h | 6 -- clang/lib/Basic/Targets/SPIR.h | 18 +---- clang/lib/Basic/Targets/TCE.h | 6 -- clang/lib/Basic/Targets/X86.h | 5 -- clang/lib/Sema/SemaType.cpp | 28 +------ .../address-space-parameter-conversions.cpp | 39 +--------- .../SemaOpenCLCXX/address-space-lambda.cl | 4 +- .../address-space-parameter-conversions.cpp | 6 +- .../SemaTemplate/address_space-dependent.cpp | 2 +- sycl/include/CL/__spirv/spirv_vars.hpp | 74 ++++++++----------- 17 files changed, 50 insertions(+), 200 deletions(-) diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h index 95ecc5f0a38da..b4fbfc080034d 100644 --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -486,15 +486,12 @@ class Qualifiers { /// Returns true if the address space in these qualifiers is equal to or /// a superset of the address space in the argument qualifiers. bool isAddressSpaceSupersetOf(Qualifiers other) const { - - return - isAddressSpaceSupersetOf(getAddressSpace(), other.getAddressSpace()) || - (!hasAddressSpace() && - (other.getAddressSpace() == LangAS::sycl_private || - other.getAddressSpace() == LangAS::sycl_local || - other.getAddressSpace() == LangAS::sycl_global || - other.getAddressSpace() == LangAS::sycl_constant || - other.getAddressSpace() == LangAS::sycl_generic)); + return isAddressSpaceSupersetOf(getAddressSpace(), + other.getAddressSpace()) || + (!hasAddressSpace() && + (other.getAddressSpace() == LangAS::opencl_private || + other.getAddressSpace() == LangAS::opencl_local || + other.getAddressSpace() == LangAS::opencl_global)); } /// Determines if these qualifiers compatibly include another set. diff --git a/clang/include/clang/Basic/AddressSpaces.h b/clang/include/clang/Basic/AddressSpaces.h index 996b6f03aadf5..faf7f303aa2d6 100644 --- a/clang/include/clang/Basic/AddressSpaces.h +++ b/clang/include/clang/Basic/AddressSpaces.h @@ -42,14 +42,6 @@ enum class LangAS : unsigned { cuda_constant, cuda_shared, - sycl_global, - sycl_local, - sycl_constant, - sycl_private, - // Likely never used, but useful in the future to reserve the spot in the - // enum. - sycl_generic, - // Pointer size and extension address spaces. ptr32_sptr, ptr32_uptr, diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 86bb123137432..c1a12e9d4fab0 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -10292,8 +10292,6 @@ def err_builtin_launder_invalid_arg : Error< "'__builtin_launder' is not allowed">; // SYCL-specific diagnostics -def err_sycl_attribute_address_space_invalid : Error< - "address space is outside the valid range of values">; def err_sycl_kernel_name_class_not_top_level : Error< "kernel name class and its template argument classes' declarations can only " "nest in a namespace: %0">; diff --git a/clang/include/clang/Sema/ParsedAttr.h b/clang/include/clang/Sema/ParsedAttr.h index fb06e7857994f..d9d8585970d99 100644 --- a/clang/include/clang/Sema/ParsedAttr.h +++ b/clang/include/clang/Sema/ParsedAttr.h @@ -534,24 +534,6 @@ class ParsedAttr final } } - /// If this is an OpenCL addr space attribute returns its SYCL representation - /// in LangAS, otherwise returns default addr space. - LangAS asSYCLLangAS() const { - switch (getKind()) { - case ParsedAttr::AT_OpenCLConstantAddressSpace: - return LangAS::sycl_constant; - case ParsedAttr::AT_OpenCLGlobalAddressSpace: - return LangAS::sycl_global; - case ParsedAttr::AT_OpenCLLocalAddressSpace: - return LangAS::sycl_local; - case ParsedAttr::AT_OpenCLPrivateAddressSpace: - return LangAS::sycl_private; - case ParsedAttr::AT_OpenCLGenericAddressSpace: - default: - return LangAS::Default; - } - } - AttributeCommonInfo::Kind getKind() const { return getParsedKind(); } }; diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 869deeebb1ec4..645dcc162165e 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -836,11 +836,6 @@ static const LangASMap *getAddressSpaceMap(const TargetInfo &T, 5, // cuda_device 6, // cuda_constant 7, // cuda_shared - 1, // sycl_global - 3, // sycl_local - 2, // sycl_constant - 0, // sycl_private - 4, // sycl_generic 8, // ptr32_sptr 9, // ptr32_uptr 10 // ptr64 diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp index 808afb09aebea..bf9318cf60c09 100644 --- a/clang/lib/AST/TypePrinter.cpp +++ b/clang/lib/AST/TypePrinter.cpp @@ -1792,16 +1792,12 @@ std::string Qualifiers::getAddrSpaceAsString(LangAS AS) { case LangAS::Default: return ""; case LangAS::opencl_global: - case LangAS::sycl_global: return "__global"; case LangAS::opencl_local: - case LangAS::sycl_local: return "__local"; case LangAS::opencl_private: - case LangAS::sycl_private: return "__private"; case LangAS::opencl_constant: - case LangAS::sycl_constant: return "__constant"; case LangAS::opencl_generic: return "__generic"; diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index 34a1a2375d8d8..6f4ad41739f8c 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -48,11 +48,6 @@ const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { Global, // cuda_device Constant, // cuda_constant Local, // cuda_shared - Global, // sycl_global - Local, // sycl_local - Constant, // sycl_constant - Private, // sycl_private - Generic, // sycl_generic Generic, // ptr32_sptr Generic, // ptr32_uptr Generic // ptr64 @@ -68,11 +63,6 @@ const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = { Global, // cuda_device Constant, // cuda_constant Local, // cuda_shared - Global, // sycl_global - Local, // sycl_local - Constant, // sycl_constant - Private, // sycl_private - Generic, // sycl_generic Generic, // ptr32_sptr Generic, // ptr32_uptr Generic // ptr64 diff --git a/clang/lib/Basic/Targets/NVPTX.h b/clang/lib/Basic/Targets/NVPTX.h index aa97741353da9..63780789c474e 100644 --- a/clang/lib/Basic/Targets/NVPTX.h +++ b/clang/lib/Basic/Targets/NVPTX.h @@ -33,12 +33,6 @@ static const unsigned NVPTXAddrSpaceMap[] = { 1, // cuda_device 4, // cuda_constant 3, // cuda_shared - 1, // sycl_global - 3, // sycl_local - 4, // sycl_constant - 0, // sycl_private - // FIXME: generic has to be added to the target - 0, // sycl_generic 0, // ptr32_sptr 0, // ptr32_uptr 0 // ptr64 diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h index b24d0107d51a0..b250a72ad6c76 100644 --- a/clang/lib/Basic/Targets/SPIR.h +++ b/clang/lib/Basic/Targets/SPIR.h @@ -33,11 +33,6 @@ static const unsigned SPIRAddrSpaceMap[] = { 0, // cuda_device 0, // cuda_constant 0, // cuda_shared - 1, // sycl_global - 3, // sycl_local - 2, // sycl_constant - 0, // sycl_private - 4, // sycl_generic 0, // ptr32_sptr 0, // ptr32_uptr 0 // ptr64 @@ -53,11 +48,6 @@ static const unsigned SYCLAddrSpaceMap[] = { 0, // cuda_device 0, // cuda_constant 0, // cuda_shared - 1, // sycl_global - 3, // sycl_local - 2, // sycl_constant - 0, // sycl_private - 4, // sycl_generic 0, // ptr32_sptr 0, // ptr32_uptr 0 // ptr64 @@ -70,11 +60,9 @@ class LLVM_LIBRARY_VISIBILITY SPIRTargetInfo : public TargetInfo { TLSSupported = false; VLASupported = false; LongWidth = LongAlign = 64; - if (Triple.getEnvironment() == llvm::Triple::SYCLDevice) { - AddrSpaceMap = &SYCLAddrSpaceMap; - } else { - AddrSpaceMap = &SPIRAddrSpaceMap; - } + AddrSpaceMap = (Triple.getEnvironment() == llvm::Triple::SYCLDevice) + ? &SYCLAddrSpaceMap + : &SPIRAddrSpaceMap; UseAddrSpaceMapMangling = true; HasLegalHalfType = true; HasFloat16 = true; diff --git a/clang/lib/Basic/Targets/TCE.h b/clang/lib/Basic/Targets/TCE.h index f7e2bb99e9371..9cbf2a3688a2e 100644 --- a/clang/lib/Basic/Targets/TCE.h +++ b/clang/lib/Basic/Targets/TCE.h @@ -40,12 +40,6 @@ static const unsigned TCEOpenCLAddrSpaceMap[] = { 0, // cuda_device 0, // cuda_constant 0, // cuda_shared - 3, // sycl_global - 4, // sycl_local - 5, // sycl_constant - 0, // sycl_private - // FIXME: generic has to be added to the target - 0, // sycl_generic 0, // ptr32_sptr 0, // ptr32_uptr 0, // ptr64 diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h index e05de294fb0f6..5b5e284e51419 100644 --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -32,11 +32,6 @@ static const unsigned X86AddrSpaceMap[] = { 0, // cuda_device 0, // cuda_constant 0, // cuda_shared - 0, // sycl_global - 0, // sycl_local - 0, // sycl_constant - 0, // sycl_private - 0, // sycl_generic 270, // ptr32_sptr 271, // ptr32_uptr 272 // ptr64 diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index 611c649338257..97ee00c93c70c 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -5963,35 +5963,14 @@ static bool BuildAddressSpaceIndex(Sema &S, LangAS &ASIdx, llvm::APSInt max(addrSpace.getBitWidth()); max = Qualifiers::MaxAddressSpace - (unsigned)LangAS::FirstTargetAddressSpace; - if (addrSpace > max) { S.Diag(AttrLoc, diag::err_attribute_address_space_too_high) << (unsigned)max.getZExtValue() << AddrSpace->getSourceRange(); return false; } - if (S.LangOpts.SYCLIsDevice && (addrSpace >= 4)) { - S.Diag(AttrLoc, diag::err_sycl_attribute_address_space_invalid) - << AddrSpace->getSourceRange(); - return false; - } - - ASIdx = getLangASFromTargetAS( - static_cast(addrSpace.getZExtValue())); - - if (S.LangOpts.SYCLIsDevice) { - ASIdx = - [](unsigned AS) { - switch (AS) { - case 0: return LangAS::sycl_private; - case 1: return LangAS::sycl_global; - case 2: return LangAS::sycl_constant; - case 3: return LangAS::sycl_local; - case 4: default: llvm_unreachable("Invalid SYCL AS"); - } - }(static_cast(ASIdx) - - static_cast(LangAS::FirstTargetAddressSpace)); - } + ASIdx = + getLangASFromTargetAS(static_cast(addrSpace.getZExtValue())); return true; } @@ -6117,8 +6096,7 @@ static void HandleAddressSpaceTypeAttribute(QualType &Type, Attr.setInvalid(); } else { // The keyword-based type attributes imply which address space to use. - ASIdx = S.getLangOpts().SYCLIsDevice ? - Attr.asSYCLLangAS() : Attr.asOpenCLLangAS(); + ASIdx = Attr.asOpenCLLangAS(); if (ASIdx == LangAS::Default) llvm_unreachable("Invalid address space"); diff --git a/clang/test/CodeGenSYCL/address-space-parameter-conversions.cpp b/clang/test/CodeGenSYCL/address-space-parameter-conversions.cpp index 4057603281239..7ef8f3176dd04 100644 --- a/clang/test/CodeGenSYCL/address-space-parameter-conversions.cpp +++ b/clang/test/CodeGenSYCL/address-space-parameter-conversions.cpp @@ -9,7 +9,7 @@ void foo(int * Data) {} // CHECK-DAG: define spir_func void @[[RAW_PTR:[a-zA-Z0-9_]+]](i32 addrspace(4)* % void foo2(int * Data) {} // CHECK-DAG: define spir_func void @[[RAW_PTR2:[a-zA-Z0-9_]+]](i32 addrspace(4)* % -void foo(__attribute__((address_space(3))) int * Data) {} +void foo(__attribute__((opencl_local)) int * Data) {} // CHECK-DAG: define spir_func void [[LOC_PTR:@[a-zA-Z0-9_]+]](i32 addrspace(3)* % template @@ -18,12 +18,11 @@ void tmpl(T t){} void usages() { // CHECK-DAG: [[GLOB:%[a-zA-Z0-9]+]] = alloca i32 addrspace(1)* - __attribute__((address_space(1))) int *GLOB; + __attribute__((opencl_global)) int *GLOB; // CHECK-DAG: [[LOC:%[a-zA-Z0-9]+]] = alloca i32 addrspace(3)* __attribute__((opencl_local)) int *LOC; // CHECK-DAG: [[NoAS:%[a-zA-Z0-9]+]] = alloca i32 addrspace(4)* int *NoAS; - // CHECK-DAG: [[PRIV:%[a-zA-Z0-9]+]] = alloca i32* __attribute__((opencl_private)) int *PRIV; @@ -94,57 +93,23 @@ void usages() { // CHECK-DAG: define linkonce_odr spir_func void [[GEN_TMPL]](i32 addrspace(4)* % void usages2() { - __attribute__((address_space(0))) int *PRIV_NUM; - // CHECK-DAG: [[PRIV_NUM:%[a-zA-Z0-9_]+]] = alloca i32* - __attribute__((address_space(0))) int *PRIV_NUM2; - // CHECK-DAG: [[PRIV_NUM2:%[a-zA-Z0-9_]+]] = alloca i32* __attribute__((opencl_private)) int *PRIV; // CHECK-DAG: [[PRIV:%[a-zA-Z0-9_]+]] = alloca i32* - __attribute__((address_space(1))) int *GLOB_NUM; - // CHECK-DAG: [[GLOB_NUM:%[a-zA-Z0-9_]+]] = alloca i32 addrspace(1)* __attribute__((opencl_global)) int *GLOB; // CHECK-DAG: [[GLOB:%[a-zA-Z0-9_]+]] = alloca i32 addrspace(1)* - __attribute__((address_space(2))) int *CONST_NUM; - // CHECK-DAG: [[CONST_NUM:%[a-zA-Z0-9_]+]] = alloca i32 addrspace(2)* __attribute__((opencl_constant)) int *CONST; // CHECK-DAG: [[CONST:%[a-zA-Z0-9_]+]] = alloca i32 addrspace(2)* - __attribute__((address_space(3))) int *LOCAL_NUM; - // CHECK-DAG: [[LOCAL_NUM:%[a-zA-Z0-9_]+]] = alloca i32 addrspace(3)* __attribute__((opencl_local)) int *LOCAL; // CHECK-DAG: [[LOCAL:%[a-zA-Z0-9_]+]] = alloca i32 addrspace(3)* - bar(*PRIV_NUM); - // CHECK-DAG: [[PRIV_NUM_LOAD:%[a-zA-Z0-9]+]] = load i32*, i32** [[PRIV_NUM]] - // CHECK-DAG: [[PRIV_NUM_ASCAST:%[a-zA-Z0-9]+]] = addrspacecast i32* [[PRIV_NUM_LOAD]] to i32 addrspace(4)* - // CHECK-DAG: call spir_func void @[[RAW_REF]](i32 addrspace(4)* dereferenceable(4) [[PRIV_NUM_ASCAST]]) - bar(*PRIV_NUM2); - // CHECK-DAG: [[PRIV_NUM2_LOAD:%[a-zA-Z0-9]+]] = load i32*, i32** [[PRIV_NUM2]] - // CHECK-DAG: [[PRIV_NUM2_ASCAST:%[a-zA-Z0-9]+]] = addrspacecast i32* [[PRIV_NUM2_LOAD]] to i32 addrspace(4)* - // CHECK-DAG: call spir_func void @[[RAW_REF]](i32 addrspace(4)* dereferenceable(4) [[PRIV_NUM2_ASCAST]]) bar(*PRIV); // CHECK-DAG: [[PRIV_LOAD:%[a-zA-Z0-9]+]] = load i32*, i32** [[PRIV]] // CHECK-DAG: [[PRIV_ASCAST:%[a-zA-Z0-9]+]] = addrspacecast i32* [[PRIV_LOAD]] to i32 addrspace(4)* // CHECK-DAG: call spir_func void @[[RAW_REF]](i32 addrspace(4)* dereferenceable(4) [[PRIV_ASCAST]]) - bar(*GLOB_NUM); - // CHECK-DAG: [[GLOB_NUM_LOAD:%[a-zA-Z0-9]+]] = load i32 addrspace(1)*, i32 addrspace(1)** [[GLOB_NUM]] - // CHECK-DAG: [[GLOB_NUM_CAST:%[a-zA-Z0-9]+]] = addrspacecast i32 addrspace(1)* [[GLOB_NUM_LOAD]] to i32 addrspace(4)* - // CHECK-DAG: call spir_func void @[[RAW_REF]](i32 addrspace(4)* dereferenceable(4) [[GLOB_NUM_CAST]]) bar(*GLOB); // CHECK-DAG: [[GLOB_LOAD:%[a-zA-Z0-9]+]] = load i32 addrspace(1)*, i32 addrspace(1)** [[GLOB]] // CHECK-DAG: [[GLOB_CAST:%[a-zA-Z0-9]+]] = addrspacecast i32 addrspace(1)* [[GLOB_LOAD]] to i32 addrspace(4)* // CHECK-DAG: call spir_func void @[[RAW_REF]](i32 addrspace(4)* dereferenceable(4) [[GLOB_CAST]]) - bar(*CONST_NUM); - // CHECK-DAG: [[CONST_NUM_LOAD:%[a-zA-Z0-9]+]] = load i32 addrspace(2)*, i32 addrspace(2)** [[CONST_NUM]] - // CHECK-DAG: [[CONST_NUM_CAST:%[a-zA-Z0-9]+]] = addrspacecast i32 addrspace(2)* [[CONST_NUM_LOAD]] to i32 addrspace(4)* - // CHECK-DAG: call spir_func void @[[RAW_REF]](i32 addrspace(4)* dereferenceable(4) [[CONST_NUM_CAST]]) - bar(*CONST); - // CHECK-DAG: [[CONST_LOAD:%[a-zA-Z0-9]+]] = load i32 addrspace(2)*, i32 addrspace(2)** [[CONST]] - // CHECK-DAG: [[CONST_CAST:%[a-zA-Z0-9]+]] = addrspacecast i32 addrspace(2)* [[CONST_LOAD]] to i32 addrspace(4)* - // CHECK-DAG: call spir_func void @[[RAW_REF]](i32 addrspace(4)* dereferenceable(4) [[CONST_CAST]]) - bar2(*LOCAL_NUM); - // CHECK-DAG: [[LOCAL_NUM_LOAD:%[a-zA-Z0-9]+]] = load i32 addrspace(3)*, i32 addrspace(3)** [[LOCAL_NUM]] - // CHECK-DAG: [[LOCAL_NUM_CAST:%[a-zA-Z0-9]+]] = addrspacecast i32 addrspace(3)* [[LOCAL_NUM_LOAD]] to i32 addrspace(4)* - // CHECK-DAG: call spir_func void @[[RAW_REF2]](i32 addrspace(4)* dereferenceable(4) [[LOCAL_NUM_CAST]]) bar2(*LOCAL); // CHECK-DAG: [[LOCAL_LOAD:%[a-zA-Z0-9]+]] = load i32 addrspace(3)*, i32 addrspace(3)** [[LOCAL]] // CHECK-DAG: [[LOCAL_CAST:%[a-zA-Z0-9]+]] = addrspacecast i32 addrspace(3)* [[LOCAL_LOAD]] to i32 addrspace(4)* diff --git a/clang/test/SemaOpenCLCXX/address-space-lambda.cl b/clang/test/SemaOpenCLCXX/address-space-lambda.cl index eeea71e6353f6..f94717965016e 100644 --- a/clang/test/SemaOpenCLCXX/address-space-lambda.cl +++ b/clang/test/SemaOpenCLCXX/address-space-lambda.cl @@ -31,8 +31,8 @@ __kernel void test_qual() { //CHECK: |-CXXMethodDecl {{.*}} constexpr operator() 'void () const __generic' auto priv2 = []() __generic {}; priv2(); - auto priv3 = []() __global {}; //expected-note{{candidate function not viable: 'this' object is in address space '__private', but method expects object in address space '__global'}} //expected-note{{conversion candidate of type 'void (*)()'}} - priv3(); //expected-error{{no matching function for call to object of type}} + auto priv3 = []() __global {}; //ex pected-note{{candidate function not viable: 'this' object is in address space '__private', but method expects object in address space '__global'}} //ex pected-note{{conversion candidate of type 'void (*)()'}} + priv3(); //ex pected-error{{no matching function for call to object of type}} __constant auto const1 = []() __private{}; //expected-note{{candidate function not viable: 'this' object is in address space '__constant', but method expects object in address space '__private'}} //expected-note{{conversion candidate of type 'void (*)()'}} const1(); //expected-error{{no matching function for call to object of type '__constant (lambda at}} diff --git a/clang/test/SemaSYCL/address-space-parameter-conversions.cpp b/clang/test/SemaSYCL/address-space-parameter-conversions.cpp index 4633964ac2235..bd7c6f3c22285 100644 --- a/clang/test/SemaSYCL/address-space-parameter-conversions.cpp +++ b/clang/test/SemaSYCL/address-space-parameter-conversions.cpp @@ -13,7 +13,7 @@ void tmpl(T *t){} void usages() { __attribute__((opencl_global)) int *GLOB; __attribute__((opencl_private)) int *PRIV; - __attribute__((address_space(3))) int *LOC; + __attribute__((opencl_local)) int *LOC; int *NoAS; bar(*GLOB); @@ -53,10 +53,6 @@ void usages() { // expected-error@+1{{address space is negative}} __attribute__((address_space(-1))) int *TooLow; - // expected-error@+1{{address space is outside the valid range of values}} - __attribute__((address_space(6))) int *TooHigh; - // expected-error@+1{{address space is outside the valid range of values}} - __attribute__((address_space(4))) int *TriedGeneric; // expected-error@+1{{unknown type name '__generic'}} __generic int *IsGeneric; diff --git a/clang/test/SemaTemplate/address_space-dependent.cpp b/clang/test/SemaTemplate/address_space-dependent.cpp index 6983a39e5e360..76cd338769eaa 100644 --- a/clang/test/SemaTemplate/address_space-dependent.cpp +++ b/clang/test/SemaTemplate/address_space-dependent.cpp @@ -43,7 +43,7 @@ void neg() { template void tooBig() { - __attribute__((address_space(I))) int *bounds; // expected-error {{address space is larger than the maximum supported (8388590)}} + __attribute__((address_space(I))) int *bounds; // expected-error {{address space is larger than the maximum supported (8388595)}} } template diff --git a/sycl/include/CL/__spirv/spirv_vars.hpp b/sycl/include/CL/__spirv/spirv_vars.hpp index dd0eaa0dce42e..ff5ede3166c92 100644 --- a/sycl/include/CL/__spirv/spirv_vars.hpp +++ b/sycl/include/CL/__spirv/spirv_vars.hpp @@ -10,43 +10,35 @@ #ifdef __SYCL_DEVICE_ONLY__ -typedef size_t size_t_vec __attribute__((ext_vector_type(3))); -extern "C" const __attribute__((opencl_constant)) size_t_vec __spirv_BuiltInGlobalSize; -extern "C" const __attribute__((opencl_constant)) size_t_vec __spirv_BuiltInGlobalInvocationId; -extern "C" const __attribute__((opencl_constant)) size_t_vec __spirv_BuiltInWorkgroupSize; -extern "C" const __attribute__((opencl_constant)) size_t_vec __spirv_BuiltInNumWorkgroups; -extern "C" const __attribute__((opencl_constant)) size_t_vec __spirv_BuiltInLocalInvocationId; -extern "C" const __attribute__((opencl_constant)) size_t_vec __spirv_BuiltInWorkgroupId; -extern "C" const __attribute__((opencl_constant)) size_t_vec __spirv_BuiltInGlobalOffset; - -#define DEFINE_INT_ID_TO_XYZ_CONVERTER(POSTFIX) \ - template static size_t get##POSTFIX(); \ - template <> size_t get##POSTFIX<0>() { return __spirv_BuiltIn##POSTFIX.x; } \ - template <> size_t get##POSTFIX<1>() { return __spirv_BuiltIn##POSTFIX.y; } \ - template <> size_t get##POSTFIX<2>() { return __spirv_BuiltIn##POSTFIX.z; } - -namespace __spirv { +#define __SPIRV_VAR_QUALIFIERS extern "C" const __attribute__((opencl_global)) -DEFINE_INT_ID_TO_XYZ_CONVERTER(GlobalSize); -DEFINE_INT_ID_TO_XYZ_CONVERTER(GlobalInvocationId) -DEFINE_INT_ID_TO_XYZ_CONVERTER(WorkgroupSize) -DEFINE_INT_ID_TO_XYZ_CONVERTER(NumWorkgroups) -DEFINE_INT_ID_TO_XYZ_CONVERTER(LocalInvocationId) -DEFINE_INT_ID_TO_XYZ_CONVERTER(WorkgroupId) -DEFINE_INT_ID_TO_XYZ_CONVERTER(GlobalOffset) +__SPIRV_VAR_QUALIFIERS uint32_t __spirv_BuiltInSubgroupSize; +__SPIRV_VAR_QUALIFIERS uint32_t __spirv_BuiltInSubgroupMaxSize; +__SPIRV_VAR_QUALIFIERS uint32_t __spirv_BuiltInNumSubgroups; +__SPIRV_VAR_QUALIFIERS uint32_t __spirv_BuiltInNumEnqueuedSubgroups; +__SPIRV_VAR_QUALIFIERS uint32_t __spirv_BuiltInSubgroupId; +__SPIRV_VAR_QUALIFIERS uint32_t __spirv_BuiltInSubgroupLocalInvocationId; -} // namespace __spirv +typedef size_t size_t_vec __attribute__((ext_vector_type(3))); +__SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInGlobalSize; +__SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInGlobalInvocationId; +__SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInWorkgroupSize; +__SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInNumWorkgroups; +__SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInLocalInvocationId; +__SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInWorkgroupId; +__SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInGlobalOffset; -#undef DEFINE_INT_ID_TO_XYZ_CONVERTER +#undef __SPIRV_VAR_QUALIFIERS -extern "C" const __attribute__((opencl_constant)) uint32_t __spirv_BuiltInSubgroupSize; -extern "C" const __attribute__((opencl_constant)) uint32_t __spirv_BuiltInSubgroupMaxSize; -extern "C" const __attribute__((opencl_constant)) uint32_t __spirv_BuiltInNumSubgroups; -extern "C" const __attribute__((opencl_constant)) uint32_t __spirv_BuiltInNumEnqueuedSubgroups; -extern "C" const __attribute__((opencl_constant)) uint32_t __spirv_BuiltInSubgroupId; -extern "C" const __attribute__((opencl_constant)) uint32_t __spirv_BuiltInSubgroupLocalInvocationId; +namespace __spirv { -#define DEFINE_INIT_SIZES(POSTFIX) \ +// Helper function templates to initialize and get vector component from SPIR-V +// built-in variables +#define __SPIRV_DEFINE_INIT_AND_GET_HELPERS(POSTFIX) \ + template static size_t get##POSTFIX(); \ + template <> size_t get##POSTFIX<0>() { return __spirv_BuiltIn##POSTFIX.x; } \ + template <> size_t get##POSTFIX<1>() { return __spirv_BuiltIn##POSTFIX.y; } \ + template <> size_t get##POSTFIX<2>() { return __spirv_BuiltIn##POSTFIX.z; } \ \ template struct InitSizesST##POSTFIX; \ \ @@ -68,18 +60,16 @@ extern "C" const __attribute__((opencl_constant)) uint32_t __spirv_BuiltInSubgro return InitSizesST##POSTFIX::initSize(); \ } -namespace __spirv { +__SPIRV_DEFINE_INIT_AND_GET_HELPERS(GlobalSize); +__SPIRV_DEFINE_INIT_AND_GET_HELPERS(GlobalInvocationId) +__SPIRV_DEFINE_INIT_AND_GET_HELPERS(WorkgroupSize) +__SPIRV_DEFINE_INIT_AND_GET_HELPERS(NumWorkgroups) +__SPIRV_DEFINE_INIT_AND_GET_HELPERS(LocalInvocationId) +__SPIRV_DEFINE_INIT_AND_GET_HELPERS(WorkgroupId) +__SPIRV_DEFINE_INIT_AND_GET_HELPERS(GlobalOffset) -DEFINE_INIT_SIZES(GlobalSize); -DEFINE_INIT_SIZES(GlobalInvocationId) -DEFINE_INIT_SIZES(WorkgroupSize) -DEFINE_INIT_SIZES(NumWorkgroups) -DEFINE_INIT_SIZES(LocalInvocationId) -DEFINE_INIT_SIZES(WorkgroupId) -DEFINE_INIT_SIZES(GlobalOffset) +#undef __SPIRV_DEFINE_INIT_AND_GET_HELPERS } // namespace __spirv -#undef DEFINE_INIT_SIZES - #endif // __SYCL_DEVICE_ONLY__