Diffstat (limited to 'contrib/llvm-project/clang/lib/Sema/SemaCUDA.cpp')
-rw-r--r-- | contrib/llvm-project/clang/lib/Sema/SemaCUDA.cpp | 177
1 file changed, 134 insertions, 43 deletions
diff --git a/contrib/llvm-project/clang/lib/Sema/SemaCUDA.cpp b/contrib/llvm-project/clang/lib/Sema/SemaCUDA.cpp
index 75364c10c154..6a66ecf6f94c 100644
--- a/contrib/llvm-project/clang/lib/Sema/SemaCUDA.cpp
+++ b/contrib/llvm-project/clang/lib/Sema/SemaCUDA.cpp
@@ -22,8 +22,8 @@
 #include "clang/Sema/SemaDiagnostic.h"
 #include "clang/Sema/SemaInternal.h"
 #include "clang/Sema/Template.h"
-#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallVector.h"
+#include <optional>

 using namespace clang;

 template <typename AttrT> static bool hasExplicitAttr(const VarDecl *D) {
@@ -105,19 +105,37 @@ Sema::IdentifyCUDATarget(const ParsedAttributesView &Attrs) {
 }

 template <typename A>
-static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr) {
+static bool hasAttr(const Decl *D, bool IgnoreImplicitAttr) {
   return D->hasAttrs() &&
          llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
            return isa<A>(Attribute) &&
                   !(IgnoreImplicitAttr && Attribute->isImplicit());
          });
 }

+Sema::CUDATargetContextRAII::CUDATargetContextRAII(Sema &S_,
+                                                   CUDATargetContextKind K,
+                                                   Decl *D)
+    : S(S_) {
+  SavedCtx = S.CurCUDATargetCtx;
+  assert(K == CTCK_InitGlobalVar);
+  auto *VD = dyn_cast_or_null<VarDecl>(D);
+  if (VD && VD->hasGlobalStorage() && !VD->isStaticLocal()) {
+    auto Target = CFT_Host;
+    if ((hasAttr<CUDADeviceAttr>(VD, /*IgnoreImplicit=*/true) &&
+         !hasAttr<CUDAHostAttr>(VD, /*IgnoreImplicit=*/true)) ||
+        hasAttr<CUDASharedAttr>(VD, /*IgnoreImplicit=*/true) ||
+        hasAttr<CUDAConstantAttr>(VD, /*IgnoreImplicit=*/true))
+      Target = CFT_Device;
+    S.CurCUDATargetCtx = {Target, K, VD};
+  }
+}
+
 /// IdentifyCUDATarget - Determine the CUDA compilation target for this function
 Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D,
                                                   bool IgnoreImplicitHDAttr) {
-  // Code that lives outside a function is run on the host.
+  // Code that lives outside a function gets the target from CurCUDATargetCtx.
   if (D == nullptr)
-    return CFT_Host;
+    return CurCUDATargetCtx.Target;

   if (D->hasAttr<CUDAInvalidTargetAttr>())
     return CFT_InvalidTarget;
@@ -145,9 +163,11 @@ Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D,
 Sema::CUDAVariableTarget Sema::IdentifyCUDATarget(const VarDecl *Var) {
   if (Var->hasAttr<HIPManagedAttr>())
     return CVT_Unified;
-  if (Var->isConstexpr() && !hasExplicitAttr<CUDAConstantAttr>(Var))
-    return CVT_Both;
-  if (Var->getType().isConstQualified() && Var->hasAttr<CUDAConstantAttr>() &&
+  // Only constexpr and const variables with an implicit constant attribute
+  // are emitted on both sides. Such variables are promoted to the device side
+  // only if they have static constant initializers on the device side.
+  if ((Var->isConstexpr() || Var->getType().isConstQualified()) &&
+      Var->hasAttr<CUDAConstantAttr>() &&
       !hasExplicitAttr<CUDAConstantAttr>(Var))
     return CVT_Both;
   if (Var->hasAttr<CUDADeviceAttr>() || Var->hasAttr<CUDAConstantAttr>() ||
@@ -205,6 +225,15 @@ Sema::CUDAFunctionPreference
 Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
                              const FunctionDecl *Callee) {
   assert(Callee && "Callee must be valid.");
+
+  // Treat a ctor/dtor as a host-device function in a device var initializer,
+  // so that a trivial ctor/dtor without a device attr can be used there.
+  // A non-trivial ctor/dtor will be diagnosed by checkAllowedCUDAInitializer.
+  if (Caller == nullptr && CurCUDATargetCtx.Kind == CTCK_InitGlobalVar &&
+      CurCUDATargetCtx.Target == CFT_Device &&
+      (isa<CXXConstructorDecl>(Callee) || isa<CXXDestructorDecl>(Callee)))
+    return CFP_HostDevice;
+
   CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller);
   CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee);
@@ -229,6 +258,15 @@ Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
       (CallerTarget == CFT_Global && CalleeTarget == CFT_Device))
     return CFP_Native;

+  // HipStdPar mode is special: whether a device-side call to a host target is
+  // valid is assessed by a subsequent pass and cannot be unambiguously
+  // adjudicated in the AST, so we optimistically allow such calls to pass here.
+  if (getLangOpts().HIPStdPar &&
+      (CallerTarget == CFT_Global || CallerTarget == CFT_Device ||
+       CallerTarget == CFT_HostDevice) &&
+      CalleeTarget == CFT_Host)
+    return CFP_HostDevice;
+
   // (d) HostDevice behavior depends on compilation mode.
   if (CallerTarget == CFT_HostDevice) {
     // It's OK to call a compilation-mode matching function from an HD one.
@@ -336,7 +374,7 @@ bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
   if (!InClass || HasExplicitAttr)
     return false;

-  llvm::Optional<CUDAFunctionTarget> InferredTarget;
+  std::optional<CUDAFunctionTarget> InferredTarget;

   // We're going to invoke special member lookup; mark that these special
   // members are called from this one, and not from its caller.
@@ -353,9 +391,7 @@ bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
   }

   if (!ClassDecl->isAbstract()) {
-    for (const auto &VB : ClassDecl->vbases()) {
-      Bases.push_back(&VB);
-    }
+    llvm::append_range(Bases, llvm::make_pointer_range(ClassDecl->vbases()));
   }

   for (const auto *B : Bases) {
@@ -377,17 +413,16 @@ bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
       continue;

     CUDAFunctionTarget BaseMethodTarget = IdentifyCUDATarget(SMOR.getMethod());
-    if (!InferredTarget.hasValue()) {
+    if (!InferredTarget) {
       InferredTarget = BaseMethodTarget;
     } else {
       bool ResolutionError = resolveCalleeCUDATargetConflict(
-          InferredTarget.getValue(), BaseMethodTarget,
-          InferredTarget.getPointer());
+          *InferredTarget, BaseMethodTarget, &*InferredTarget);
       if (ResolutionError) {
         if (Diagnose) {
           Diag(ClassDecl->getLocation(),
                diag::note_implicit_member_target_infer_collision)
-              << (unsigned)CSM << InferredTarget.getValue() << BaseMethodTarget;
+              << (unsigned)CSM << *InferredTarget << BaseMethodTarget;
         }
         MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
         return true;
@@ -421,18 +456,16 @@ bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,

       CUDAFunctionTarget FieldMethodTarget =
           IdentifyCUDATarget(SMOR.getMethod());
-      if (!InferredTarget.hasValue()) {
+      if (!InferredTarget) {
         InferredTarget = FieldMethodTarget;
       } else {
         bool ResolutionError = resolveCalleeCUDATargetConflict(
-            InferredTarget.getValue(), FieldMethodTarget,
-            InferredTarget.getPointer());
+            *InferredTarget, FieldMethodTarget, &*InferredTarget);
         if (ResolutionError) {
           if (Diagnose) {
             Diag(ClassDecl->getLocation(),
                  diag::note_implicit_member_target_infer_collision)
-                << (unsigned)CSM << InferredTarget.getValue()
-                << FieldMethodTarget;
+                << (unsigned)CSM << *InferredTarget << FieldMethodTarget;
           }
           MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
           return true;
@@ -444,10 +477,10 @@
   // If no target was inferred, mark this member as __host__ __device__;
   // it's the least restrictive option that can be invoked from any target.
   bool NeedsH = true, NeedsD = true;
-  if (InferredTarget.hasValue()) {
-    if (InferredTarget.getValue() == CFT_Device)
+  if (InferredTarget) {
+    if (*InferredTarget == CFT_Device)
       NeedsH = false;
-    else if (InferredTarget.getValue() == CFT_Host)
+    else if (*InferredTarget == CFT_Host)
       NeedsD = false;
   }
@@ -590,6 +623,8 @@ bool HasAllowedCUDADeviceStaticInitializer(Sema &S, VarDecl *VD,
   };
   auto IsConstantInit = [&](const Expr *Init) {
     assert(Init);
+    ASTContext::CUDAConstantEvalContextRAII EvalCtx(S.Context,
+                                                    /*NoWrongSidedVars=*/true);
     return Init->isConstantInitializer(S.Context,
                                        VD->getType()->isReferenceType());
   };
@@ -606,6 +641,13 @@ bool HasAllowedCUDADeviceStaticInitializer(Sema &S, VarDecl *VD,
 } // namespace

 void Sema::checkAllowedCUDAInitializer(VarDecl *VD) {
+  // Return early if VD is inside a non-instantiated template function, since
+  // the implicit constructor is not defined yet.
+  if (const FunctionDecl *FD =
+          dyn_cast_or_null<FunctionDecl>(VD->getDeclContext()))
+    if (FD->isDependentContext())
+      return;
+
   // Do not check dependent variables since the ctor/dtor/initializer are not
   // determined. Do it after instantiation.
   if (VD->isInvalidDecl() || !VD->hasInit() || !VD->hasGlobalStorage() ||
@@ -645,6 +687,27 @@ void Sema::checkAllowedCUDAInitializer(VarDecl *VD) {
   }
 }

+void Sema::CUDARecordImplicitHostDeviceFuncUsedByDevice(
+    const FunctionDecl *Callee) {
+  FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true);
+  if (!Caller)
+    return;
+
+  if (!isCUDAImplicitHostDeviceFunction(Callee))
+    return;
+
+  CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller);
+
+  // Record whether an implicit host-device function is used on the device side.
+  if (CallerTarget != CFT_Device && CallerTarget != CFT_Global &&
+      (CallerTarget != CFT_HostDevice ||
+       (isCUDAImplicitHostDeviceFunction(Caller) &&
+        !getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.count(Caller))))
+    return;
+
+  getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.insert(Callee);
+}
+
 // With -fcuda-host-device-constexpr, an unattributed constexpr function is
 // treated as implicitly __host__ __device__, unless:
 //  * it is a variadic function (device-side variadic functions are not
@@ -669,6 +732,18 @@ void Sema::maybeAddCUDAHostDeviceAttrs(FunctionDecl *NewD,
     return;
   }

+  // If a template function has no host/device/global attributes,
+  // make it an implicit host-device function.
+  if (getLangOpts().OffloadImplicitHostDeviceTemplates &&
+      !NewD->hasAttr<CUDAHostAttr>() && !NewD->hasAttr<CUDADeviceAttr>() &&
+      !NewD->hasAttr<CUDAGlobalAttr>() &&
+      (NewD->getDescribedFunctionTemplate() ||
+       NewD->isFunctionTemplateSpecialization())) {
+    NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
+    NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
+    return;
+  }
+
   if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() ||
       NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() ||
       NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>())
@@ -713,12 +788,12 @@ void Sema::MaybeAddCUDAConstantAttr(VarDecl *VD) {
   // Do not promote dependent variables since the ctor/dtor/initializer are
   // not determined. Do it after instantiation.
   if (getLangOpts().CUDAIsDevice && !VD->hasAttr<CUDAConstantAttr>() &&
-      !VD->hasAttr<CUDAConstantAttr>() && !VD->hasAttr<CUDASharedAttr>() &&
+      !VD->hasAttr<CUDASharedAttr>() &&
       (VD->isFileVarDecl() || VD->isStaticDataMember()) &&
       !IsDependentVar(VD) &&
-      (VD->isConstexpr() || (VD->getType().isConstQualified() &&
-                             HasAllowedCUDADeviceStaticInitializer(
-                                 *this, VD, CICK_DeviceOrConstant)))) {
+      ((VD->isConstexpr() || VD->getType().isConstQualified()) &&
+       HasAllowedCUDADeviceStaticInitializer(*this, VD,
+                                             CICK_DeviceOrConstant))) {
     VD->addAttr(CUDAConstantAttr::CreateImplicit(getASTContext()));
   }
 }
@@ -726,8 +801,9 @@ void Sema::MaybeAddCUDAConstantAttr(VarDecl *VD) {
 Sema::SemaDiagnosticBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc,
                                                        unsigned DiagID) {
   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
+  FunctionDecl *CurFunContext = getCurFunctionDecl(/*AllowLambda=*/true);
   SemaDiagnosticBuilder::Kind DiagKind = [&] {
-    if (!isa<FunctionDecl>(CurContext))
+    if (!CurFunContext)
       return SemaDiagnosticBuilder::K_Nop;
     switch (CurrentCUDATarget()) {
     case CFT_Global:
@@ -741,7 +817,7 @@ Sema::SemaDiagnosticBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc,
         return SemaDiagnosticBuilder::K_Nop;
       if (IsLastErrorImmediate && Diags.getDiagnosticIDs()->isBuiltinNote(DiagID))
         return SemaDiagnosticBuilder::K_Immediate;
-      return (getEmissionStatus(cast<FunctionDecl>(CurContext)) ==
+      return (getEmissionStatus(CurFunContext) ==
              FunctionEmissionStatus::Emitted)
                 ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
                 : SemaDiagnosticBuilder::K_Deferred;
@@ -749,15 +825,15 @@ Sema::SemaDiagnosticBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc,
       return SemaDiagnosticBuilder::K_Nop;
     }
   }();
-  return SemaDiagnosticBuilder(DiagKind, Loc, DiagID,
-                               dyn_cast<FunctionDecl>(CurContext), *this);
+  return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, CurFunContext, *this);
 }

 Sema::SemaDiagnosticBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc,
                                                      unsigned DiagID) {
   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
+  FunctionDecl *CurFunContext = getCurFunctionDecl(/*AllowLambda=*/true);
   SemaDiagnosticBuilder::Kind DiagKind = [&] {
-    if (!isa<FunctionDecl>(CurContext))
+    if (!CurFunContext)
       return SemaDiagnosticBuilder::K_Nop;
     switch (CurrentCUDATarget()) {
     case CFT_Host:
@@ -770,7 +846,7 @@ Sema::SemaDiagnosticBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc,
       return SemaDiagnosticBuilder::K_Nop;
     if (IsLastErrorImmediate && Diags.getDiagnosticIDs()->isBuiltinNote(DiagID))
       return SemaDiagnosticBuilder::K_Immediate;
-    return (getEmissionStatus(cast<FunctionDecl>(CurContext)) ==
+    return (getEmissionStatus(CurFunContext) ==
            FunctionEmissionStatus::Emitted)
               ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
               : SemaDiagnosticBuilder::K_Deferred;
@@ -778,21 +854,20 @@ Sema::SemaDiagnosticBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc,
       return SemaDiagnosticBuilder::K_Nop;
     }
   }();
-  return SemaDiagnosticBuilder(DiagKind, Loc, DiagID,
-                               dyn_cast<FunctionDecl>(CurContext), *this);
+  return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, CurFunContext, *this);
 }

 bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
   assert(Callee && "Callee may not be null.");

-  auto &ExprEvalCtx = ExprEvalContexts.back();
+  const auto &ExprEvalCtx = currentEvaluationContext();
   if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated())
     return true;

   // FIXME: Is bailing out early correct here? Should we instead assume that
   // the caller is a global initializer?
-  FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext);
+  FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true);
   if (!Caller)
     return true;
@@ -817,8 +892,13 @@ bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
     }
   }();

-  if (DiagKind == SemaDiagnosticBuilder::K_Nop)
+  if (DiagKind == SemaDiagnosticBuilder::K_Nop) {
+    // For -fgpu-rdc, keep track of external kernels used by host functions.
+    if (LangOpts.CUDAIsDevice && LangOpts.GPURelocatableDeviceCode &&
+        Callee->hasAttr<CUDAGlobalAttr>() && !Callee->isDefined())
+      getASTContext().CUDAExternalDeviceDeclODRUsedByHost.insert(Callee);
     return true;
+  }

   // Avoid emitting this error twice for the same location. Using a hashtable
   // like this is unfortunate, but because we must continue parsing as normal
@@ -858,7 +938,7 @@ void Sema::CUDACheckLambdaCapture(CXXMethodDecl *Callee,

   // File-scope lambda can only do init captures for global variables, which
   // results in passing by value for these global variables.
-  FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext);
+  FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true);
   if (!Caller)
     return;
@@ -873,15 +953,19 @@ void Sema::CUDACheckLambdaCapture(CXXMethodDecl *Callee,
   if (!ShouldCheck || !Capture.isReferenceCapture())
     return;
   auto DiagKind = SemaDiagnosticBuilder::K_Deferred;
-  if (Capture.isVariableCapture()) {
+  if (Capture.isVariableCapture() && !getLangOpts().HIPStdPar) {
     SemaDiagnosticBuilder(DiagKind, Capture.getLocation(),
                          diag::err_capture_bad_target, Callee, *this)
         << Capture.getVariable();
   } else if (Capture.isThisCapture()) {
+    // Capture of the 'this' pointer is allowed, since it may point to managed
+    // memory that is accessible on both the device and host sides. It only
+    // results in an invalid memory access if 'this' points to memory that is
+    // not accessible on the device side.
     SemaDiagnosticBuilder(DiagKind, Capture.getLocation(),
-                          diag::err_capture_bad_target_this_ptr, Callee, *this);
+                          diag::warn_maybe_capture_bad_target_this_ptr, Callee,
+                          *this);
   }
-  return;
 }

 void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) {
@@ -908,7 +992,14 @@ void Sema::checkCUDATargetOverload(FunctionDecl *NewFD,
   // HD/global functions "exist" in some sense on both the host and device, so
   // should have the same implementation on both sides.
   if (NewTarget != OldTarget &&
-      ((NewTarget == CFT_HostDevice) || (OldTarget == CFT_HostDevice) ||
+      ((NewTarget == CFT_HostDevice &&
+        !(LangOpts.OffloadImplicitHostDeviceTemplates &&
+          isCUDAImplicitHostDeviceFunction(NewFD) &&
+          OldTarget == CFT_Device)) ||
+       (OldTarget == CFT_HostDevice &&
+        !(LangOpts.OffloadImplicitHostDeviceTemplates &&
+          isCUDAImplicitHostDeviceFunction(OldFD) &&
+          NewTarget == CFT_Device)) ||
        (NewTarget == CFT_Global) || (OldTarget == CFT_Global)) &&
       !IsOverload(NewFD, OldFD, /* UseMemberUsingDeclRules = */ false,
                   /* ConsiderCudaAttrs = */ false)) {
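
The CUDATargetContextRAII and IdentifyCUDAPreference hunks above work together: inside the initializer of a device-side global variable, a constructor or destructor with no target attribute is treated as host-device, so trivial special members become usable there. A minimal sketch of the pattern this accepts, with hypothetical type and variable names, assuming CUDA or HIP device compilation:

struct Payload {
  int Value; // no initializer, so the default ctor and dtor stay trivial
};

// The initializer context is identified as CFT_Device, so the implicit
// trivial ctor/dtor are treated as host-device and permitted here.
__device__ Payload GlobalPayload;

A type with a non-trivial constructor would still be rejected, only later, by checkAllowedCUDAInitializer rather than during preference resolution.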
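
The IdentifyCUDATarget(const VarDecl *) and MaybeAddCUDAConstantAttr hunks are two halves of one rule: a constexpr or const-qualified variable with an allowed static initializer receives an implicit CUDAConstantAttr and is then emitted on both sides (CVT_Both). A sketch with hypothetical names:

// Promoted to an implicit __constant__ copy on the device side because the
// initializer is a static constant; the variable is emitted on both sides.
constexpr float Pi = 3.14159f;

__host__ __device__ float circleArea(float R) {
  return Pi * R * R; // device code reads the device copy, host code the host copy
}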
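
The maybeAddCUDAHostDeviceAttrs hunk marks unattributed function templates implicitly __host__ __device__ when LangOpts.OffloadImplicitHostDeviceTemplates is set (presumably exposed by the -foffload-implicit-host-device-templates driver flag), and the checkCUDATargetOverload hunk then exempts such implicit host-device functions from the usual target-overload conflict with a same-signature __device__ function. A sketch with hypothetical names:

// With the option enabled, this unattributed template behaves as if it were
// declared __host__ __device__.
template <typename T> T twice(T V) { return V + V; }

__global__ void kern(int *Out) {
  *Out = twice(*Out); // now usable in device code
}

int twiceOnHost(int X) { return twice(X); } // still callable from the host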
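
The CUDACheckLambdaCapture hunk downgrades capturing 'this' in a device lambda from err_capture_bad_target_this_ptr to warn_maybe_capture_bad_target_this_ptr, because 'this' may point to managed memory that is valid on both sides. A sketch of code that would now compile with a warning instead of an error; the kernel wrapper is hypothetical, and the lambda syntax assumes clang's support for target attributes on lambdas:

template <typename F> __global__ void run(F Fn) { Fn(); }

struct Counter {
  int N = 0;
  void bump() {
    // Warns rather than errors: invalid only if 'this' does not point to
    // device-accessible (e.g. managed) memory at run time.
    run<<<1, 1>>>([this] __device__() { ++N; });
  }
};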