Diffstat (limited to 'contrib/llvm-project/clang/lib/Sema/SemaCUDA.cpp')
 contrib/llvm-project/clang/lib/Sema/SemaCUDA.cpp | 177 ++++++++++++---------
 1 file changed, 134 insertions(+), 43 deletions(-)
diff --git a/contrib/llvm-project/clang/lib/Sema/SemaCUDA.cpp b/contrib/llvm-project/clang/lib/Sema/SemaCUDA.cpp
index 75364c10c154..6a66ecf6f94c 100644
--- a/contrib/llvm-project/clang/lib/Sema/SemaCUDA.cpp
+++ b/contrib/llvm-project/clang/lib/Sema/SemaCUDA.cpp
@@ -22,8 +22,8 @@
#include "clang/Sema/SemaDiagnostic.h"
#include "clang/Sema/SemaInternal.h"
#include "clang/Sema/Template.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
+#include <optional>
using namespace clang;
template <typename AttrT> static bool hasExplicitAttr(const VarDecl *D) {
@@ -105,19 +105,37 @@ Sema::IdentifyCUDATarget(const ParsedAttributesView &Attrs) {
}
template <typename A>
-static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr) {
+static bool hasAttr(const Decl *D, bool IgnoreImplicitAttr) {
return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
return isa<A>(Attribute) &&
!(IgnoreImplicitAttr && Attribute->isImplicit());
});
}
+Sema::CUDATargetContextRAII::CUDATargetContextRAII(Sema &S_,
+ CUDATargetContextKind K,
+ Decl *D)
+ : S(S_) {
+ SavedCtx = S.CurCUDATargetCtx;
+ assert(K == CTCK_InitGlobalVar);
+ auto *VD = dyn_cast_or_null<VarDecl>(D);
+ if (VD && VD->hasGlobalStorage() && !VD->isStaticLocal()) {
+ auto Target = CFT_Host;
+ if ((hasAttr<CUDADeviceAttr>(VD, /*IgnoreImplicit=*/true) &&
+ !hasAttr<CUDAHostAttr>(VD, /*IgnoreImplicit=*/true)) ||
+ hasAttr<CUDASharedAttr>(VD, /*IgnoreImplicit=*/true) ||
+ hasAttr<CUDAConstantAttr>(VD, /*IgnoreImplicit=*/true))
+ Target = CFT_Device;
+ S.CurCUDATargetCtx = {Target, K, VD};
+ }
+}
+
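A hedged sketch of what this new context enables (the names below are invented, not from the patch): the initializer of a global device-side variable is code outside any function, and the RAII context lets IdentifyCUDATarget(nullptr) report the variable's target instead of defaulting to host.

__device__ int make_seed();         // device-only function (assumed)
__device__ int seed = make_seed();  // no enclosing FunctionDecl here; with
                                    // CTCK_InitGlobalVar active, the null-decl
                                    // query below yields CFT_Device rather
                                    // than CFT_Host, so the call is checked
                                    // against the right target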
/// IdentifyCUDATarget - Determine the CUDA compilation target for this function
Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D,
bool IgnoreImplicitHDAttr) {
- // Code that lives outside a function is run on the host.
+ // Code that lives outside a function gets the target from CurCUDATargetCtx.
if (D == nullptr)
- return CFT_Host;
+ return CurCUDATargetCtx.Target;
if (D->hasAttr<CUDAInvalidTargetAttr>())
return CFT_InvalidTarget;
@@ -145,9 +163,11 @@ Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D,
Sema::CUDAVariableTarget Sema::IdentifyCUDATarget(const VarDecl *Var) {
if (Var->hasAttr<HIPManagedAttr>())
return CVT_Unified;
- if (Var->isConstexpr() && !hasExplicitAttr<CUDAConstantAttr>(Var))
- return CVT_Both;
- if (Var->getType().isConstQualified() && Var->hasAttr<CUDAConstantAttr>() &&
+ // Only constexpr and const variables with an implicit constant attribute
+ // are emitted on both sides. Such variables are promoted to the device side
+ // only if they have static constant initializers on the device side.
+ if ((Var->isConstexpr() || Var->getType().isConstQualified()) &&
+ Var->hasAttr<CUDAConstantAttr>() &&
!hasExplicitAttr<CUDAConstantAttr>(Var))
return CVT_Both;
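For illustration (a hedged sketch; variable names are invented), the widened condition means a const-qualified variable carrying only an implicit constant attribute is now classified like a constexpr one:

constexpr float kPi = 3.14159f;   // implicit __constant__: CVT_Both
const int kN = 42;                // const + implicit __constant__: now CVT_Both
__constant__ float kLUT[16];      // explicit __constant__: stays device-only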
if (Var->hasAttr<CUDADeviceAttr>() || Var->hasAttr<CUDAConstantAttr>() ||
@@ -205,6 +225,15 @@ Sema::CUDAFunctionPreference
Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
const FunctionDecl *Callee) {
assert(Callee && "Callee must be valid.");
+
+ // Treat a ctor/dtor as a host-device function in a device variable
+ // initializer, so that a trivial ctor/dtor without device attributes can
+ // be used. A non-trivial ctor/dtor is diagnosed by
+ // checkAllowedCUDAInitializer.
+ if (Caller == nullptr && CurCUDATargetCtx.Kind == CTCK_InitGlobalVar &&
+ CurCUDATargetCtx.Target == CFT_Device &&
+ (isa<CXXConstructorDecl>(Callee) || isa<CXXDestructorDecl>(Callee)))
+ return CFP_HostDevice;
+
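A sketch of the case this allows (hypothetical types):

struct Trivial { int v; };    // trivial ctor/dtor, no device attributes
__device__ Trivial t;         // OK: the ctor is treated as host-device in
                              // this initializer context
struct NonTrivial { NonTrivial(); };
__device__ NonTrivial u;      // still diagnosed, but later, by
                              // checkAllowedCUDAInitializer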
CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller);
CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee);
@@ -229,6 +258,15 @@ Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
(CallerTarget == CFT_Global && CalleeTarget == CFT_Device))
return CFP_Native;
+ // HipStdPar mode is special: whether a device-side call to a host target is
+ // valid is deferred to a subsequent pass and cannot be unambiguously
+ // adjudicated in the AST, so we optimistically allow such calls here.
+ if (getLangOpts().HIPStdPar &&
+ (CallerTarget == CFT_Global || CallerTarget == CFT_Device ||
+ CallerTarget == CFT_HostDevice) &&
+ CalleeTarget == CFT_Host)
+ return CFP_HostDevice;
+
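For example (a hedged sketch, assuming a HIP compilation with the HIPStdPar language option enabled; the helper name is invented), a device-to-host call passes Sema and is left for a later pass to adjudicate:

void host_helper();           // host-only function (assumed)
__device__ void f() {
  host_helper();              // accepted here under HIPStdPar; whether this
                              // call is actually valid is decided after Sema
}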
// (d) HostDevice behavior depends on compilation mode.
if (CallerTarget == CFT_HostDevice) {
// It's OK to call a compilation-mode matching function from an HD one.
@@ -336,7 +374,7 @@ bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
if (!InClass || HasExplicitAttr)
return false;
- llvm::Optional<CUDAFunctionTarget> InferredTarget;
+ std::optional<CUDAFunctionTarget> InferredTarget;
// We're going to invoke special member lookup; mark that these special
// members are called from this one, and not from its caller.
@@ -353,9 +391,7 @@ bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
}
if (!ClassDecl->isAbstract()) {
- for (const auto &VB : ClassDecl->vbases()) {
- Bases.push_back(&VB);
- }
+ llvm::append_range(Bases, llvm::make_pointer_range(ClassDecl->vbases()));
}
for (const auto *B : Bases) {
@@ -377,17 +413,16 @@ bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
continue;
CUDAFunctionTarget BaseMethodTarget = IdentifyCUDATarget(SMOR.getMethod());
- if (!InferredTarget.hasValue()) {
+ if (!InferredTarget) {
InferredTarget = BaseMethodTarget;
} else {
bool ResolutionError = resolveCalleeCUDATargetConflict(
- InferredTarget.getValue(), BaseMethodTarget,
- InferredTarget.getPointer());
+ *InferredTarget, BaseMethodTarget, &*InferredTarget);
if (ResolutionError) {
if (Diagnose) {
Diag(ClassDecl->getLocation(),
diag::note_implicit_member_target_infer_collision)
- << (unsigned)CSM << InferredTarget.getValue() << BaseMethodTarget;
+ << (unsigned)CSM << *InferredTarget << BaseMethodTarget;
}
MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
return true;
@@ -421,18 +456,16 @@ bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
CUDAFunctionTarget FieldMethodTarget =
IdentifyCUDATarget(SMOR.getMethod());
- if (!InferredTarget.hasValue()) {
+ if (!InferredTarget) {
InferredTarget = FieldMethodTarget;
} else {
bool ResolutionError = resolveCalleeCUDATargetConflict(
- InferredTarget.getValue(), FieldMethodTarget,
- InferredTarget.getPointer());
+ *InferredTarget, FieldMethodTarget, &*InferredTarget);
if (ResolutionError) {
if (Diagnose) {
Diag(ClassDecl->getLocation(),
diag::note_implicit_member_target_infer_collision)
- << (unsigned)CSM << InferredTarget.getValue()
- << FieldMethodTarget;
+ << (unsigned)CSM << *InferredTarget << FieldMethodTarget;
}
MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
return true;
@@ -444,10 +477,10 @@ bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
// If no target was inferred, mark this member as __host__ __device__;
// it's the least restrictive option that can be invoked from any target.
bool NeedsH = true, NeedsD = true;
- if (InferredTarget.hasValue()) {
- if (InferredTarget.getValue() == CFT_Device)
+ if (InferredTarget) {
+ if (*InferredTarget == CFT_Device)
NeedsH = false;
- else if (InferredTarget.getValue() == CFT_Host)
+ else if (*InferredTarget == CFT_Host)
NeedsD = false;
}
@@ -590,6 +623,8 @@ bool HasAllowedCUDADeviceStaticInitializer(Sema &S, VarDecl *VD,
};
auto IsConstantInit = [&](const Expr *Init) {
assert(Init);
+ ASTContext::CUDAConstantEvalContextRAII EvalCtx(S.Context,
+ /*NoWrongSidedVars=*/true);
return Init->isConstantInitializer(S.Context,
VD->getType()->isReferenceType());
};
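The intent, as a hedged sketch (names invented): while deciding whether a device-side variable has an allowed constant initializer, references to variables living on the wrong side must not be folded as constants:

int host_count = 0;               // host variable
__device__ int a = 1;             // constant initializer: allowed
__device__ int b = host_count;    // not a constant initializer once
                                  // wrong-sided variables are excluded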
@@ -606,6 +641,13 @@ bool HasAllowedCUDADeviceStaticInitializer(Sema &S, VarDecl *VD,
} // namespace
void Sema::checkAllowedCUDAInitializer(VarDecl *VD) {
+ // Return early if VD is inside a non-instantiated template function since
+ // the implicit constructor is not defined yet.
+ if (const FunctionDecl *FD =
+ dyn_cast_or_null<FunctionDecl>(VD->getDeclContext()))
+ if (FD->isDependentContext())
+ return;
+
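A sketch of the situation the early return covers (hypothetical template):

template <typename T> __device__ void g() {
  static T t;   // T's ctor/dtor are unknown until g<T> is instantiated, so
                // the initializer check must wait for instantiation
}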
// Do not check dependent variables since the ctor/dtor/initializer are not
// determined. Do it after instantiation.
if (VD->isInvalidDecl() || !VD->hasInit() || !VD->hasGlobalStorage() ||
@@ -645,6 +687,27 @@ void Sema::checkAllowedCUDAInitializer(VarDecl *VD) {
}
}
+void Sema::CUDARecordImplicitHostDeviceFuncUsedByDevice(
+ const FunctionDecl *Callee) {
+ FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true);
+ if (!Caller)
+ return;
+
+ if (!isCUDAImplicitHostDeviceFunction(Callee))
+ return;
+
+ CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller);
+
+ // Record whether an implicit host-device function is used on the device side.
+ if (CallerTarget != CFT_Device && CallerTarget != CFT_Global &&
+ (CallerTarget != CFT_HostDevice ||
+ (isCUDAImplicitHostDeviceFunction(Caller) &&
+ !getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.count(Caller))))
+ return;
+
+ getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.insert(Callee);
+}
+
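Illustration (a hedged sketch; the function names are invented, and the behavior assumes the -foffload-implicit-host-device-templates option): a kernel calling an implicitly host-device template causes the callee to be recorded as used on the device side:

template <typename T> T twice(T x) { return x + x; }  // implicitly
                                                      // __host__ __device__
__global__ void k(int *out) {
  *out = twice(21);   // caller target is CFT_Global, so twice<int> is added
                      // to CUDAImplicitHostDeviceFunUsedByDevice
}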
// With -fcuda-host-device-constexpr, an unattributed constexpr function is
// treated as implicitly __host__ __device__, unless:
// * it is a variadic function (device-side variadic functions are not
@@ -669,6 +732,18 @@ void Sema::maybeAddCUDAHostDeviceAttrs(FunctionDecl *NewD,
return;
}
+ // If a template function has no host/device/global attributes,
+ // make it an implicit host-device function.
+ if (getLangOpts().OffloadImplicitHostDeviceTemplates &&
+ !NewD->hasAttr<CUDAHostAttr>() && !NewD->hasAttr<CUDADeviceAttr>() &&
+ !NewD->hasAttr<CUDAGlobalAttr>() &&
+ (NewD->getDescribedFunctionTemplate() ||
+ NewD->isFunctionTemplateSpecialization())) {
+ NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
+ NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
+ return;
+ }
+
if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() ||
NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() ||
NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>())
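As a sketch (invented names), an unattributed function template now picks up both implicit attributes, while a template with an explicit target is left untouched:

template <typename T> T square(T x) { return x * x; }  // no target attributes:
                                                       // gets implicit
                                                       // __host__ __device__
template <typename T> __device__ T dev_id(T x) { return x; }  // explicit
                                                              // target: unchanged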
@@ -713,12 +788,12 @@ void Sema::MaybeAddCUDAConstantAttr(VarDecl *VD) {
// Do not promote dependent variables since the ctor/dtor/initializer are
// not determined. Do it after instantiation.
if (getLangOpts().CUDAIsDevice && !VD->hasAttr<CUDAConstantAttr>() &&
- !VD->hasAttr<CUDAConstantAttr>() && !VD->hasAttr<CUDASharedAttr>() &&
+ !VD->hasAttr<CUDASharedAttr>() &&
(VD->isFileVarDecl() || VD->isStaticDataMember()) &&
!IsDependentVar(VD) &&
- (VD->isConstexpr() || (VD->getType().isConstQualified() &&
- HasAllowedCUDADeviceStaticInitializer(
- *this, VD, CICK_DeviceOrConstant)))) {
+ ((VD->isConstexpr() || VD->getType().isConstQualified()) &&
+ HasAllowedCUDADeviceStaticInitializer(*this, VD,
+ CICK_DeviceOrConstant))) {
VD->addAttr(CUDAConstantAttr::CreateImplicit(getASTContext()));
}
}
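A sketch of the reshaped condition (invented names): constexpr no longer bypasses the initializer check, and a plain const now qualifies when its initializer is statically evaluable:

int runtime_val();             // host function (assumed)
const int kA = 3;              // allowed static initializer: gets an implicit
                               // __constant__ in device compilation
const int kB = runtime_val();  // dynamic initializer: not promoted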
@@ -726,8 +801,9 @@ void Sema::MaybeAddCUDAConstantAttr(VarDecl *VD) {
Sema::SemaDiagnosticBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc,
unsigned DiagID) {
assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
+ FunctionDecl *CurFunContext = getCurFunctionDecl(/*AllowLambda=*/true);
SemaDiagnosticBuilder::Kind DiagKind = [&] {
- if (!isa<FunctionDecl>(CurContext))
+ if (!CurFunContext)
return SemaDiagnosticBuilder::K_Nop;
switch (CurrentCUDATarget()) {
case CFT_Global:
@@ -741,7 +817,7 @@ Sema::SemaDiagnosticBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc,
return SemaDiagnosticBuilder::K_Nop;
if (IsLastErrorImmediate && Diags.getDiagnosticIDs()->isBuiltinNote(DiagID))
return SemaDiagnosticBuilder::K_Immediate;
- return (getEmissionStatus(cast<FunctionDecl>(CurContext)) ==
+ return (getEmissionStatus(CurFunContext) ==
FunctionEmissionStatus::Emitted)
? SemaDiagnosticBuilder::K_ImmediateWithCallStack
: SemaDiagnosticBuilder::K_Deferred;
@@ -749,15 +825,15 @@ Sema::SemaDiagnosticBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc,
return SemaDiagnosticBuilder::K_Nop;
}
}();
- return SemaDiagnosticBuilder(DiagKind, Loc, DiagID,
- dyn_cast<FunctionDecl>(CurContext), *this);
+ return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, CurFunContext, *this);
}
Sema::SemaDiagnosticBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc,
unsigned DiagID) {
assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
+ FunctionDecl *CurFunContext = getCurFunctionDecl(/*AllowLambda=*/true);
SemaDiagnosticBuilder::Kind DiagKind = [&] {
- if (!isa<FunctionDecl>(CurContext))
+ if (!CurFunContext)
return SemaDiagnosticBuilder::K_Nop;
switch (CurrentCUDATarget()) {
case CFT_Host:
@@ -770,7 +846,7 @@ Sema::SemaDiagnosticBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc,
return SemaDiagnosticBuilder::K_Nop;
if (IsLastErrorImmediate && Diags.getDiagnosticIDs()->isBuiltinNote(DiagID))
return SemaDiagnosticBuilder::K_Immediate;
- return (getEmissionStatus(cast<FunctionDecl>(CurContext)) ==
+ return (getEmissionStatus(CurFunContext) ==
FunctionEmissionStatus::Emitted)
? SemaDiagnosticBuilder::K_ImmediateWithCallStack
: SemaDiagnosticBuilder::K_Deferred;
@@ -778,21 +854,20 @@ Sema::SemaDiagnosticBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc,
return SemaDiagnosticBuilder::K_Nop;
}
}();
- return SemaDiagnosticBuilder(DiagKind, Loc, DiagID,
- dyn_cast<FunctionDecl>(CurContext), *this);
+ return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, CurFunContext, *this);
}
bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
assert(Callee && "Callee may not be null.");
- auto &ExprEvalCtx = ExprEvalContexts.back();
+ const auto &ExprEvalCtx = currentEvaluationContext();
if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated())
return true;
// FIXME: Is bailing out early correct here? Should we instead assume that
// the caller is a global initializer?
- FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext);
+ FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true);
if (!Caller)
return true;
@@ -817,8 +892,13 @@ bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
}
}();
- if (DiagKind == SemaDiagnosticBuilder::K_Nop)
+ if (DiagKind == SemaDiagnosticBuilder::K_Nop) {
+ // For -fgpu-rdc, keep track of external kernels used by host functions.
+ if (LangOpts.CUDAIsDevice && LangOpts.GPURelocatableDeviceCode &&
+ Callee->hasAttr<CUDAGlobalAttr>() && !Callee->isDefined())
+ getASTContext().CUDAExternalDeviceDeclODRUsedByHost.insert(Callee);
return true;
+ }
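A sketch of the -fgpu-rdc bookkeeping (hypothetical kernel):

__global__ void ext_kernel(int *);  // declared here, defined in another TU
void launch(int *p) {
  ext_kernel<<<1, 1>>>(p);          // host use of an undefined kernel is
                                    // recorded in
                                    // CUDAExternalDeviceDeclODRUsedByHost
}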
// Avoid emitting this error twice for the same location. Using a hashtable
// like this is unfortunate, but because we must continue parsing as normal
@@ -858,7 +938,7 @@ void Sema::CUDACheckLambdaCapture(CXXMethodDecl *Callee,
// File-scope lambda can only do init captures for global variables, which
// results in passing by value for these global variables.
- FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext);
+ FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true);
if (!Caller)
return;
@@ -873,15 +953,19 @@ void Sema::CUDACheckLambdaCapture(CXXMethodDecl *Callee,
if (!ShouldCheck || !Capture.isReferenceCapture())
return;
auto DiagKind = SemaDiagnosticBuilder::K_Deferred;
- if (Capture.isVariableCapture()) {
+ if (Capture.isVariableCapture() && !getLangOpts().HIPStdPar) {
SemaDiagnosticBuilder(DiagKind, Capture.getLocation(),
diag::err_capture_bad_target, Callee, *this)
<< Capture.getVariable();
} else if (Capture.isThisCapture()) {
+ // Capturing the this pointer is allowed, since it may point to managed
+ // memory that is accessible on both the device and host sides. It only
+ // results in an invalid memory access if the this pointer points to
+ // memory that is not accessible on the device side.
SemaDiagnosticBuilder(DiagKind, Capture.getLocation(),
- diag::err_capture_bad_target_this_ptr, Callee, *this);
+ diag::warn_maybe_capture_bad_target_this_ptr, Callee,
+ *this);
}
- return;
}
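A hedged illustration of the changed capture diagnostics (assuming an extended device lambda; the type is invented):

struct Widget {
  int n = 0;
  void enqueue() {
    // Capturing 'this' in a device lambda now draws the new warning
    // (warn_maybe_capture_bad_target_this_ptr) instead of a hard error,
    // since 'this' may point to managed memory:
    auto fn = [this] __device__ () { return n; };
    (void)fn;
  }
};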
void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) {
@@ -908,7 +992,14 @@ void Sema::checkCUDATargetOverload(FunctionDecl *NewFD,
// HD/global functions "exist" in some sense on both the host and device, so
// should have the same implementation on both sides.
if (NewTarget != OldTarget &&
- ((NewTarget == CFT_HostDevice) || (OldTarget == CFT_HostDevice) ||
+ ((NewTarget == CFT_HostDevice &&
+ !(LangOpts.OffloadImplicitHostDeviceTemplates &&
+ isCUDAImplicitHostDeviceFunction(NewFD) &&
+ OldTarget == CFT_Device)) ||
+ (OldTarget == CFT_HostDevice &&
+ !(LangOpts.OffloadImplicitHostDeviceTemplates &&
+ isCUDAImplicitHostDeviceFunction(OldFD) &&
+ NewTarget == CFT_Device)) ||
(NewTarget == CFT_Global) || (OldTarget == CFT_Global)) &&
!IsOverload(NewFD, OldFD, /* UseMemberUsingDeclRules = */ false,
/* ConsiderCudaAttrs = */ false)) {
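A sketch of the pairing this relaxation is meant to permit (invented names; assumes -foffload-implicit-host-device-templates): an explicit __device__ specialization of an implicitly host-device template no longer trips the target-mismatch diagnostic:

template <typename T> T relu(T x) { return x > T(0) ? x : T(0); }  // implicitly
                                                                   // __host__ __device__
template <> __device__ float relu(float x) { return x > 0.f ? x : 0.f; }
// Previously the CFT_HostDevice vs CFT_Device mismatch here was diagnosed.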