1 files changed, 40 insertions, 53 deletions
diff --git a/lib/Sema/SemaCUDA.cpp b/lib/Sema/SemaCUDA.cpp
index 203c09c57112..d0ddfd040c9c 100644
--- a/lib/Sema/SemaCUDA.cpp
+++ b/lib/Sema/SemaCUDA.cpp
@@ -267,6 +267,18 @@ bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
                                                    CXXMethodDecl *MemberDecl,
                                                    bool ConstRHS,
                                                    bool Diagnose) {
+  // If the defaulted special member is defined lexically outside of its
+  // owning class, or the special member already has explicit device or host
+  // attributes, do not infer.
+  bool InClass = MemberDecl->getLexicalParent() == MemberDecl->getParent();
+  bool HasH = MemberDecl->hasAttr<CUDAHostAttr>();
+  bool HasD = MemberDecl->hasAttr<CUDADeviceAttr>();
+  bool HasExplicitAttr =
+      (HasD && !MemberDecl->getAttr<CUDADeviceAttr>()->isImplicit()) ||
+      (HasH && !MemberDecl->getAttr<CUDAHostAttr>()->isImplicit());
+  if (!InClass || HasExplicitAttr)
+    return false;
+
   llvm::Optional<CUDAFunctionTarget> InferredTarget;
 
   // We're going to invoke special member lookup; mark that these special
@@ -371,21 +383,23 @@ bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
     }
   }
 
+
+  // If no target was inferred, mark this member as __host__ __device__;
+  // it's the least restrictive option that can be invoked from any target.
+  bool NeedsH = true, NeedsD = true;
   if (InferredTarget.hasValue()) {
-    if (InferredTarget.getValue() == CFT_Device) {
-      MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
-    } else if (InferredTarget.getValue() == CFT_Host) {
-      MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
-    } else {
-      MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
-      MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
-    }
-  } else {
-    // If no target was inferred, mark this member as __host__ __device__;
-    // it's the least restrictive option that can be invoked from any target.
+    if (InferredTarget.getValue() == CFT_Device)
+      NeedsH = false;
+    else if (InferredTarget.getValue() == CFT_Host)
+      NeedsD = false;
+  }
+
+  // We either setting attributes first time, or the inferred ones must match
+  // previously set ones.
+  if (NeedsD && !HasD)
     MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
+  if (NeedsH && !HasH)
     MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
-  }
 
   return false;
 }
@@ -586,40 +600,6 @@ void Sema::maybeAddCUDAHostDeviceAttrs(FunctionDecl *NewD,
   NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
 }
 
-// Do we know that we will eventually codegen the given function?
-static bool IsKnownEmitted(Sema &S, FunctionDecl *FD) {
-  // Templates are emitted when they're instantiated.
-  if (FD->isDependentContext())
-    return false;
-
-  // When compiling for device, host functions are never emitted.  Similarly,
-  // when compiling for host, device and global functions are never emitted.
-  // (Technically, we do emit a host-side stub for global functions, but this
-  // doesn't count for our purposes here.)
-  Sema::CUDAFunctionTarget T = S.IdentifyCUDATarget(FD);
-  if (S.getLangOpts().CUDAIsDevice && T == Sema::CFT_Host)
-    return false;
-  if (!S.getLangOpts().CUDAIsDevice &&
-      (T == Sema::CFT_Device || T == Sema::CFT_Global))
-    return false;
-
-  // Check whether this function is externally visible -- if so, it's
-  // known-emitted.
-  //
-  // We have to check the GVA linkage of the function's *definition* -- if we
-  // only have a declaration, we don't know whether or not the function will be
-  // emitted, because (say) the definition could include "inline".
-  FunctionDecl *Def = FD->getDefinition();
-
-  if (Def &&
-      !isDiscardableGVALinkage(S.getASTContext().GetGVALinkageForFunction(Def)))
-    return true;
-
-  // Otherwise, the function is known-emitted if it's in our set of
-  // known-emitted functions.
-  return S.DeviceKnownEmittedFns.count(FD) > 0;
-}
-
 Sema::DeviceDiagBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc,
                                                    unsigned DiagID) {
   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
@@ -633,7 +613,8 @@ Sema::DeviceDiagBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc,
       // device code if we're compiling for device.  Defer any errors in device
       // mode until the function is known-emitted.
       if (getLangOpts().CUDAIsDevice) {
-        return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext))
+        return (getEmissionStatus(cast<FunctionDecl>(CurContext)) ==
+                FunctionEmissionStatus::Emitted)
                    ? DeviceDiagBuilder::K_ImmediateWithCallStack
                    : DeviceDiagBuilder::K_Deferred;
       }
@@ -661,7 +642,8 @@ Sema::DeviceDiagBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc,
       if (getLangOpts().CUDAIsDevice)
         return DeviceDiagBuilder::K_Nop;
 
-      return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext))
+      return (getEmissionStatus(cast<FunctionDecl>(CurContext)) ==
+              FunctionEmissionStatus::Emitted)
                  ? DeviceDiagBuilder::K_ImmediateWithCallStack
                  : DeviceDiagBuilder::K_Deferred;
     default:
@@ -688,12 +670,16 @@ bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
 
   // If the caller is known-emitted, mark the callee as known-emitted.
   // Otherwise, mark the call in our call graph so we can traverse it later.
-  bool CallerKnownEmitted = IsKnownEmitted(*this, Caller);
+  bool CallerKnownEmitted =
+      getEmissionStatus(Caller) == FunctionEmissionStatus::Emitted;
   if (CallerKnownEmitted) {
     // Host-side references to a __global__ function refer to the stub, so the
     // function itself is never emitted and therefore should not be marked.
-    if (getLangOpts().CUDAIsDevice || IdentifyCUDATarget(Callee) != CFT_Global)
-      markKnownEmitted(*this, Caller, Callee, Loc, IsKnownEmitted);
+    if (!shouldIgnoreInHostDeviceCheck(Callee))
+      markKnownEmitted(
+          *this, Caller, Callee, Loc, [](Sema &S, FunctionDecl *FD) {
+            return S.getEmissionStatus(FD) == FunctionEmissionStatus::Emitted;
+          });
   } else {
     // If we have
     //   host fn calls kernel fn calls host+device,
@@ -701,7 +687,7 @@ bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
     // omitting at the call to the kernel from the callgraph.  This ensures
     // that, when compiling for host, only HD functions actually called from the
     // host get marked as known-emitted.
-    if (getLangOpts().CUDAIsDevice || IdentifyCUDATarget(Callee) != CFT_Global)
+    if (!shouldIgnoreInHostDeviceCheck(Callee))
       DeviceCallGraph[Caller].insert({Callee, Loc});
   }
 
@@ -806,7 +792,8 @@ void Sema::inheritCUDATargetAttrs(FunctionDecl *FD,
 
 std::string Sema::getCudaConfigureFuncName() const {
   if (getLangOpts().HIP)
-    return "hipConfigureCall";
+    return getLangOpts().HIPUseNewLaunchAPI ? "__hipPushCallConfiguration"
+                                            : "hipConfigureCall";
 
   // New CUDA kernel launch sequence.
   if (CudaFeatureEnabled(Context.getTargetInfo().getSDKVersion(),