aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/clang/lib/CodeGen/CGCUDANV.cpp
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2023-04-14 21:41:27 +0000
committerDimitry Andric <dim@FreeBSD.org>2023-06-22 18:20:56 +0000
commitbdd1243df58e60e85101c09001d9812a789b6bc4 (patch)
treea1ce621c7301dd47ba2ddc3b8eaa63b441389481 /contrib/llvm-project/clang/lib/CodeGen/CGCUDANV.cpp
parent781624ca2d054430052c828ba8d2c2eaf2d733e7 (diff)
parente3b557809604d036af6e00c60f012c2025b59a5e (diff)
downloadsrc-bdd1243df58e60e85101c09001d9812a789b6bc4.tar.gz
src-bdd1243df58e60e85101c09001d9812a789b6bc4.zip
Merge llvm-project main llvmorg-16-init-18548-gb0daacf58f41
This updates llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and openmp to llvmorg-16-init-18548-gb0daacf58f41. PR: 271047 MFC after: 1 month
Diffstat (limited to 'contrib/llvm-project/clang/lib/CodeGen/CGCUDANV.cpp')
-rw-r--r--contrib/llvm-project/clang/lib/CodeGen/CGCUDANV.cpp71
1 files changed, 42 insertions, 29 deletions
diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGCUDANV.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGCUDANV.cpp
index a8bb0dd65d1a..bb887df3e4e0 100644
--- a/contrib/llvm-project/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/contrib/llvm-project/clang/lib/CodeGen/CGCUDANV.cpp
@@ -49,10 +49,10 @@ private:
const Decl *D;
};
llvm::SmallVector<KernelInfo, 16> EmittedKernels;
- // Map a device stub function to a symbol for identifying kernel in host code.
+ // Map a kernel mangled name to a symbol for identifying kernel in host code
// For CUDA, the symbol for identifying the kernel is the same as the device
// stub function. For HIP, they are different.
- llvm::DenseMap<llvm::Function *, llvm::GlobalValue *> KernelHandles;
+ llvm::DenseMap<StringRef, llvm::GlobalValue *> KernelHandles;
// Map a kernel handle to the kernel stub.
llvm::DenseMap<llvm::GlobalValue *, llvm::Function *> KernelStubs;
struct VarInfo {
@@ -69,6 +69,8 @@ private:
bool RelocatableDeviceCode;
/// Mangle context for device.
std::unique_ptr<MangleContext> DeviceMC;
+ /// Some zeros used for GEPs.
+ llvm::Constant *Zeros[2];
llvm::FunctionCallee getSetupArgumentFn() const;
llvm::FunctionCallee getLaunchFn() const;
@@ -86,14 +88,25 @@ private:
/// the start of the string. The result of this function can be used anywhere
/// where the C code specifies const char*.
llvm::Constant *makeConstantString(const std::string &Str,
- const std::string &Name = "",
- const std::string &SectionName = "",
- unsigned Alignment = 0) {
- llvm::Constant *Zeros[] = {llvm::ConstantInt::get(SizeTy, 0),
- llvm::ConstantInt::get(SizeTy, 0)};
+ const std::string &Name = "") {
auto ConstStr = CGM.GetAddrOfConstantCString(Str, Name.c_str());
- llvm::GlobalVariable *GV =
- cast<llvm::GlobalVariable>(ConstStr.getPointer());
+ return llvm::ConstantExpr::getGetElementPtr(ConstStr.getElementType(),
+ ConstStr.getPointer(), Zeros);
+ }
+
+ /// Helper function which generates an initialized constant array from Str,
+ /// and optionally sets section name and alignment. AddNull specifies whether
+ /// the array should nave NUL termination.
+ llvm::Constant *makeConstantArray(StringRef Str,
+ StringRef Name = "",
+ StringRef SectionName = "",
+ unsigned Alignment = 0,
+ bool AddNull = false) {
+ llvm::Constant *Value =
+ llvm::ConstantDataArray::getString(Context, Str, AddNull);
+ auto *GV = new llvm::GlobalVariable(
+ TheModule, Value->getType(), /*isConstant=*/true,
+ llvm::GlobalValue::PrivateLinkage, Value, Name);
if (!SectionName.empty()) {
GV->setSection(SectionName);
// Mark the address as used which make sure that this section isn't
@@ -102,9 +115,7 @@ private:
}
if (Alignment)
GV->setAlignment(llvm::Align(Alignment));
-
- return llvm::ConstantExpr::getGetElementPtr(ConstStr.getElementType(),
- ConstStr.getPointer(), Zeros);
+ return llvm::ConstantExpr::getGetElementPtr(GV->getValueType(), GV, Zeros);
}
/// Helper function that generates an empty dummy function returning void.
@@ -220,6 +231,8 @@ CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM)
IntTy = CGM.IntTy;
SizeTy = CGM.SizeTy;
VoidTy = CGM.VoidTy;
+ Zeros[0] = llvm::ConstantInt::get(SizeTy, 0);
+ Zeros[1] = Zeros[0];
CharPtrTy = llvm::PointerType::getUnqual(Types.ConvertType(Ctx.CharTy));
VoidPtrTy = cast<llvm::PointerType>(Types.ConvertType(Ctx.VoidPtrTy));
@@ -297,7 +310,8 @@ std::string CGNVCUDARuntime::getDeviceSideName(const NamedDecl *ND) {
void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF,
FunctionArgList &Args) {
EmittedKernels.push_back({CGF.CurFn, CGF.CurFuncDecl});
- if (auto *GV = dyn_cast<llvm::GlobalVariable>(KernelHandles[CGF.CurFn])) {
+ if (auto *GV =
+ dyn_cast<llvm::GlobalVariable>(KernelHandles[CGF.CurFn->getName()])) {
GV->setLinkage(CGF.CurFn->getLinkage());
GV->setInitializer(CGF.CurFn);
}
@@ -387,8 +401,8 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
ShmemSize.getPointer(), Stream.getPointer()});
// Emit the call to cudaLaunch
- llvm::Value *Kernel =
- CGF.Builder.CreatePointerCast(KernelHandles[CGF.CurFn], VoidPtrTy);
+ llvm::Value *Kernel = CGF.Builder.CreatePointerCast(
+ KernelHandles[CGF.CurFn->getName()], VoidPtrTy);
CallArgList LaunchKernelArgs;
LaunchKernelArgs.add(RValue::get(Kernel),
cudaLaunchKernelFD->getParamDecl(0)->getType());
@@ -443,8 +457,8 @@ void CGNVCUDARuntime::emitDeviceStubBodyLegacy(CodeGenFunction &CGF,
// Emit the call to cudaLaunch
llvm::FunctionCallee cudaLaunchFn = getLaunchFn();
- llvm::Value *Arg =
- CGF.Builder.CreatePointerCast(KernelHandles[CGF.CurFn], CharPtrTy);
+ llvm::Value *Arg = CGF.Builder.CreatePointerCast(
+ KernelHandles[CGF.CurFn->getName()], CharPtrTy);
CGF.EmitRuntimeCallOrInvoke(cudaLaunchFn, Arg);
CGF.EmitBranch(EndBlock);
@@ -538,7 +552,7 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(VoidPtrTy);
llvm::Value *Args[] = {
&GpuBinaryHandlePtr,
- Builder.CreateBitCast(KernelHandles[I.Kernel], VoidPtrTy),
+ Builder.CreateBitCast(KernelHandles[I.Kernel->getName()], VoidPtrTy),
KernelName,
KernelName,
llvm::ConstantInt::get(IntTy, -1),
@@ -744,9 +758,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
// If fatbin is available from early finalization, create a string
// literal containing the fat binary loaded from the given file.
const unsigned HIPCodeObjectAlign = 4096;
- FatBinStr =
- makeConstantString(std::string(CudaGpuBinary->getBuffer()), "",
- FatbinConstantName, HIPCodeObjectAlign);
+ FatBinStr = makeConstantArray(std::string(CudaGpuBinary->getBuffer()), "",
+ FatbinConstantName, HIPCodeObjectAlign);
} else {
// If fatbin is not available, create an external symbol
// __hip_fatbin in section .hip_fatbin. The external symbol is supposed
@@ -780,8 +793,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
// For CUDA, create a string literal containing the fat binary loaded from
// the given file.
- FatBinStr = makeConstantString(std::string(CudaGpuBinary->getBuffer()), "",
- FatbinConstantName, 8);
+ FatBinStr = makeConstantArray(std::string(CudaGpuBinary->getBuffer()), "",
+ FatbinConstantName, 8);
FatMagic = CudaFatMagic;
}
@@ -888,8 +901,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
SmallString<64> ModuleID;
llvm::raw_svector_ostream OS(ModuleID);
OS << ModuleIDPrefix << llvm::format("%" PRIx64, FatbinWrapper->getGUID());
- llvm::Constant *ModuleIDConstant = makeConstantString(
- std::string(ModuleID.str()), "", ModuleIDSectionName, 32);
+ llvm::Constant *ModuleIDConstant = makeConstantArray(
+ std::string(ModuleID.str()), "", ModuleIDSectionName, 32, /*AddNull=*/true);
// Create an alias for the FatbinWrapper that nvcc will look for.
llvm::GlobalAlias::create(llvm::GlobalValue::ExternalLinkage,
@@ -1118,7 +1131,7 @@ void CGNVCUDARuntime::createOffloadingEntries() {
StringRef Section = CGM.getLangOpts().HIP ? "hip_offloading_entries"
: "cuda_offloading_entries";
for (KernelInfo &I : EmittedKernels)
- OMPBuilder.emitOffloadingEntry(KernelHandles[I.Kernel],
+ OMPBuilder.emitOffloadingEntry(KernelHandles[I.Kernel->getName()],
getDeviceSideName(cast<NamedDecl>(I.D)), 0,
DeviceVarFlags::OffloadGlobalEntry, Section);
@@ -1181,12 +1194,12 @@ llvm::Function *CGNVCUDARuntime::finalizeModule() {
llvm::GlobalValue *CGNVCUDARuntime::getKernelHandle(llvm::Function *F,
GlobalDecl GD) {
- auto Loc = KernelHandles.find(F);
+ auto Loc = KernelHandles.find(F->getName());
if (Loc != KernelHandles.end())
return Loc->second;
if (!CGM.getLangOpts().HIP) {
- KernelHandles[F] = F;
+ KernelHandles[F->getName()] = F;
KernelStubs[F] = F;
return F;
}
@@ -1200,7 +1213,7 @@ llvm::GlobalValue *CGNVCUDARuntime::getKernelHandle(llvm::Function *F,
Var->setDSOLocal(F->isDSOLocal());
Var->setVisibility(F->getVisibility());
CGM.maybeSetTrivialComdat(*GD.getDecl(), *Var);
- KernelHandles[F] = Var;
+ KernelHandles[F->getName()] = Var;
KernelStubs[Var] = F;
return Var;
}