diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2023-04-14 21:41:27 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2023-06-22 18:20:56 +0000 |
commit | bdd1243df58e60e85101c09001d9812a789b6bc4 (patch) | |
tree | a1ce621c7301dd47ba2ddc3b8eaa63b441389481 /contrib/llvm-project/clang/lib/CodeGen/CGCUDANV.cpp | |
parent | 781624ca2d054430052c828ba8d2c2eaf2d733e7 (diff) | |
parent | e3b557809604d036af6e00c60f012c2025b59a5e (diff) | |
download | src-bdd1243df58e60e85101c09001d9812a789b6bc4.tar.gz src-bdd1243df58e60e85101c09001d9812a789b6bc4.zip |
Merge llvm-project main llvmorg-16-init-18548-gb0daacf58f41
This updates llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and
openmp to llvmorg-16-init-18548-gb0daacf58f41.
PR: 271047
MFC after: 1 month
Diffstat (limited to 'contrib/llvm-project/clang/lib/CodeGen/CGCUDANV.cpp')
-rw-r--r-- | contrib/llvm-project/clang/lib/CodeGen/CGCUDANV.cpp | 71 |
1 file changed, 42 insertions, 29 deletions
diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGCUDANV.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGCUDANV.cpp index a8bb0dd65d1a..bb887df3e4e0 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGCUDANV.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGCUDANV.cpp @@ -49,10 +49,10 @@ private: const Decl *D; }; llvm::SmallVector<KernelInfo, 16> EmittedKernels; - // Map a device stub function to a symbol for identifying kernel in host code. + // Map a kernel mangled name to a symbol for identifying kernel in host code // For CUDA, the symbol for identifying the kernel is the same as the device // stub function. For HIP, they are different. - llvm::DenseMap<llvm::Function *, llvm::GlobalValue *> KernelHandles; + llvm::DenseMap<StringRef, llvm::GlobalValue *> KernelHandles; // Map a kernel handle to the kernel stub. llvm::DenseMap<llvm::GlobalValue *, llvm::Function *> KernelStubs; struct VarInfo { @@ -69,6 +69,8 @@ private: bool RelocatableDeviceCode; /// Mangle context for device. std::unique_ptr<MangleContext> DeviceMC; + /// Some zeros used for GEPs. + llvm::Constant *Zeros[2]; llvm::FunctionCallee getSetupArgumentFn() const; llvm::FunctionCallee getLaunchFn() const; @@ -86,14 +88,25 @@ private: /// the start of the string. The result of this function can be used anywhere /// where the C code specifies const char*. 
llvm::Constant *makeConstantString(const std::string &Str, - const std::string &Name = "", - const std::string &SectionName = "", - unsigned Alignment = 0) { - llvm::Constant *Zeros[] = {llvm::ConstantInt::get(SizeTy, 0), - llvm::ConstantInt::get(SizeTy, 0)}; + const std::string &Name = "") { auto ConstStr = CGM.GetAddrOfConstantCString(Str, Name.c_str()); - llvm::GlobalVariable *GV = - cast<llvm::GlobalVariable>(ConstStr.getPointer()); + return llvm::ConstantExpr::getGetElementPtr(ConstStr.getElementType(), + ConstStr.getPointer(), Zeros); + } + + /// Helper function which generates an initialized constant array from Str, + /// and optionally sets section name and alignment. AddNull specifies whether + /// the array should have NUL termination. + llvm::Constant *makeConstantArray(StringRef Str, + StringRef Name = "", + StringRef SectionName = "", + unsigned Alignment = 0, + bool AddNull = false) { + llvm::Constant *Value = + llvm::ConstantDataArray::getString(Context, Str, AddNull); + auto *GV = new llvm::GlobalVariable( + TheModule, Value->getType(), /*isConstant=*/true, + llvm::GlobalValue::PrivateLinkage, Value, Name); if (!SectionName.empty()) { GV->setSection(SectionName); // Mark the address as used which make sure that this section isn't @@ -102,9 +115,7 @@ private: } if (Alignment) GV->setAlignment(llvm::Align(Alignment)); - - return llvm::ConstantExpr::getGetElementPtr(ConstStr.getElementType(), - ConstStr.getPointer(), Zeros); + return llvm::ConstantExpr::getGetElementPtr(GV->getValueType(), GV, Zeros); } /// Helper function that generates an empty dummy function returning void. 
@@ -220,6 +231,8 @@ CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM) IntTy = CGM.IntTy; SizeTy = CGM.SizeTy; VoidTy = CGM.VoidTy; + Zeros[0] = llvm::ConstantInt::get(SizeTy, 0); + Zeros[1] = Zeros[0]; CharPtrTy = llvm::PointerType::getUnqual(Types.ConvertType(Ctx.CharTy)); VoidPtrTy = cast<llvm::PointerType>(Types.ConvertType(Ctx.VoidPtrTy)); @@ -297,7 +310,8 @@ std::string CGNVCUDARuntime::getDeviceSideName(const NamedDecl *ND) { void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) { EmittedKernels.push_back({CGF.CurFn, CGF.CurFuncDecl}); - if (auto *GV = dyn_cast<llvm::GlobalVariable>(KernelHandles[CGF.CurFn])) { + if (auto *GV = + dyn_cast<llvm::GlobalVariable>(KernelHandles[CGF.CurFn->getName()])) { GV->setLinkage(CGF.CurFn->getLinkage()); GV->setInitializer(CGF.CurFn); } @@ -387,8 +401,8 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF, ShmemSize.getPointer(), Stream.getPointer()}); // Emit the call to cudaLaunch - llvm::Value *Kernel = - CGF.Builder.CreatePointerCast(KernelHandles[CGF.CurFn], VoidPtrTy); + llvm::Value *Kernel = CGF.Builder.CreatePointerCast( + KernelHandles[CGF.CurFn->getName()], VoidPtrTy); CallArgList LaunchKernelArgs; LaunchKernelArgs.add(RValue::get(Kernel), cudaLaunchKernelFD->getParamDecl(0)->getType()); @@ -443,8 +457,8 @@ void CGNVCUDARuntime::emitDeviceStubBodyLegacy(CodeGenFunction &CGF, // Emit the call to cudaLaunch llvm::FunctionCallee cudaLaunchFn = getLaunchFn(); - llvm::Value *Arg = - CGF.Builder.CreatePointerCast(KernelHandles[CGF.CurFn], CharPtrTy); + llvm::Value *Arg = CGF.Builder.CreatePointerCast( + KernelHandles[CGF.CurFn->getName()], CharPtrTy); CGF.EmitRuntimeCallOrInvoke(cudaLaunchFn, Arg); CGF.EmitBranch(EndBlock); @@ -538,7 +552,7 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() { llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(VoidPtrTy); llvm::Value *Args[] = { &GpuBinaryHandlePtr, - Builder.CreateBitCast(KernelHandles[I.Kernel], VoidPtrTy), + 
Builder.CreateBitCast(KernelHandles[I.Kernel->getName()], VoidPtrTy), KernelName, KernelName, llvm::ConstantInt::get(IntTy, -1), @@ -744,9 +758,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { // If fatbin is available from early finalization, create a string // literal containing the fat binary loaded from the given file. const unsigned HIPCodeObjectAlign = 4096; - FatBinStr = - makeConstantString(std::string(CudaGpuBinary->getBuffer()), "", - FatbinConstantName, HIPCodeObjectAlign); + FatBinStr = makeConstantArray(std::string(CudaGpuBinary->getBuffer()), "", + FatbinConstantName, HIPCodeObjectAlign); } else { // If fatbin is not available, create an external symbol // __hip_fatbin in section .hip_fatbin. The external symbol is supposed @@ -780,8 +793,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { // For CUDA, create a string literal containing the fat binary loaded from // the given file. - FatBinStr = makeConstantString(std::string(CudaGpuBinary->getBuffer()), "", - FatbinConstantName, 8); + FatBinStr = makeConstantArray(std::string(CudaGpuBinary->getBuffer()), "", + FatbinConstantName, 8); FatMagic = CudaFatMagic; } @@ -888,8 +901,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { SmallString<64> ModuleID; llvm::raw_svector_ostream OS(ModuleID); OS << ModuleIDPrefix << llvm::format("%" PRIx64, FatbinWrapper->getGUID()); - llvm::Constant *ModuleIDConstant = makeConstantString( - std::string(ModuleID.str()), "", ModuleIDSectionName, 32); + llvm::Constant *ModuleIDConstant = makeConstantArray( + std::string(ModuleID.str()), "", ModuleIDSectionName, 32, /*AddNull=*/true); // Create an alias for the FatbinWrapper that nvcc will look for. llvm::GlobalAlias::create(llvm::GlobalValue::ExternalLinkage, @@ -1118,7 +1131,7 @@ void CGNVCUDARuntime::createOffloadingEntries() { StringRef Section = CGM.getLangOpts().HIP ? 
"hip_offloading_entries" : "cuda_offloading_entries"; for (KernelInfo &I : EmittedKernels) - OMPBuilder.emitOffloadingEntry(KernelHandles[I.Kernel], + OMPBuilder.emitOffloadingEntry(KernelHandles[I.Kernel->getName()], getDeviceSideName(cast<NamedDecl>(I.D)), 0, DeviceVarFlags::OffloadGlobalEntry, Section); @@ -1181,12 +1194,12 @@ llvm::Function *CGNVCUDARuntime::finalizeModule() { llvm::GlobalValue *CGNVCUDARuntime::getKernelHandle(llvm::Function *F, GlobalDecl GD) { - auto Loc = KernelHandles.find(F); + auto Loc = KernelHandles.find(F->getName()); if (Loc != KernelHandles.end()) return Loc->second; if (!CGM.getLangOpts().HIP) { - KernelHandles[F] = F; + KernelHandles[F->getName()] = F; KernelStubs[F] = F; return F; } @@ -1200,7 +1213,7 @@ llvm::GlobalValue *CGNVCUDARuntime::getKernelHandle(llvm::Function *F, Var->setDSOLocal(F->isDSOLocal()); Var->setVisibility(F->getVisibility()); CGM.maybeSetTrivialComdat(*GD.getDecl(), *Var); - KernelHandles[F] = Var; + KernelHandles[F->getName()] = Var; KernelStubs[Var] = F; return Var; } |