diff options
Diffstat (limited to 'contrib/llvm-project/clang/lib/CodeGen/CGCUDANV.cpp')
-rw-r--r-- | contrib/llvm-project/clang/lib/CodeGen/CGCUDANV.cpp | 23 |
1 files changed, 14 insertions, 9 deletions
diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGCUDANV.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGCUDANV.cpp index 4d4038dae9cf..5c5cbaff0252 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGCUDANV.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGCUDANV.cpp @@ -93,7 +93,7 @@ private: GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::None); } if (Alignment) - GV->setAlignment(Alignment); + GV->setAlignment(llvm::Align(Alignment)); return llvm::ConstantExpr::getGetElementPtr(ConstStr.getElementType(), ConstStr.getPointer(), Zeros); @@ -236,7 +236,8 @@ void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF, EmittedKernels.push_back({CGF.CurFn, CGF.CurFuncDecl}); if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(), - CudaFeature::CUDA_USES_NEW_LAUNCH)) + CudaFeature::CUDA_USES_NEW_LAUNCH) || + CGF.getLangOpts().HIPUseNewLaunchAPI) emitDeviceStubBodyNew(CGF, Args); else emitDeviceStubBodyLegacy(CGF, Args); @@ -264,14 +265,18 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF, llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end"); - // Lookup cudaLaunchKernel function. + // Lookup cudaLaunchKernel/hipLaunchKernel function. // cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim, // void **args, size_t sharedMem, // cudaStream_t stream); + // hipError_t hipLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim, + // void **args, size_t sharedMem, + // hipStream_t stream); TranslationUnitDecl *TUDecl = CGM.getContext().getTranslationUnitDecl(); DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl); + auto LaunchKernelName = addPrefixToName("LaunchKernel"); IdentifierInfo &cudaLaunchKernelII = - CGM.getContext().Idents.get("cudaLaunchKernel"); + CGM.getContext().Idents.get(LaunchKernelName); FunctionDecl *cudaLaunchKernelFD = nullptr; for (const auto &Result : DC->lookup(&cudaLaunchKernelII)) { if (FunctionDecl *FD = dyn_cast<FunctionDecl>(Result)) @@ -280,7 +285,7 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF, if (cudaLaunchKernelFD == nullptr) { CGM.Error(CGF.CurFuncDecl->getLocation(), - "Can't find declaration for cudaLaunchKernel()"); + "Can't find declaration for " + LaunchKernelName); return; } // Create temporary dim3 grid_dim, block_dim. @@ -301,7 +306,7 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF, /*ShmemSize=*/ShmemSize.getType(), /*Stream=*/Stream.getType()}, /*isVarArg=*/false), - "__cudaPopCallConfiguration"); + addUnderscoredPrefixToName("PopCallConfiguration")); CGF.EmitRuntimeCallOrInvoke(cudaPopConfigFn, {GridDim.getPointer(), BlockDim.getPointer(), @@ -329,7 +334,7 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF, const CGFunctionInfo &FI = CGM.getTypes().arrangeFunctionDeclaration(cudaLaunchKernelFD); llvm::FunctionCallee cudaLaunchKernelFn = - CGM.CreateRuntimeFunction(FTy, "cudaLaunchKernel"); + CGM.CreateRuntimeFunction(FTy, LaunchKernelName); CGF.EmitCall(FI, CGCallee::forDirect(cudaLaunchKernelFn), ReturnValueSlot(), LaunchKernelArgs); CGF.EmitBranch(EndBlock); @@ -623,7 +628,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { Linkage, /*Initializer=*/llvm::ConstantPointerNull::get(VoidPtrPtrTy), "__hip_gpubin_handle"); - GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getQuantity()); + GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getAsAlign()); // Prevent the weak symbol in different shared libraries being merged. if (Linkage != llvm::GlobalValue::InternalLinkage) GpuBinaryHandle->setVisibility(llvm::GlobalValue::HiddenVisibility); @@ -664,7 +669,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { GpuBinaryHandle = new llvm::GlobalVariable( TheModule, VoidPtrPtrTy, false, llvm::GlobalValue::InternalLinkage, llvm::ConstantPointerNull::get(VoidPtrPtrTy), "__cuda_gpubin_handle"); - GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getQuantity()); + GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getAsAlign()); CtorBuilder.CreateAlignedStore(RegisterFatbinCall, GpuBinaryHandle, CGM.getPointerAlign()); |