Diffstat (limited to 'contrib/llvm-project/clang/lib/CodeGen/Targets/NVPTX.cpp')
-rw-r--r--  contrib/llvm-project/clang/lib/CodeGen/Targets/NVPTX.cpp  366
1 file changed, 366 insertions(+), 0 deletions(-)
diff --git a/contrib/llvm-project/clang/lib/CodeGen/Targets/NVPTX.cpp b/contrib/llvm-project/clang/lib/CodeGen/Targets/NVPTX.cpp
new file mode 100644
index 000000000000..ec7f1c439b18
--- /dev/null
+++ b/contrib/llvm-project/clang/lib/CodeGen/Targets/NVPTX.cpp
@@ -0,0 +1,366 @@
+//===- NVPTX.cpp ----------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "ABIInfoImpl.h"
+#include "TargetInfo.h"
+#include "llvm/IR/IntrinsicsNVPTX.h"
+
+using namespace clang;
+using namespace clang::CodeGen;
+
+//===----------------------------------------------------------------------===//
+// NVPTX ABI Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+class NVPTXTargetCodeGenInfo;
+
+class NVPTXABIInfo : public ABIInfo {
+ NVPTXTargetCodeGenInfo &CGInfo;
+
+public:
+ NVPTXABIInfo(CodeGenTypes &CGT, NVPTXTargetCodeGenInfo &Info)
+ : ABIInfo(CGT), CGInfo(Info) {}
+
+ ABIArgInfo classifyReturnType(QualType RetTy) const;
+ ABIArgInfo classifyArgumentType(QualType Ty) const;
+
+ void computeInfo(CGFunctionInfo &FI) const override;
+ RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
+ AggValueSlot Slot) const override;
+ bool isUnsupportedType(QualType T) const;
+ ABIArgInfo coerceToIntArrayWithLimit(QualType Ty, unsigned MaxSize) const;
+};
+
+class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
+public:
+ NVPTXTargetCodeGenInfo(CodeGenTypes &CGT)
+ : TargetCodeGenInfo(std::make_unique<NVPTXABIInfo>(CGT, *this)) {}
+
+ void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+ CodeGen::CodeGenModule &M) const override;
+ bool shouldEmitStaticExternCAliases() const override;
+
+ llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
+ llvm::PointerType *T,
+ QualType QT) const override;
+
+ llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const override {
+ // On the device side, a surface reference is represented as an object
+ // handle held in a 64-bit integer.
+ return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
+ }
+
+ llvm::Type *getCUDADeviceBuiltinTextureDeviceType() const override {
+ // On the device side, a texture reference is represented as an object
+ // handle held in a 64-bit integer.
+ return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
+ }
+
+ bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF, LValue Dst,
+ LValue Src) const override {
+ emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
+ return true;
+ }
+
+ bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF, LValue Dst,
+ LValue Src) const override {
+ emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
+ return true;
+ }
+
+ // Creates an MDNode with GV, Name, and Operand as operands, and appends it
+ // to the module-level "nvvm.annotations" named metadata.
+ static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
+ int Operand);
+
+private:
+ static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
+ LValue Src) {
+ llvm::Value *Handle = nullptr;
+ llvm::Constant *C =
+ llvm::dyn_cast<llvm::Constant>(Src.getAddress().emitRawPointer(CGF));
+ // Look through an `addrspacecast` on the constant pointer, if present.
+ if (auto *ASC = llvm::dyn_cast_or_null<llvm::AddrSpaceCastOperator>(C))
+ C = llvm::cast<llvm::Constant>(ASC->getPointerOperand());
+ if (auto *GV = llvm::dyn_cast_or_null<llvm::GlobalVariable>(C)) {
+ // Load the handle from the specified global variable using the
+ // `nvvm.texsurf.handle.internal` intrinsic.
+ Handle = CGF.EmitRuntimeCall(
+ CGF.CGM.getIntrinsic(llvm::Intrinsic::nvvm_texsurf_handle_internal,
+ {GV->getType()}),
+ {GV}, "texsurf_handle");
+ } else
+ Handle = CGF.EmitLoadOfScalar(Src, SourceLocation());
+ CGF.EmitStoreOfScalar(Handle, Dst);
+ }
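+
+ // Illustration (editor's addition, not part of this commit): for a
+ // module-scope texture reference `tex` held in a global variable, the
+ // helper above emits IR roughly like (intrinsic mangling may vary):
+ //   %texsurf_handle = call i64
+ //       @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @tex)
+ //   store i64 %texsurf_handle, ptr %dst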
+};
+
+/// Checks whether the given type is not directly supported by the current
+/// target.
+bool NVPTXABIInfo::isUnsupportedType(QualType T) const {
+ ASTContext &Context = getContext();
+ if (!Context.getTargetInfo().hasFloat16Type() && T->isFloat16Type())
+ return true;
+ if (!Context.getTargetInfo().hasFloat128Type() &&
+ (T->isFloat128Type() ||
+ (T->isRealFloatingType() && Context.getTypeSize(T) == 128)))
+ return true;
+ if (const auto *EIT = T->getAs<BitIntType>())
+ return EIT->getNumBits() >
+ (Context.getTargetInfo().hasInt128Type() ? 128U : 64U);
+ if (!Context.getTargetInfo().hasInt128Type() && T->isIntegerType() &&
+ Context.getTypeSize(T) > 64U)
+ return true;
+ if (const auto *AT = T->getAsArrayTypeUnsafe())
+ return isUnsupportedType(AT->getElementType());
+ const auto *RT = T->getAs<RecordType>();
+ if (!RT)
+ return false;
+ const RecordDecl *RD = RT->getDecl();
+
+ // If this is a C++ record, check the bases first.
+ if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
+ for (const CXXBaseSpecifier &I : CXXRD->bases())
+ if (isUnsupportedType(I.getType()))
+ return true;
+
+ for (const FieldDecl *I : RD->fields())
+ if (isUnsupportedType(I->getType()))
+ return true;
+ return false;
+}
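+
+// Example (editor's illustration): on a target without __float128 support,
+//   struct S { __float128 x; };
+// is reported unsupported, because the field loop above recurses into the
+// record's members.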
+
+/// Coerce the given type into an integer array whose elements are at most
+/// MaxSize bits wide.
+ABIArgInfo NVPTXABIInfo::coerceToIntArrayWithLimit(QualType Ty,
+ unsigned MaxSize) const {
+ // Alignment and Size are measured in bits.
+ const uint64_t Size = getContext().getTypeSize(Ty);
+ const uint64_t Alignment = getContext().getTypeAlign(Ty);
+ const unsigned Div = std::min<unsigned>(MaxSize, Alignment);
+ llvm::Type *IntType = llvm::Type::getIntNTy(getVMContext(), Div);
+ const uint64_t NumElements = (Size + Div - 1) / Div;
+ return ABIArgInfo::getDirect(llvm::ArrayType::get(IntType, NumElements));
+}
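+
+// Worked example (editor's illustration): for a 128-bit type with 64-bit
+// alignment and MaxSize = 64, Div = min(64, 64) = 64 and
+// NumElements = (128 + 64 - 1) / 64 = 2, so the type is coerced to [2 x i64].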
+
+ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const {
+ if (RetTy->isVoidType())
+ return ABIArgInfo::getIgnore();
+
+ if (getContext().getLangOpts().OpenMP &&
+ getContext().getLangOpts().OpenMPIsTargetDevice &&
+ isUnsupportedType(RetTy))
+ return coerceToIntArrayWithLimit(RetTy, 64);
+
+ // Note: this differs from the default ABI, which would return non-scalar
+ // types indirectly.
+ if (!RetTy->isScalarType())
+ return ABIArgInfo::getDirect();
+
+ // Treat an enum type as its underlying type.
+ if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
+ RetTy = EnumTy->getDecl()->getIntegerType();
+
+ return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
+ : ABIArgInfo::getDirect());
+}
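+
+// Example (editor's illustration): a small aggregate return such as
+//   struct P { int x, y; };
+// is not a scalar type, so it is classified getDirect() above instead of the
+// indirect (sret) return the default ABI would use.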
+
+ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const {
+ // Treat an enum type as its underlying type.
+ if (const EnumType *EnumTy = Ty->getAs<EnumType>())
+ Ty = EnumTy->getDecl()->getIntegerType();
+
+ // Pass aggregate types indirectly, by value.
+ if (isAggregateTypeForABI(Ty)) {
+ // Under CUDA device compilation, tex/surf builtin types are replaced with
+ // object types and passed directly.
+ if (getContext().getLangOpts().CUDAIsDevice) {
+ if (Ty->isCUDADeviceBuiltinSurfaceType())
+ return ABIArgInfo::getDirect(
+ CGInfo.getCUDADeviceBuiltinSurfaceDeviceType());
+ if (Ty->isCUDADeviceBuiltinTextureType())
+ return ABIArgInfo::getDirect(
+ CGInfo.getCUDADeviceBuiltinTextureDeviceType());
+ }
+ return getNaturalAlignIndirect(Ty, /* byval */ true);
+ }
+
+ if (const auto *EIT = Ty->getAs<BitIntType>()) {
+ if ((EIT->getNumBits() > 128) ||
+ (!getContext().getTargetInfo().hasInt128Type() &&
+ EIT->getNumBits() > 64))
+ return getNaturalAlignIndirect(Ty, /* byval */ true);
+ }
+
+ return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
+ : ABIArgInfo::getDirect());
+}
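+
+// Example (editor's illustration): `struct Big { double d[4]; }` is an
+// aggregate and is passed indirectly byval; `_BitInt(256)` also goes
+// indirect since 256 > 128; a promotable `short` is widened via getExtend().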
+
+void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const {
+ if (!getCXXABI().classifyReturnType(FI))
+ FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
+
+ for (auto &&[ArgIndex, I] : llvm::enumerate(FI.arguments()))
+ I.info = ArgIndex < FI.getNumRequiredArgs()
+ ? classifyArgumentType(I.type)
+ : ABIArgInfo::getDirect();
+
+ // Always honor user-specified calling convention.
+ if (FI.getCallingConvention() != llvm::CallingConv::C)
+ return;
+
+ FI.setEffectiveCallingConvention(getRuntimeCC());
+}
+
+RValue NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
+ QualType Ty, AggValueSlot Slot) const {
+ return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*IsIndirect=*/false,
+ getContext().getTypeInfoInChars(Ty),
+ CharUnits::fromQuantity(1),
+ /*AllowHigherAlign=*/true, Slot);
+}
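+
+// Editor's note: with a one-byte slot size and AllowHigherAlign, each va_arg
+// read aligns the list pointer up to the argument's natural alignment and
+// then advances it by the argument's size (e.g. 4 bytes for an `int`).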
+
+void NVPTXTargetCodeGenInfo::setTargetAttributes(
+ const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
+ if (GV->isDeclaration())
+ return;
+ const VarDecl *VD = dyn_cast_or_null<VarDecl>(D);
+ if (VD) {
+ if (M.getLangOpts().CUDA) {
+ if (VD->getType()->isCUDADeviceBuiltinSurfaceType())
+ addNVVMMetadata(GV, "surface", 1);
+ else if (VD->getType()->isCUDADeviceBuiltinTextureType())
+ addNVVMMetadata(GV, "texture", 1);
+ return;
+ }
+ }
+
+ const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
+ if (!FD) return;
+
+ llvm::Function *F = cast<llvm::Function>(GV);
+
+ // Perform special handling in OpenCL mode
+ if (M.getLangOpts().OpenCL) {
+ // Use OpenCL function attributes to check for kernel functions
+ // By default, all functions are device functions
+ if (FD->hasAttr<OpenCLKernelAttr>()) {
+ // OpenCL __kernel functions get kernel metadata
+ // Create !{<func-ref>, metadata !"kernel", i32 1} node
+ addNVVMMetadata(F, "kernel", 1);
+ // And kernel functions are not subject to inlining
+ F->addFnAttr(llvm::Attribute::NoInline);
+ }
+ }
+
+ // Perform special handling in CUDA mode.
+ if (M.getLangOpts().CUDA) {
+ // CUDA __global__ functions get a kernel metadata entry. Since
+ // __global__ functions cannot be called from the device, we do not
+ // need to set the noinline attribute.
+ if (FD->hasAttr<CUDAGlobalAttr>()) {
+ // Create !{<func-ref>, metadata !"kernel", i32 1} node
+ addNVVMMetadata(F, "kernel", 1);
+ }
+ if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
+ M.handleCUDALaunchBoundsAttr(F, Attr);
+ }
+
+ // Attach kernel metadata directly if compiling for NVPTX.
+ if (FD->hasAttr<NVPTXKernelAttr>()) {
+ addNVVMMetadata(F, "kernel", 1);
+ }
+}
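+
+// Example (editor's illustration): under CUDA device compilation,
+//   __global__ void k() {}
+// takes the CUDAGlobalAttr branch above, so @_Z1kv (modulo mangling) gets a
+// !"kernel" annotation but no noinline attribute.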
+
+void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
+ StringRef Name, int Operand) {
+ llvm::Module *M = GV->getParent();
+ llvm::LLVMContext &Ctx = M->getContext();
+
+ // Get "nvvm.annotations" metadata node
+ llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");
+
+ llvm::Metadata *MDVals[] = {
+ llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
+ llvm::ConstantAsMetadata::get(
+ llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
+ // Append metadata to nvvm.annotations
+ MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
+}
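+
+// Illustration (editor's addition): addNVVMMetadata(F, "kernel", 1) for a
+// function @foo produces module-level metadata of the form:
+//   !nvvm.annotations = !{!0}
+//   !0 = !{ptr @foo, !"kernel", i32 1}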
+
+bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
+ return false;
+}
+
+llvm::Constant *
+NVPTXTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
+ llvm::PointerType *PT,
+ QualType QT) const {
+ auto &Ctx = CGM.getContext();
+ if (PT->getAddressSpace() != Ctx.getTargetAddressSpace(LangAS::opencl_local))
+ return llvm::ConstantPointerNull::get(PT);
+
+ auto NPT = llvm::PointerType::get(
+ PT->getContext(), Ctx.getTargetAddressSpace(LangAS::opencl_generic));
+ return llvm::ConstantExpr::getAddrSpaceCast(
+ llvm::ConstantPointerNull::get(NPT), PT);
+}
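+
+// Example (editor's illustration): on NVPTX, OpenCL `local` typically maps
+// to addrspace(3) and `generic` to addrspace(0), so a null `__local int *`
+// becomes:
+//   addrspacecast (ptr null to ptr addrspace(3))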
+} // namespace
+
+void CodeGenModule::handleCUDALaunchBoundsAttr(llvm::Function *F,
+ const CUDALaunchBoundsAttr *Attr,
+ int32_t *MaxThreadsVal,
+ int32_t *MinBlocksVal,
+ int32_t *MaxClusterRankVal) {
+ // Evaluate the max-threads launch bound; annotate the function if positive.
+ llvm::APSInt MaxThreads(32);
+ MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(getContext());
+ if (MaxThreads > 0) {
+ if (MaxThreadsVal)
+ *MaxThreadsVal = MaxThreads.getExtValue();
+ if (F) {
+ // Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node
+ NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxntidx",
+ MaxThreads.getExtValue());
+ }
+ }
+
+ // The min- and max-blocks values are optional arguments to
+ // CUDALaunchBoundsAttr. If one was not specified in __launch_bounds__, or
+ // the user specified a 0 value, we don't have to add a PTX directive.
+ if (Attr->getMinBlocks()) {
+ llvm::APSInt MinBlocks(32);
+ MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(getContext());
+ if (MinBlocks > 0) {
+ if (MinBlocksVal)
+ *MinBlocksVal = MinBlocks.getExtValue();
+ if (F) {
+ // Create !{<func-ref>, metadata !"minctasm", i32 <val>} node
+ NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "minctasm",
+ MinBlocks.getExtValue());
+ }
+ }
+ }
+ if (Attr->getMaxBlocks()) {
+ llvm::APSInt MaxBlocks(32);
+ MaxBlocks = Attr->getMaxBlocks()->EvaluateKnownConstInt(getContext());
+ if (MaxBlocks > 0) {
+ if (MaxClusterRankVal)
+ *MaxClusterRankVal = MaxBlocks.getExtValue();
+ if (F) {
+ // Create !{<func-ref>, metadata !"maxclusterrank", i32 <val>} node
+ NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxclusterrank",
+ MaxBlocks.getExtValue());
+ }
+ }
+ }
+}
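+
+// Example (editor's illustration): __launch_bounds__(256, 4) yields
+// !{ptr @kernel, !"maxntidx", i32 256} and
+// !{ptr @kernel, !"minctasm", i32 4}; a max-blocks value, where supported,
+// adds a !"maxclusterrank" entry.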
+
+std::unique_ptr<TargetCodeGenInfo>
+CodeGen::createNVPTXTargetCodeGenInfo(CodeGenModule &CGM) {
+ return std::make_unique<NVPTXTargetCodeGenInfo>(CGM.getTypes());
+}