aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/clang/lib/Basic/Targets/AMDGPU.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm-project/clang/lib/Basic/Targets/AMDGPU.cpp')
-rw-r--r--contrib/llvm-project/clang/lib/Basic/Targets/AMDGPU.cpp324
1 files changed, 126 insertions, 198 deletions
diff --git a/contrib/llvm-project/clang/lib/Basic/Targets/AMDGPU.cpp b/contrib/llvm-project/clang/lib/Basic/Targets/AMDGPU.cpp
index ba7ffa34c73e..3b748d0249d5 100644
--- a/contrib/llvm-project/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/contrib/llvm-project/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -13,11 +13,11 @@
#include "AMDGPU.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/CodeGenOptions.h"
+#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/MacroBuilder.h"
#include "clang/Basic/TargetBuiltins.h"
-#include "llvm/ADT/StringSwitch.h"
-
+#include "llvm/ADT/SmallString.h"
using namespace clang;
using namespace clang::targets;
@@ -33,63 +33,66 @@ static const char *const DataLayoutStringR600 =
static const char *const DataLayoutStringAMDGCN =
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
- "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
+ "-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:"
+ "32-v48:64-v96:128"
"-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"
- "-ni:7";
+ "-ni:7:8:9";
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
- Generic, // Default
- Global, // opencl_global
- Local, // opencl_local
- Constant, // opencl_constant
- Private, // opencl_private
- Generic, // opencl_generic
- Global, // opencl_global_device
- Global, // opencl_global_host
- Global, // cuda_device
- Constant, // cuda_constant
- Local, // cuda_shared
- Global, // sycl_global
- Global, // sycl_global_device
- Global, // sycl_global_host
- Local, // sycl_local
- Private, // sycl_private
- Generic, // ptr32_sptr
- Generic, // ptr32_uptr
- Generic // ptr64
+ llvm::AMDGPUAS::FLAT_ADDRESS, // Default
+ llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global
+ llvm::AMDGPUAS::LOCAL_ADDRESS, // opencl_local
+ llvm::AMDGPUAS::CONSTANT_ADDRESS, // opencl_constant
+ llvm::AMDGPUAS::PRIVATE_ADDRESS, // opencl_private
+ llvm::AMDGPUAS::FLAT_ADDRESS, // opencl_generic
+ llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_device
+ llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_host
+ llvm::AMDGPUAS::GLOBAL_ADDRESS, // cuda_device
+ llvm::AMDGPUAS::CONSTANT_ADDRESS, // cuda_constant
+ llvm::AMDGPUAS::LOCAL_ADDRESS, // cuda_shared
+ llvm::AMDGPUAS::GLOBAL_ADDRESS, // sycl_global
+ llvm::AMDGPUAS::GLOBAL_ADDRESS, // sycl_global_device
+ llvm::AMDGPUAS::GLOBAL_ADDRESS, // sycl_global_host
+ llvm::AMDGPUAS::LOCAL_ADDRESS, // sycl_local
+ llvm::AMDGPUAS::PRIVATE_ADDRESS, // sycl_private
+ llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_sptr
+ llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_uptr
+ llvm::AMDGPUAS::FLAT_ADDRESS, // ptr64
+ llvm::AMDGPUAS::FLAT_ADDRESS, // hlsl_groupshared
};
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
- Private, // Default
- Global, // opencl_global
- Local, // opencl_local
- Constant, // opencl_constant
- Private, // opencl_private
- Generic, // opencl_generic
- Global, // opencl_global_device
- Global, // opencl_global_host
- Global, // cuda_device
- Constant, // cuda_constant
- Local, // cuda_shared
+ llvm::AMDGPUAS::PRIVATE_ADDRESS, // Default
+ llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global
+ llvm::AMDGPUAS::LOCAL_ADDRESS, // opencl_local
+ llvm::AMDGPUAS::CONSTANT_ADDRESS, // opencl_constant
+ llvm::AMDGPUAS::PRIVATE_ADDRESS, // opencl_private
+ llvm::AMDGPUAS::FLAT_ADDRESS, // opencl_generic
+ llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_device
+ llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_host
+ llvm::AMDGPUAS::GLOBAL_ADDRESS, // cuda_device
+ llvm::AMDGPUAS::CONSTANT_ADDRESS, // cuda_constant
+ llvm::AMDGPUAS::LOCAL_ADDRESS, // cuda_shared
// SYCL address space values for this map are dummy
- Generic, // sycl_global
- Generic, // sycl_global_device
- Generic, // sycl_global_host
- Generic, // sycl_local
- Generic, // sycl_private
- Generic, // ptr32_sptr
- Generic, // ptr32_uptr
- Generic // ptr64
+ llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_global
+ llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_global_device
+ llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_global_host
+ llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_local
+ llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_private
+ llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_sptr
+ llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_uptr
+ llvm::AMDGPUAS::FLAT_ADDRESS, // ptr64
+ llvm::AMDGPUAS::FLAT_ADDRESS, // hlsl_groupshared
};
} // namespace targets
} // namespace clang
-const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
+static constexpr Builtin::Info BuiltinInfo[] = {
#define BUILTIN(ID, TYPE, ATTRS) \
- {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
+ {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \
- {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
+ {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
#include "clang/Basic/BuiltinsAMDGPU.def"
};
@@ -171,7 +174,7 @@ const char *const AMDGPUTargetInfo::GCCRegNames[] = {
};
ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
- return llvm::makeArrayRef(GCCRegNames);
+ return llvm::ArrayRef(GCCRegNames);
}
bool AMDGPUTargetInfo::initFeatureMap(
@@ -179,136 +182,24 @@ bool AMDGPUTargetInfo::initFeatureMap(
const std::vector<std::string> &FeatureVec) const {
using namespace llvm::AMDGPU;
-
- // XXX - What does the member GPU mean if device name string passed here?
- if (isAMDGCN(getTriple())) {
- switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
- case GK_GFX1035:
- case GK_GFX1034:
- case GK_GFX1033:
- case GK_GFX1032:
- case GK_GFX1031:
- case GK_GFX1030:
- Features["ci-insts"] = true;
- Features["dot1-insts"] = true;
- Features["dot2-insts"] = true;
- Features["dot5-insts"] = true;
- Features["dot6-insts"] = true;
- Features["dot7-insts"] = true;
- Features["dl-insts"] = true;
- Features["flat-address-space"] = true;
- Features["16-bit-insts"] = true;
- Features["dpp"] = true;
- Features["gfx8-insts"] = true;
- Features["gfx9-insts"] = true;
- Features["gfx10-insts"] = true;
- Features["gfx10-3-insts"] = true;
- Features["s-memrealtime"] = true;
- Features["s-memtime-inst"] = true;
- break;
- case GK_GFX1012:
- case GK_GFX1011:
- Features["dot1-insts"] = true;
- Features["dot2-insts"] = true;
- Features["dot5-insts"] = true;
- Features["dot6-insts"] = true;
- Features["dot7-insts"] = true;
- LLVM_FALLTHROUGH;
- case GK_GFX1013:
- case GK_GFX1010:
- Features["dl-insts"] = true;
- Features["ci-insts"] = true;
- Features["flat-address-space"] = true;
- Features["16-bit-insts"] = true;
- Features["dpp"] = true;
- Features["gfx8-insts"] = true;
- Features["gfx9-insts"] = true;
- Features["gfx10-insts"] = true;
- Features["s-memrealtime"] = true;
- Features["s-memtime-inst"] = true;
- break;
- case GK_GFX90A:
- Features["gfx90a-insts"] = true;
- LLVM_FALLTHROUGH;
- case GK_GFX908:
- Features["dot3-insts"] = true;
- Features["dot4-insts"] = true;
- Features["dot5-insts"] = true;
- Features["dot6-insts"] = true;
- Features["mai-insts"] = true;
- LLVM_FALLTHROUGH;
- case GK_GFX906:
- Features["dl-insts"] = true;
- Features["dot1-insts"] = true;
- Features["dot2-insts"] = true;
- Features["dot7-insts"] = true;
- LLVM_FALLTHROUGH;
- case GK_GFX90C:
- case GK_GFX909:
- case GK_GFX904:
- case GK_GFX902:
- case GK_GFX900:
- Features["gfx9-insts"] = true;
- LLVM_FALLTHROUGH;
- case GK_GFX810:
- case GK_GFX805:
- case GK_GFX803:
- case GK_GFX802:
- case GK_GFX801:
- Features["gfx8-insts"] = true;
- Features["16-bit-insts"] = true;
- Features["dpp"] = true;
- Features["s-memrealtime"] = true;
- LLVM_FALLTHROUGH;
- case GK_GFX705:
- case GK_GFX704:
- case GK_GFX703:
- case GK_GFX702:
- case GK_GFX701:
- case GK_GFX700:
- Features["ci-insts"] = true;
- Features["flat-address-space"] = true;
- LLVM_FALLTHROUGH;
- case GK_GFX602:
- case GK_GFX601:
- case GK_GFX600:
- Features["s-memtime-inst"] = true;
- break;
- case GK_NONE:
- break;
- default:
- llvm_unreachable("Unhandled GPU!");
- }
- } else {
- if (CPU.empty())
- CPU = "r600";
-
- switch (llvm::AMDGPU::parseArchR600(CPU)) {
- case GK_CAYMAN:
- case GK_CYPRESS:
- case GK_RV770:
- case GK_RV670:
- // TODO: Add fp64 when implemented.
- break;
- case GK_TURKS:
- case GK_CAICOS:
- case GK_BARTS:
- case GK_SUMO:
- case GK_REDWOOD:
- case GK_JUNIPER:
- case GK_CEDAR:
- case GK_RV730:
- case GK_RV710:
- case GK_RS880:
- case GK_R630:
- case GK_R600:
- break;
- default:
- llvm_unreachable("Unhandled GPU!");
- }
+ fillAMDGPUFeatureMap(CPU, getTriple(), Features);
+ if (!TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec))
+ return false;
+
+ // TODO: Should move this logic into TargetParser
+ auto HasError = insertWaveSizeFeature(CPU, getTriple(), Features);
+ switch (HasError.first) {
+ default:
+ break;
+ case llvm::AMDGPU::INVALID_FEATURE_COMBINATION:
+ Diags.Report(diag::err_invalid_feature_combination) << HasError.second;
+ return false;
+ case llvm::AMDGPU::UNSUPPORTED_TARGET_FEATURE:
+ Diags.Report(diag::err_opt_not_valid_on_target) << HasError.second;
+ return false;
}
- return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
+ return true;
}
void AMDGPUTargetInfo::fillValidCPUList(
@@ -339,13 +230,19 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
!isAMDGCN(Triple));
UseAddrSpaceMapMangling = true;
+ if (isAMDGCN(Triple)) {
+ // __bf16 is always available as a load/store only type on AMDGCN.
+ BFloat16Width = BFloat16Align = 16;
+ BFloat16Format = &llvm::APFloat::BFloat();
+ }
+
HasLegalHalfType = true;
HasFloat16 = true;
- WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64;
+ WavefrontSize = (GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32) ? 32 : 64;
AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics;
- // Set pointer width and alignment for target address space 0.
- PointerWidth = PointerAlign = getPointerWidthV(Generic);
+ // Set pointer width and alignment for the generic address space.
+ PointerWidth = PointerAlign = getPointerWidthV(LangAS::Default);
if (getMaxPointerWidth() == 64) {
LongWidth = LongAlign = 64;
SizeType = UnsignedLong;
@@ -354,6 +251,10 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
}
MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
+ CUMode = !(GPUFeatures & llvm::AMDGPU::FEATURE_WGP);
+ for (auto F : {"image-insts", "gws"})
+ ReadOnlyFeatures.insert(F);
+ HalfArgsAndReturns = true;
}
void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) {
@@ -366,8 +267,8 @@ void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) {
}
ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
- return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
- Builtin::FirstTSBuiltin);
+ return llvm::ArrayRef(BuiltinInfo,
+ clang::AMDGPU::LastTSBuiltin - Builtin::FirstTSBuiltin);
}
void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
@@ -380,29 +281,49 @@ void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
else
Builder.defineMacro("__R600__");
- if (GPUKind != llvm::AMDGPU::GK_NONE) {
- StringRef CanonName = isAMDGCN(getTriple()) ?
- getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
- Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
- if (isAMDGCN(getTriple())) {
- Builder.defineMacro("__amdgcn_processor__",
- Twine("\"") + Twine(CanonName) + Twine("\""));
- Builder.defineMacro("__amdgcn_target_id__",
- Twine("\"") + Twine(getTargetID().getValue()) +
- Twine("\""));
- for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) {
- auto Loc = OffloadArchFeatures.find(F);
- if (Loc != OffloadArchFeatures.end()) {
- std::string NewF = F.str();
- std::replace(NewF.begin(), NewF.end(), '-', '_');
- Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) +
- Twine("__"),
- Loc->second ? "1" : "0");
- }
+ // Legacy HIP host code relies on these default attributes to be defined.
+ bool IsHIPHost = Opts.HIP && !Opts.CUDAIsDevice;
+ if (GPUKind == llvm::AMDGPU::GK_NONE && !IsHIPHost)
+ return;
+
+ llvm::SmallString<16> CanonName =
+ (isAMDGCN(getTriple()) ? getArchNameAMDGCN(GPUKind)
+ : getArchNameR600(GPUKind));
+
+ // Sanitize the name of generic targets.
+ // e.g. gfx10-1-generic -> gfx10_1_generic
+ if (GPUKind >= llvm::AMDGPU::GK_AMDGCN_GENERIC_FIRST &&
+ GPUKind <= llvm::AMDGPU::GK_AMDGCN_GENERIC_LAST) {
+ std::replace(CanonName.begin(), CanonName.end(), '-', '_');
+ }
+
+ Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
+ // Emit macros for gfx family e.g. gfx906 -> __GFX9__, gfx1030 -> __GFX10__
+ if (isAMDGCN(getTriple()) && !IsHIPHost) {
+ assert(StringRef(CanonName).starts_with("gfx") &&
+ "Invalid amdgcn canonical name");
+ StringRef CanonFamilyName = getArchFamilyNameAMDGCN(GPUKind);
+ Builder.defineMacro(Twine("__") + Twine(CanonFamilyName.upper()) +
+ Twine("__"));
+ Builder.defineMacro("__amdgcn_processor__",
+ Twine("\"") + Twine(CanonName) + Twine("\""));
+ Builder.defineMacro("__amdgcn_target_id__",
+ Twine("\"") + Twine(*getTargetID()) + Twine("\""));
+ for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) {
+ auto Loc = OffloadArchFeatures.find(F);
+ if (Loc != OffloadArchFeatures.end()) {
+ std::string NewF = F.str();
+ std::replace(NewF.begin(), NewF.end(), '-', '_');
+ Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) +
+ Twine("__"),
+ Loc->second ? "1" : "0");
}
}
}
+ if (AllowAMDGPUUnsafeFPAtomics)
+ Builder.defineMacro("__AMDGCN_UNSAFE_FP_ATOMICS__");
+
// TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
// removed in the near future.
if (hasFMAF())
@@ -416,7 +337,10 @@ void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
if (hasFastFMA())
Builder.defineMacro("FP_FAST_FMA");
+ Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE__", Twine(WavefrontSize));
+ // ToDo: deprecate this macro for naming consistency.
Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize));
+ Builder.defineMacro("__AMDGCN_CUMODE__", Twine(CUMode));
}
void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
@@ -429,9 +353,13 @@ void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
// supported by AMDGPU. Therefore keep its own format for these two types.
auto SaveLongDoubleFormat = LongDoubleFormat;
auto SaveFloat128Format = Float128Format;
+ auto SaveLongDoubleWidth = LongDoubleWidth;
+ auto SaveLongDoubleAlign = LongDoubleAlign;
copyAuxTarget(Aux);
LongDoubleFormat = SaveLongDoubleFormat;
Float128Format = SaveFloat128Format;
+ LongDoubleWidth = SaveLongDoubleWidth;
+ LongDoubleAlign = SaveLongDoubleAlign;
// For certain builtin types support on the host target, claim they are
// support to pass the compilation of the host code during the device-side
// compilation.