aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/AMDGPU/AMDGPUSubtarget.cpp')
-rw-r--r--lib/Target/AMDGPU/AMDGPUSubtarget.cpp227
1 files changed, 157 insertions, 70 deletions
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 7d70fa73da29..10fa9cf46737 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -15,7 +15,6 @@
#include "AMDGPUSubtarget.h"
#include "R600ISelLowering.h"
#include "R600InstrInfo.h"
-#include "R600MachineScheduler.h"
#include "SIFrameLowering.h"
#include "SIISelLowering.h"
#include "SIInstrInfo.h"
@@ -32,6 +31,8 @@ using namespace llvm;
#define GET_SUBTARGETINFO_CTOR
#include "AMDGPUGenSubtargetInfo.inc"
+AMDGPUSubtarget::~AMDGPUSubtarget() {}
+
AMDGPUSubtarget &
AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
StringRef GPU, StringRef FS) {
@@ -44,14 +45,11 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
// for SI has the unhelpful behavior that it unsets everything else if you
// disable it.
- SmallString<256> FullFS("+promote-alloca,+fp64-denormals,");
+ SmallString<256> FullFS("+promote-alloca,+fp64-denormals,+load-store-opt,");
if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
- FullFS += "+flat-for-global,";
+ FullFS += "+flat-for-global,+unaligned-buffer-access,";
FullFS += FS;
- if (GPU == "" && TT.getArch() == Triple::amdgcn)
- GPU = "SI";
-
ParseSubtargetFeatures(GPU, FullFS);
// FIXME: I don't think think Evergreen has any useful support for
@@ -61,52 +59,142 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
FP32Denormals = false;
FP64Denormals = false;
}
+
+ // Set defaults if needed.
+ if (MaxPrivateElementSize == 0)
+ MaxPrivateElementSize = 4;
+
return *this;
}
AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
- TargetMachine &TM)
- : AMDGPUGenSubtargetInfo(TT, GPU, FS), DevName(GPU), Is64bit(false),
- DumpCode(false), R600ALUInst(false), HasVertexCache(false),
- TexVTXClauseSize(0), Gen(AMDGPUSubtarget::R600), FP64(false),
- FP64Denormals(false), FP32Denormals(false), FastFMAF32(false),
- CaymanISA(false), FlatAddressSpace(false), FlatForGlobal(false),
- EnableIRStructurizer(true), EnablePromoteAlloca(false), EnableIfCvt(true),
- EnableLoadStoreOpt(false), EnableUnsafeDSOffsetFolding(false),
- EnableXNACK(false),
- WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
- EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false),
- GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), LDSBankCount(0),
- IsaVersion(ISAVersion0_0_0), EnableHugeScratchBuffer(false),
- EnableSIScheduler(false), FrameLowering(nullptr),
- InstrItins(getInstrItineraryForCPU(GPU)), TargetTriple(TT) {
-
+ const TargetMachine &TM)
+ : AMDGPUGenSubtargetInfo(TT, GPU, FS),
+ TargetTriple(TT),
+ Gen(TT.getArch() == Triple::amdgcn ? SOUTHERN_ISLANDS : R600),
+ IsaVersion(ISAVersion0_0_0),
+ WavefrontSize(64),
+ LocalMemorySize(0),
+ LDSBankCount(0),
+ MaxPrivateElementSize(0),
+
+ FastFMAF32(false),
+ HalfRate64Ops(false),
+
+ FP32Denormals(false),
+ FP64Denormals(false),
+ FPExceptions(false),
+ FlatForGlobal(false),
+ UnalignedBufferAccess(false),
+
+ EnableXNACK(false),
+ DebuggerInsertNops(false),
+ DebuggerReserveRegs(false),
+ DebuggerEmitPrologue(false),
+
+ EnableVGPRSpilling(false),
+ EnablePromoteAlloca(false),
+ EnableLoadStoreOpt(false),
+ EnableUnsafeDSOffsetFolding(false),
+ EnableSIScheduler(false),
+ DumpCode(false),
+
+ FP64(false),
+ IsGCN(false),
+ GCN1Encoding(false),
+ GCN3Encoding(false),
+ CIInsts(false),
+ SGPRInitBug(false),
+ HasSMemRealTime(false),
+ Has16BitInsts(false),
+ FlatAddressSpace(false),
+
+ R600ALUInst(false),
+ CaymanISA(false),
+ CFALUBug(false),
+ HasVertexCache(false),
+ TexVTXClauseSize(0),
+
+ FeatureDisable(false),
+ InstrItins(getInstrItineraryForCPU(GPU)) {
initializeSubtargetDependencies(TT, GPU, FS);
+}
- const unsigned MaxStackAlign = 64 * 16; // Maximum stack alignment (long16)
-
- if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
- InstrInfo.reset(new R600InstrInfo(*this));
- TLInfo.reset(new R600TargetLowering(TM, *this));
-
- // FIXME: Should have R600 specific FrameLowering
- FrameLowering.reset(new AMDGPUFrameLowering(
- TargetFrameLowering::StackGrowsUp,
- MaxStackAlign,
- 0));
- } else {
- InstrInfo.reset(new SIInstrInfo(*this));
- TLInfo.reset(new SITargetLowering(TM, *this));
- FrameLowering.reset(new SIFrameLowering(
- TargetFrameLowering::StackGrowsUp,
- MaxStackAlign,
- 0));
+// FIXME: These limits are for SI. Did they change with the larger maximum LDS
+// size?
+unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves) const {
+ switch (NWaves) {
+ case 10:
+ return 1638;
+ case 9:
+ return 1820;
+ case 8:
+ return 2048;
+ case 7:
+ return 2340;
+ case 6:
+ return 2730;
+ case 5:
+ return 3276;
+ case 4:
+ return 4096;
+ case 3:
+ return 5461;
+ case 2:
+ return 8192;
+ default:
+ return getLocalMemorySize();
}
}
-unsigned AMDGPUSubtarget::getStackEntrySize() const {
- assert(getGeneration() <= NORTHERN_ISLANDS);
- switch(getWavefrontSize()) {
+unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes) const {
+ if (Bytes <= 1638)
+ return 10;
+
+ if (Bytes <= 1820)
+ return 9;
+
+ if (Bytes <= 2048)
+ return 8;
+
+ if (Bytes <= 2340)
+ return 7;
+
+ if (Bytes <= 2730)
+ return 6;
+
+ if (Bytes <= 3276)
+ return 5;
+
+ if (Bytes <= 4096)
+ return 4;
+
+ if (Bytes <= 5461)
+ return 3;
+
+ if (Bytes <= 8192)
+ return 2;
+
+ return 1;
+}
+
+R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
+ const TargetMachine &TM) :
+ AMDGPUSubtarget(TT, GPU, FS, TM),
+ InstrInfo(*this),
+ FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
+ TLInfo(TM, *this) {}
+
+SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS,
+ const TargetMachine &TM) :
+ AMDGPUSubtarget(TT, GPU, FS, TM),
+ InstrInfo(*this),
+ FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
+ TLInfo(TM, *this),
+ GISel() {}
+
+unsigned R600Subtarget::getStackEntrySize() const {
+ switch (getWavefrontSize()) {
case 16:
return 8;
case 32:
@@ -118,37 +206,36 @@ unsigned AMDGPUSubtarget::getStackEntrySize() const {
}
}
-unsigned AMDGPUSubtarget::getAmdKernelCodeChipID() const {
- switch(getGeneration()) {
- default: llvm_unreachable("ChipID unknown");
- case SEA_ISLANDS: return 12;
- }
-}
-
-AMDGPU::IsaVersion AMDGPUSubtarget::getIsaVersion() const {
- return AMDGPU::getIsaVersion(getFeatureBits());
+void SISubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
+ unsigned NumRegionInstrs) const {
+ // Track register pressure so the scheduler can try to decrease
+ // pressure once register usage is above the threshold defined by
+ // SIRegisterInfo::getRegPressureSetLimit()
+ Policy.ShouldTrackPressure = true;
+
+ // Enabling both top down and bottom up scheduling seems to give us less
+ // register spills than just using one of these approaches on its own.
+ Policy.OnlyTopDown = false;
+ Policy.OnlyBottomUp = false;
+
+ // Enabling ShouldTrackLaneMasks crashes the SI Machine Scheduler.
+ if (!enableSIScheduler())
+ Policy.ShouldTrackLaneMasks = true;
}
-bool AMDGPUSubtarget::isVGPRSpillingEnabled(
- const SIMachineFunctionInfo *MFI) const {
- return MFI->getShaderType() == ShaderType::COMPUTE || EnableVGPRSpilling;
+bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const {
+ return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv());
}
-void AMDGPUSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
- MachineInstr *begin,
- MachineInstr *end,
- unsigned NumRegionInstrs) const {
- if (getGeneration() >= SOUTHERN_ISLANDS) {
-
- // Track register pressure so the scheduler can try to decrease
- // pressure once register usage is above the threshold defined by
- // SIRegisterInfo::getRegPressureSetLimit()
- Policy.ShouldTrackPressure = true;
-
- // Enabling both top down and bottom up scheduling seems to give us less
- // register spills than just using one of these approaches on its own.
- Policy.OnlyTopDown = false;
- Policy.OnlyBottomUp = false;
+unsigned SISubtarget::getAmdKernelCodeChipID() const {
+ switch (getGeneration()) {
+ case SEA_ISLANDS:
+ return 12;
+ default:
+ llvm_unreachable("ChipID unknown");
}
}
+AMDGPU::IsaVersion SISubtarget::getIsaVersion() const {
+ return AMDGPU::getIsaVersion(getFeatureBits());
+}