diff options
Diffstat (limited to 'lib/Target/AMDGPU/AMDGPUSubtarget.cpp')
-rw-r--r-- | lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 227 |
1 files changed, 157 insertions, 70 deletions
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 7d70fa73da29..10fa9cf46737 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -15,7 +15,6 @@ #include "AMDGPUSubtarget.h" #include "R600ISelLowering.h" #include "R600InstrInfo.h" -#include "R600MachineScheduler.h" #include "SIFrameLowering.h" #include "SIISelLowering.h" #include "SIInstrInfo.h" @@ -32,6 +31,8 @@ using namespace llvm; #define GET_SUBTARGETINFO_CTOR #include "AMDGPUGenSubtargetInfo.inc" +AMDGPUSubtarget::~AMDGPUSubtarget() {} + AMDGPUSubtarget & AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS) { @@ -44,14 +45,11 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT, // for SI has the unhelpful behavior that it unsets everything else if you // disable it. - SmallString<256> FullFS("+promote-alloca,+fp64-denormals,"); + SmallString<256> FullFS("+promote-alloca,+fp64-denormals,+load-store-opt,"); if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA. - FullFS += "+flat-for-global,"; + FullFS += "+flat-for-global,+unaligned-buffer-access,"; FullFS += FS; - if (GPU == "" && TT.getArch() == Triple::amdgcn) - GPU = "SI"; - ParseSubtargetFeatures(GPU, FullFS); // FIXME: I don't think think Evergreen has any useful support for @@ -61,52 +59,142 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT, FP32Denormals = false; FP64Denormals = false; } + + // Set defaults if needed. + if (MaxPrivateElementSize == 0) + MaxPrivateElementSize = 4; + return *this; } AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, - TargetMachine &TM) - : AMDGPUGenSubtargetInfo(TT, GPU, FS), DevName(GPU), Is64bit(false), - DumpCode(false), R600ALUInst(false), HasVertexCache(false), - TexVTXClauseSize(0), Gen(AMDGPUSubtarget::R600), FP64(false), - FP64Denormals(false), FP32Denormals(false), FastFMAF32(false), - CaymanISA(false), FlatAddressSpace(false), FlatForGlobal(false), - EnableIRStructurizer(true), EnablePromoteAlloca(false), EnableIfCvt(true), - EnableLoadStoreOpt(false), EnableUnsafeDSOffsetFolding(false), - EnableXNACK(false), - WavefrontSize(0), CFALUBug(false), LocalMemorySize(0), - EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false), - GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), LDSBankCount(0), - IsaVersion(ISAVersion0_0_0), EnableHugeScratchBuffer(false), - EnableSIScheduler(false), FrameLowering(nullptr), - InstrItins(getInstrItineraryForCPU(GPU)), TargetTriple(TT) { - + const TargetMachine &TM) + : AMDGPUGenSubtargetInfo(TT, GPU, FS), + TargetTriple(TT), + Gen(TT.getArch() == Triple::amdgcn ? SOUTHERN_ISLANDS : R600), + IsaVersion(ISAVersion0_0_0), + WavefrontSize(64), + LocalMemorySize(0), + LDSBankCount(0), + MaxPrivateElementSize(0), + + FastFMAF32(false), + HalfRate64Ops(false), + + FP32Denormals(false), + FP64Denormals(false), + FPExceptions(false), + FlatForGlobal(false), + UnalignedBufferAccess(false), + + EnableXNACK(false), + DebuggerInsertNops(false), + DebuggerReserveRegs(false), + DebuggerEmitPrologue(false), + + EnableVGPRSpilling(false), + EnablePromoteAlloca(false), + EnableLoadStoreOpt(false), + EnableUnsafeDSOffsetFolding(false), + EnableSIScheduler(false), + DumpCode(false), + + FP64(false), + IsGCN(false), + GCN1Encoding(false), + GCN3Encoding(false), + CIInsts(false), + SGPRInitBug(false), + HasSMemRealTime(false), + Has16BitInsts(false), + FlatAddressSpace(false), + + R600ALUInst(false), + CaymanISA(false), + CFALUBug(false), + HasVertexCache(false), + TexVTXClauseSize(0), + + FeatureDisable(false), + InstrItins(getInstrItineraryForCPU(GPU)) { initializeSubtargetDependencies(TT, GPU, FS); +} - const unsigned MaxStackAlign = 64 * 16; // Maximum stack alignment (long16) - - if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { - InstrInfo.reset(new R600InstrInfo(*this)); - TLInfo.reset(new R600TargetLowering(TM, *this)); - - // FIXME: Should have R600 specific FrameLowering - FrameLowering.reset(new AMDGPUFrameLowering( - TargetFrameLowering::StackGrowsUp, - MaxStackAlign, - 0)); - } else { - InstrInfo.reset(new SIInstrInfo(*this)); - TLInfo.reset(new SITargetLowering(TM, *this)); - FrameLowering.reset(new SIFrameLowering( - TargetFrameLowering::StackGrowsUp, - MaxStackAlign, - 0)); +// FIXME: These limits are for SI. Did they change with the larger maximum LDS +// size? +unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves) const { + switch (NWaves) { + case 10: + return 1638; + case 9: + return 1820; + case 8: + return 2048; + case 7: + return 2340; + case 6: + return 2730; + case 5: + return 3276; + case 4: + return 4096; + case 3: + return 5461; + case 2: + return 8192; + default: + return getLocalMemorySize(); } } -unsigned AMDGPUSubtarget::getStackEntrySize() const { - assert(getGeneration() <= NORTHERN_ISLANDS); - switch(getWavefrontSize()) { +unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes) const { + if (Bytes <= 1638) + return 10; + + if (Bytes <= 1820) + return 9; + + if (Bytes <= 2048) + return 8; + + if (Bytes <= 2340) + return 7; + + if (Bytes <= 2730) + return 6; + + if (Bytes <= 3276) + return 5; + + if (Bytes <= 4096) + return 4; + + if (Bytes <= 5461) + return 3; + + if (Bytes <= 8192) + return 2; + + return 1; +} + +R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS, + const TargetMachine &TM) : + AMDGPUSubtarget(TT, GPU, FS, TM), + InstrInfo(*this), + FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0), + TLInfo(TM, *this) {} + +SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS, + const TargetMachine &TM) : + AMDGPUSubtarget(TT, GPU, FS, TM), + InstrInfo(*this), + FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0), + TLInfo(TM, *this), + GISel() {} + +unsigned R600Subtarget::getStackEntrySize() const { + switch (getWavefrontSize()) { case 16: return 8; case 32: @@ -118,37 +206,36 @@ unsigned AMDGPUSubtarget::getStackEntrySize() const { } } -unsigned AMDGPUSubtarget::getAmdKernelCodeChipID() const { - switch(getGeneration()) { - default: llvm_unreachable("ChipID unknown"); - case SEA_ISLANDS: return 12; - } -} - -AMDGPU::IsaVersion AMDGPUSubtarget::getIsaVersion() const { - return AMDGPU::getIsaVersion(getFeatureBits()); +void SISubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, + unsigned NumRegionInstrs) const { + // Track register pressure so the scheduler can try to decrease + // pressure once register usage is above the threshold defined by + // SIRegisterInfo::getRegPressureSetLimit() + Policy.ShouldTrackPressure = true; + + // Enabling both top down and bottom up scheduling seems to give us less + // register spills than just using one of these approaches on its own. + Policy.OnlyTopDown = false; + Policy.OnlyBottomUp = false; + + // Enabling ShouldTrackLaneMasks crashes the SI Machine Scheduler. + if (!enableSIScheduler()) + Policy.ShouldTrackLaneMasks = true; } -bool AMDGPUSubtarget::isVGPRSpillingEnabled( - const SIMachineFunctionInfo *MFI) const { - return MFI->getShaderType() == ShaderType::COMPUTE || EnableVGPRSpilling; +bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const { + return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv()); } -void AMDGPUSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, - MachineInstr *begin, - MachineInstr *end, - unsigned NumRegionInstrs) const { - if (getGeneration() >= SOUTHERN_ISLANDS) { - - // Track register pressure so the scheduler can try to decrease - // pressure once register usage is above the threshold defined by - // SIRegisterInfo::getRegPressureSetLimit() - Policy.ShouldTrackPressure = true; - - // Enabling both top down and bottom up scheduling seems to give us less - // register spills than just using one of these approaches on its own. - Policy.OnlyTopDown = false; - Policy.OnlyBottomUp = false; +unsigned SISubtarget::getAmdKernelCodeChipID() const { + switch (getGeneration()) { + case SEA_ISLANDS: + return 12; + default: + llvm_unreachable("ChipID unknown"); } } +AMDGPU::IsaVersion SISubtarget::getIsaVersion() const { + return AMDGPU::getIsaVersion(getFeatureBits()); +} |