aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/AMDGPU/AMDGPUSubtarget.h
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/AMDGPU/AMDGPUSubtarget.h')
-rw-r--r--lib/Target/AMDGPU/AMDGPUSubtarget.h389
1 files changed, 261 insertions, 128 deletions
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 49c94f1eceb8..3fe61aa449e0 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -16,12 +16,14 @@
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
#include "AMDGPU.h"
-#include "AMDGPUFrameLowering.h"
-#include "AMDGPUInstrInfo.h"
-#include "AMDGPUISelLowering.h"
-#include "AMDGPUSubtarget.h"
+#include "R600InstrInfo.h"
+#include "R600ISelLowering.h"
+#include "R600FrameLowering.h"
+#include "SIInstrInfo.h"
+#include "SIISelLowering.h"
+#include "SIFrameLowering.h"
#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#define GET_SUBTARGETINFO_HEADER
@@ -30,9 +32,9 @@
namespace llvm {
class SIMachineFunctionInfo;
+class StringRef;
class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
-
public:
enum Generation {
R600 = 0,
@@ -45,10 +47,6 @@ public:
};
enum {
- FIXED_SGPR_COUNT_FOR_INIT_BUG = 80
- };
-
- enum {
ISAVersion0_0_0,
ISAVersion7_0_0,
ISAVersion7_0_1,
@@ -57,114 +55,116 @@ public:
ISAVersion8_0_3
};
-private:
- std::string DevName;
- bool Is64bit;
- bool DumpCode;
- bool R600ALUInst;
- bool HasVertexCache;
- short TexVTXClauseSize;
+protected:
+ // Basic subtarget description.
+ Triple TargetTriple;
Generation Gen;
- bool FP64;
- bool FP64Denormals;
- bool FP32Denormals;
+ unsigned IsaVersion;
+ unsigned WavefrontSize;
+ int LocalMemorySize;
+ int LDSBankCount;
+ unsigned MaxPrivateElementSize;
+
+ // Possibly statically set by tablegen, but may want to be overridden.
bool FastFMAF32;
- bool CaymanISA;
- bool FlatAddressSpace;
+ bool HalfRate64Ops;
+
+ // Dynamically set bits that enable features.
+ bool FP32Denormals;
+ bool FP64Denormals;
+ bool FPExceptions;
bool FlatForGlobal;
- bool EnableIRStructurizer;
+ bool UnalignedBufferAccess;
+ bool EnableXNACK;
+ bool DebuggerInsertNops;
+ bool DebuggerReserveRegs;
+ bool DebuggerEmitPrologue;
+
+ // Used as options.
+ bool EnableVGPRSpilling;
bool EnablePromoteAlloca;
- bool EnableIfCvt;
bool EnableLoadStoreOpt;
bool EnableUnsafeDSOffsetFolding;
- bool EnableXNACK;
- unsigned WavefrontSize;
- bool CFALUBug;
- int LocalMemorySize;
- bool EnableVGPRSpilling;
- bool SGPRInitBug;
+ bool EnableSIScheduler;
+ bool DumpCode;
+
+ // Subtarget properties statically set by tablegen.
+ bool FP64;
bool IsGCN;
bool GCN1Encoding;
bool GCN3Encoding;
bool CIInsts;
+ bool SGPRInitBug;
+ bool HasSMemRealTime;
+ bool Has16BitInsts;
+ bool FlatAddressSpace;
+ bool R600ALUInst;
+ bool CaymanISA;
+ bool CFALUBug;
+ bool HasVertexCache;
+ short TexVTXClauseSize;
+
+ // Dummy feature to use for assembler in tablegen.
bool FeatureDisable;
- int LDSBankCount;
- unsigned IsaVersion;
- bool EnableHugeScratchBuffer;
- bool EnableSIScheduler;
- std::unique_ptr<AMDGPUFrameLowering> FrameLowering;
- std::unique_ptr<AMDGPUTargetLowering> TLInfo;
- std::unique_ptr<AMDGPUInstrInfo> InstrInfo;
InstrItineraryData InstrItins;
- Triple TargetTriple;
public:
- AMDGPUSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
- TargetMachine &TM);
+ AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
+ const TargetMachine &TM);
+ virtual ~AMDGPUSubtarget();
AMDGPUSubtarget &initializeSubtargetDependencies(const Triple &TT,
StringRef GPU, StringRef FS);
- const AMDGPUFrameLowering *getFrameLowering() const override {
- return FrameLowering.get();
- }
- const AMDGPUInstrInfo *getInstrInfo() const override {
- return InstrInfo.get();
- }
- const AMDGPURegisterInfo *getRegisterInfo() const override {
- return &InstrInfo->getRegisterInfo();
- }
- AMDGPUTargetLowering *getTargetLowering() const override {
- return TLInfo.get();
- }
+ const AMDGPUInstrInfo *getInstrInfo() const override;
+ const AMDGPUFrameLowering *getFrameLowering() const override;
+ const AMDGPUTargetLowering *getTargetLowering() const override;
+ const AMDGPURegisterInfo *getRegisterInfo() const override;
+
const InstrItineraryData *getInstrItineraryData() const override {
return &InstrItins;
}
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
- bool is64bit() const {
- return Is64bit;
- }
-
- bool hasVertexCache() const {
- return HasVertexCache;
- }
-
- short getTexVTXClauseSize() const {
- return TexVTXClauseSize;
+ bool isAmdHsaOS() const {
+ return TargetTriple.getOS() == Triple::AMDHSA;
}
Generation getGeneration() const {
return Gen;
}
- bool hasHWFP64() const {
- return FP64;
+ unsigned getWavefrontSize() const {
+ return WavefrontSize;
}
- bool hasCaymanISA() const {
- return CaymanISA;
+ int getLocalMemorySize() const {
+ return LocalMemorySize;
}
- bool hasFP32Denormals() const {
- return FP32Denormals;
+ int getLDSBankCount() const {
+ return LDSBankCount;
}
- bool hasFP64Denormals() const {
- return FP64Denormals;
+ unsigned getMaxPrivateElementSize() const {
+ return MaxPrivateElementSize;
+ }
+
+ bool hasHWFP64() const {
+ return FP64;
}
bool hasFastFMAF32() const {
return FastFMAF32;
}
- bool hasFlatAddressSpace() const {
- return FlatAddressSpace;
+ bool hasHalfRate64Ops() const {
+ return HalfRate64Ops;
}
- bool useFlatForGlobal() const {
- return FlatForGlobal;
+ bool hasAddr64() const {
+ return (getGeneration() < VOLCANIC_ISLANDS);
}
bool hasBFE() const {
@@ -214,116 +214,249 @@ public:
return (getGeneration() >= EVERGREEN);
}
- bool IsIRStructurizerEnabled() const {
- return EnableIRStructurizer;
+ bool hasCaymanISA() const {
+ return CaymanISA;
}
bool isPromoteAllocaEnabled() const {
return EnablePromoteAlloca;
}
- bool isIfCvtEnabled() const {
- return EnableIfCvt;
+ bool unsafeDSOffsetFoldingEnabled() const {
+ return EnableUnsafeDSOffsetFolding;
}
- bool loadStoreOptEnabled() const {
- return EnableLoadStoreOpt;
+ bool dumpCode() const {
+ return DumpCode;
}
- bool unsafeDSOffsetFoldingEnabled() const {
- return EnableUnsafeDSOffsetFolding;
+ /// Return the amount of LDS that can be used that will not restrict the
+ /// occupancy lower than WaveCount.
+ unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount) const;
+
+ /// Inverse of getMaxLocalMemSizeWithWaveCount. Return the maximum wavecount if
+ /// the given LDS memory size is the only constraint.
+ unsigned getOccupancyWithLocalMemSize(uint32_t Bytes) const;
+
+
+ bool hasFP32Denormals() const {
+ return FP32Denormals;
}
- unsigned getWavefrontSize() const {
- return WavefrontSize;
+ bool hasFP64Denormals() const {
+ return FP64Denormals;
}
- unsigned getStackEntrySize() const;
+ bool hasFPExceptions() const {
+ return FPExceptions;
+ }
- bool hasCFAluBug() const {
- assert(getGeneration() <= NORTHERN_ISLANDS);
- return CFALUBug;
+ bool useFlatForGlobal() const {
+ return FlatForGlobal;
}
- int getLocalMemorySize() const {
- return LocalMemorySize;
+ bool hasUnalignedBufferAccess() const {
+ return UnalignedBufferAccess;
}
- bool hasSGPRInitBug() const {
- return SGPRInitBug;
+ bool isXNACKEnabled() const {
+ return EnableXNACK;
}
- int getLDSBankCount() const {
- return LDSBankCount;
+ unsigned getMaxWavesPerCU() const {
+ if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)
+ return 10;
+
+ // FIXME: Not sure what this is for other subtargets.
+ return 8;
}
- unsigned getAmdKernelCodeChipID() const;
+ /// \brief Returns the offset in bytes from the start of the input buffer
+ /// of the first explicit kernel argument.
+ unsigned getExplicitKernelArgOffset() const {
+ return isAmdHsaOS() ? 0 : 36;
+ }
- AMDGPU::IsaVersion getIsaVersion() const;
+ unsigned getStackAlignment() const {
+ // Scratch is allocated in 256 dword per wave blocks.
+ return 4 * 256 / getWavefrontSize();
+ }
bool enableMachineScheduler() const override {
return true;
}
- void overrideSchedPolicy(MachineSchedPolicy &Policy,
- MachineInstr *begin, MachineInstr *end,
- unsigned NumRegionInstrs) const override;
+ bool enableSubRegLiveness() const override {
+ return true;
+ }
+};
- // Helper functions to simplify if statements
- bool isTargetELF() const {
- return false;
+class R600Subtarget final : public AMDGPUSubtarget {
+private:
+ R600InstrInfo InstrInfo;
+ R600FrameLowering FrameLowering;
+ R600TargetLowering TLInfo;
+
+public:
+ R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
+ const TargetMachine &TM);
+
+ const R600InstrInfo *getInstrInfo() const override {
+ return &InstrInfo;
}
- StringRef getDeviceName() const {
- return DevName;
+ const R600FrameLowering *getFrameLowering() const override {
+ return &FrameLowering;
}
- bool enableHugeScratchBuffer() const {
- return EnableHugeScratchBuffer;
+ const R600TargetLowering *getTargetLowering() const override {
+ return &TLInfo;
}
- bool enableSIScheduler() const {
- return EnableSIScheduler;
+ const R600RegisterInfo *getRegisterInfo() const override {
+ return &InstrInfo.getRegisterInfo();
}
- bool dumpCode() const {
- return DumpCode;
+ bool hasCFAluBug() const {
+ return CFALUBug;
}
- bool r600ALUEncoding() const {
- return R600ALUInst;
+
+ bool hasVertexCache() const {
+ return HasVertexCache;
}
- bool isAmdHsaOS() const {
- return TargetTriple.getOS() == Triple::AMDHSA;
+
+ short getTexVTXClauseSize() const {
+ return TexVTXClauseSize;
}
- bool isVGPRSpillingEnabled(const SIMachineFunctionInfo *MFI) const;
- bool isXNACKEnabled() const {
- return EnableXNACK;
+ unsigned getStackEntrySize() const;
+};
+
+class SISubtarget final : public AMDGPUSubtarget {
+public:
+ enum {
+ FIXED_SGPR_COUNT_FOR_INIT_BUG = 80
+ };
+
+private:
+ SIInstrInfo InstrInfo;
+ SIFrameLowering FrameLowering;
+ SITargetLowering TLInfo;
+ std::unique_ptr<GISelAccessor> GISel;
+
+public:
+ SISubtarget(const Triple &TT, StringRef CPU, StringRef FS,
+ const TargetMachine &TM);
+
+ const SIInstrInfo *getInstrInfo() const override {
+ return &InstrInfo;
}
- unsigned getMaxWavesPerCU() const {
- if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)
- return 10;
+ const SIFrameLowering *getFrameLowering() const override {
+ return &FrameLowering;
+ }
- // FIXME: Not sure what this is for other subtagets.
- llvm_unreachable("do not know max waves per CU for this subtarget.");
+ const SITargetLowering *getTargetLowering() const override {
+ return &TLInfo;
}
- bool enableSubRegLiveness() const override {
- return true;
+ const CallLowering *getCallLowering() const override {
+ assert(GISel && "Access to GlobalISel APIs not set");
+ return GISel->getCallLowering();
}
- /// \brief Returns the offset in bytes from the start of the input buffer
- /// of the first explicit kernel argument.
- unsigned getExplicitKernelArgOffset() const {
- return isAmdHsaOS() ? 0 : 36;
+ const SIRegisterInfo *getRegisterInfo() const override {
+ return &InstrInfo.getRegisterInfo();
+ }
+
+ void setGISelAccessor(GISelAccessor &GISel) {
+ this->GISel.reset(&GISel);
}
+ void overrideSchedPolicy(MachineSchedPolicy &Policy,
+ unsigned NumRegionInstrs) const override;
+
+ bool isVGPRSpillingEnabled(const Function& F) const;
+
+ unsigned getAmdKernelCodeChipID() const;
+
+ AMDGPU::IsaVersion getIsaVersion() const;
+
unsigned getMaxNumUserSGPRs() const {
return 16;
}
+
+ bool hasFlatAddressSpace() const {
+ return FlatAddressSpace;
+ }
+
+ bool hasSMemRealTime() const {
+ return HasSMemRealTime;
+ }
+
+ bool has16BitInsts() const {
+ return Has16BitInsts;
+ }
+
+ bool enableSIScheduler() const {
+ return EnableSIScheduler;
+ }
+
+ bool debuggerSupported() const {
+ return debuggerInsertNops() && debuggerReserveRegs() &&
+ debuggerEmitPrologue();
+ }
+
+ bool debuggerInsertNops() const {
+ return DebuggerInsertNops;
+ }
+
+ bool debuggerReserveRegs() const {
+ return DebuggerReserveRegs;
+ }
+
+ bool debuggerEmitPrologue() const {
+ return DebuggerEmitPrologue;
+ }
+
+ bool loadStoreOptEnabled() const {
+ return EnableLoadStoreOpt;
+ }
+
+ bool hasSGPRInitBug() const {
+ return SGPRInitBug;
+ }
};
+
+inline const AMDGPUInstrInfo *AMDGPUSubtarget::getInstrInfo() const {
+ if (getGeneration() >= SOUTHERN_ISLANDS)
+ return static_cast<const SISubtarget *>(this)->getInstrInfo();
+
+ return static_cast<const R600Subtarget *>(this)->getInstrInfo();
+}
+
+inline const AMDGPUFrameLowering *AMDGPUSubtarget::getFrameLowering() const {
+ if (getGeneration() >= SOUTHERN_ISLANDS)
+ return static_cast<const SISubtarget *>(this)->getFrameLowering();
+
+ return static_cast<const R600Subtarget *>(this)->getFrameLowering();
+}
+
+inline const AMDGPUTargetLowering *AMDGPUSubtarget::getTargetLowering() const {
+ if (getGeneration() >= SOUTHERN_ISLANDS)
+ return static_cast<const SISubtarget *>(this)->getTargetLowering();
+
+ return static_cast<const R600Subtarget *>(this)->getTargetLowering();
+}
+
+inline const AMDGPURegisterInfo *AMDGPUSubtarget::getRegisterInfo() const {
+ if (getGeneration() >= SOUTHERN_ISLANDS)
+ return static_cast<const SISubtarget *>(this)->getRegisterInfo();
+
+ return static_cast<const R600Subtarget *>(this)->getRegisterInfo();
+}
+
} // End namespace llvm
#endif