aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/NVPTX/NVPTXTargetMachine.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/NVPTX/NVPTXTargetMachine.cpp')
-rw-r--r--lib/Target/NVPTX/NVPTXTargetMachine.cpp129
1 files changed, 92 insertions, 37 deletions
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index aa931b134da9..b9f5919964c7 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/LegacyPassManager.h"
@@ -44,15 +45,23 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/GVN.h"
using namespace llvm;
+static cl::opt<bool> UseInferAddressSpaces(
+ "nvptx-use-infer-addrspace", cl::init(false), cl::Hidden,
+ cl::desc("Optimize address spaces using NVPTXInferAddressSpaces instead of "
+ "NVPTXFavorNonGenericAddrSpaces"));
+
namespace llvm {
+void initializeNVVMIntrRangePass(PassRegistry&);
void initializeNVVMReflectPass(PassRegistry&);
void initializeGenericToNVVMPass(PassRegistry&);
void initializeNVPTXAllocaHoistingPass(PassRegistry &);
void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &);
+void initializeNVPTXInferAddressSpacesPass(PassRegistry &);
void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
void initializeNVPTXLowerKernelArgsPass(PassRegistry &);
void initializeNVPTXLowerAllocaPass(PassRegistry &);
@@ -67,10 +76,12 @@ extern "C" void LLVMInitializeNVPTXTarget() {
// but it's very NVPTX-specific.
PassRegistry &PR = *PassRegistry::getPassRegistry();
initializeNVVMReflectPass(PR);
+ initializeNVVMIntrRangePass(PR);
initializeGenericToNVVMPass(PR);
initializeNVPTXAllocaHoistingPass(PR);
initializeNVPTXAssignValidGlobalNamesPass(PR);
initializeNVPTXFavorNonGenericAddrSpacesPass(PR);
+ initializeNVPTXInferAddressSpacesPass(PR);
initializeNVPTXLowerKernelArgsPass(PR);
initializeNVPTXLowerAllocaPass(PR);
initializeNVPTXLowerAggrCopiesPass(PR);
@@ -90,11 +101,15 @@ static std::string computeDataLayout(bool is64Bit) {
NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
+ Optional<Reloc::Model> RM,
+ CodeModel::Model CM,
CodeGenOpt::Level OL, bool is64bit)
- : LLVMTargetMachine(T, computeDataLayout(is64bit), TT, CPU, FS, Options, RM,
- CM, OL),
- is64bit(is64bit), TLOF(make_unique<NVPTXTargetObjectFile>()),
+ // The pic relocation model is used regardless of what the client has
+ // specified, as it is the only relocation model currently supported.
+ : LLVMTargetMachine(T, computeDataLayout(is64bit), TT, CPU, FS, Options,
+ Reloc::PIC_, CM, OL),
+ is64bit(is64bit),
+ TLOF(make_unique<NVPTXTargetObjectFile>()),
Subtarget(TT, CPU, FS, *this) {
if (TT.getOS() == Triple::NVCL)
drvInterface = NVPTX::NVCL;
@@ -110,7 +125,8 @@ void NVPTXTargetMachine32::anchor() {}
NVPTXTargetMachine32::NVPTXTargetMachine32(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
+ Optional<Reloc::Model> RM,
+ CodeModel::Model CM,
CodeGenOpt::Level OL)
: NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
@@ -119,7 +135,8 @@ void NVPTXTargetMachine64::anchor() {}
NVPTXTargetMachine64::NVPTXTargetMachine64(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
+ Optional<Reloc::Model> RM,
+ CodeModel::Model CM,
CodeGenOpt::Level OL)
: NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
@@ -143,14 +160,25 @@ public:
void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
private:
- // if the opt level is aggressive, add GVN; otherwise, add EarlyCSE.
+ // If the opt level is aggressive, add GVN; otherwise, add EarlyCSE. This
+ // function is only called in opt mode.
void addEarlyCSEOrGVNPass();
+
+ // Add passes that propagate special memory spaces.
+ void addAddressSpaceInferencePasses();
+
+ // Add passes that perform straight-line scalar optimizations.
+ void addStraightLineScalarOptimizationPasses();
};
} // end anonymous namespace
TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
- NVPTXPassConfig *PassConfig = new NVPTXPassConfig(this, PM);
- return PassConfig;
+ return new NVPTXPassConfig(this, PM);
+}
+
+void NVPTXTargetMachine::addEarlyAsPossiblePasses(PassManagerBase &PM) {
+ PM.add(createNVVMReflectPass());
+ PM.add(createNVVMIntrRangePass(Subtarget.getSmVersion()));
}
TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() {
@@ -166,34 +194,23 @@ void NVPTXPassConfig::addEarlyCSEOrGVNPass() {
addPass(createEarlyCSEPass());
}
-void NVPTXPassConfig::addIRPasses() {
- // The following passes are known to not play well with virtual regs hanging
- // around after register allocation (which in our case, is *all* registers).
- // We explicitly disable them here. We do, however, need some functionality
- // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the
- // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp).
- disablePass(&PrologEpilogCodeInserterID);
- disablePass(&MachineCopyPropagationID);
- disablePass(&TailDuplicateID);
-
- addPass(createNVVMReflectPass());
- addPass(createNVPTXImageOptimizerPass());
- addPass(createNVPTXAssignValidGlobalNamesPass());
- addPass(createGenericToNVVMPass());
-
- // === Propagate special address spaces ===
- addPass(createNVPTXLowerKernelArgsPass(&getNVPTXTargetMachine()));
+void NVPTXPassConfig::addAddressSpaceInferencePasses() {
// NVPTXLowerKernelArgs emits alloca for byval parameters which can often
// be eliminated by SROA.
addPass(createSROAPass());
addPass(createNVPTXLowerAllocaPass());
- addPass(createNVPTXFavorNonGenericAddrSpacesPass());
- // FavorNonGenericAddrSpaces shortcuts unnecessary addrspacecasts, and leave
- // them unused. We could remove dead code in an ad-hoc manner, but that
- // requires manual work and might be error-prone.
- addPass(createDeadCodeEliminationPass());
+ if (UseInferAddressSpaces) {
+ addPass(createNVPTXInferAddressSpacesPass());
+ } else {
+ addPass(createNVPTXFavorNonGenericAddrSpacesPass());
+ // FavorNonGenericAddrSpaces shortcuts unnecessary addrspacecasts, and leave
+ // them unused. We could remove dead code in an ad-hoc manner, but that
+ // requires manual work and might be error-prone.
+ addPass(createDeadCodeEliminationPass());
+ }
+}
- // === Straight-line scalar optimizations ===
+void NVPTXPassConfig::addStraightLineScalarOptimizationPasses() {
addPass(createSeparateConstOffsetFromGEPPass());
addPass(createSpeculativeExecutionPass());
// ReassociateGEPs exposes more opportunites for SLSR. See
@@ -208,6 +225,41 @@ void NVPTXPassConfig::addIRPasses() {
// NaryReassociate on GEPs creates redundant common expressions, so run
// EarlyCSE after it.
addPass(createEarlyCSEPass());
+}
+
+void NVPTXPassConfig::addIRPasses() {
+ // The following passes are known to not play well with virtual regs hanging
+ // around after register allocation (which in our case, is *all* registers).
+ // We explicitly disable them here. We do, however, need some functionality
+ // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the
+ // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp).
+ disablePass(&PrologEpilogCodeInserterID);
+ disablePass(&MachineCopyPropagationID);
+ disablePass(&TailDuplicateID);
+ disablePass(&StackMapLivenessID);
+ disablePass(&LiveDebugValuesID);
+ disablePass(&PostRASchedulerID);
+ disablePass(&FuncletLayoutID);
+ disablePass(&PatchableFunctionID);
+
+ // NVVMReflectPass is added in addEarlyAsPossiblePasses, so hopefully running
+ // it here does nothing. But since we need it for correctness when lowering
+ // to NVPTX, run it here too, in case whoever built our pass pipeline didn't
+ // call addEarlyAsPossiblePasses.
+ addPass(createNVVMReflectPass());
+
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createNVPTXImageOptimizerPass());
+ addPass(createNVPTXAssignValidGlobalNamesPass());
+ addPass(createGenericToNVVMPass());
+
+ // NVPTXLowerKernelArgs is required for correctness and should be run right
+ // before the address space inference passes.
+ addPass(createNVPTXLowerKernelArgsPass(&getNVPTXTargetMachine()));
+ if (getOptLevel() != CodeGenOpt::None) {
+ addAddressSpaceInferencePasses();
+ addStraightLineScalarOptimizationPasses();
+ }
// === LSR and other generic IR passes ===
TargetPassConfig::addIRPasses();
@@ -223,7 +275,8 @@ void NVPTXPassConfig::addIRPasses() {
// %1 = shl %a, 2
//
// but EarlyCSE can do neither of them.
- addEarlyCSEOrGVNPass();
+ if (getOptLevel() != CodeGenOpt::None)
+ addEarlyCSEOrGVNPass();
}
bool NVPTXPassConfig::addInstSelector() {
@@ -241,10 +294,12 @@ bool NVPTXPassConfig::addInstSelector() {
void NVPTXPassConfig::addPostRegAlloc() {
addPass(createNVPTXPrologEpilogPass(), false);
- // NVPTXPrologEpilogPass calculates frame object offset and replace frame
- // index with VRFrame register. NVPTXPeephole need to be run after that and
- // will replace VRFrame with VRFrameLocal when possible.
- addPass(createNVPTXPeephole());
+ if (getOptLevel() != CodeGenOpt::None) {
+ // NVPTXPrologEpilogPass calculates frame object offset and replace frame
+ // index with VRFrame register. NVPTXPeephole need to be run after that and
+ // will replace VRFrame with VRFrameLocal when possible.
+ addPass(createNVPTXPeephole());
+ }
}
FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) {