aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/IPO/PassManagerBuilder.cpp')
-rw-r--r--llvm/lib/Transforms/IPO/PassManagerBuilder.cpp277
1 files changed, 157 insertions, 120 deletions
diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index 068328391dff..aa916345954d 100644
--- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -27,8 +27,10 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Verifier.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Target/CGPassBuilderOption.h"
#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/Attributor.h"
@@ -42,6 +44,7 @@
#include "llvm/Transforms/Scalar/InstSimplifyPass.h"
#include "llvm/Transforms/Scalar/LICM.h"
#include "llvm/Transforms/Scalar/LoopUnrollPass.h"
+#include "llvm/Transforms/Scalar/SCCP.h"
#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Vectorize.h"
@@ -51,6 +54,7 @@
using namespace llvm;
+namespace llvm {
cl::opt<bool> RunPartialInlining("enable-partial-inlining", cl::init(false),
cl::Hidden, cl::ZeroOrMore,
cl::desc("Run Partial inlinining pass"));
@@ -72,7 +76,6 @@ cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
cl::desc("Run the NewGVN pass"));
// Experimental option to use CFL-AA
-enum class CFLAAType { None, Steensgaard, Andersen, Both };
static cl::opt<::CFLAAType>
UseCFLAA("use-cfl-aa", cl::init(::CFLAAType::None), cl::Hidden,
cl::desc("Enable the new, experimental CFL alias analysis"),
@@ -84,9 +87,9 @@ static cl::opt<::CFLAAType>
clEnumValN(::CFLAAType::Both, "both",
"Enable both variants of CFL-AA")));
-static cl::opt<bool> EnableLoopInterchange(
+cl::opt<bool> EnableLoopInterchange(
"enable-loopinterchange", cl::init(false), cl::Hidden,
- cl::desc("Enable the new, experimental LoopInterchange Pass"));
+ cl::desc("Enable the experimental LoopInterchange Pass"));
cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam", cl::init(false),
cl::Hidden,
@@ -96,6 +99,10 @@ cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
cl::Hidden,
cl::desc("Enable the LoopFlatten Pass"));
+cl::opt<bool> EnableDFAJumpThreading("enable-dfa-jump-thread",
+ cl::desc("Enable DFA jump threading."),
+ cl::init(false), cl::Hidden);
+
static cl::opt<bool>
EnablePrepareForThinLTO("prepare-for-thinlto", cl::init(false), cl::Hidden,
cl::desc("Enable preparation for ThinLTO."));
@@ -165,6 +172,10 @@ cl::opt<bool> EnableConstraintElimination(
cl::desc(
"Enable pass to eliminate conditions based on linear constraints."));
+cl::opt<bool> EnableFunctionSpecialization(
+ "enable-function-specialization", cl::init(false), cl::Hidden,
+ cl::desc("Enable Function Specialization pass"));
+
cl::opt<AttributorRunOption> AttributorRun(
"attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE),
cl::desc("Enable the attributor inter-procedural deduction pass."),
@@ -178,6 +189,7 @@ cl::opt<AttributorRunOption> AttributorRun(
"disable attributor runs")));
extern cl::opt<bool> EnableKnowledgeRetention;
+} // namespace llvm
PassManagerBuilder::PassManagerBuilder() {
OptLevel = 2;
@@ -300,7 +312,6 @@ void PassManagerBuilder::addInitialAliasAnalysisPasses(
void PassManagerBuilder::populateFunctionPassManager(
legacy::FunctionPassManager &FPM) {
addExtensionsToPM(EP_EarlyAsPossible, FPM);
- FPM.add(createEntryExitInstrumenterPass());
// Add LibraryInfo if we have some.
if (LibraryInfo)
@@ -317,10 +328,12 @@ void PassManagerBuilder::populateFunctionPassManager(
addInitialAliasAnalysisPasses(FPM);
+ // Lower llvm.expect to metadata before attempting transforms.
+ // Compare/branch metadata may alter the behavior of passes like SimplifyCFG.
+ FPM.add(createLowerExpectIntrinsicPass());
FPM.add(createCFGSimplificationPass());
FPM.add(createSROAPass());
FPM.add(createEarlyCSEPass());
- FPM.add(createLowerExpectIntrinsicPass());
}
// Do PGO instrumentation generation or use pass as the option specified.
@@ -432,6 +445,10 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
MPM.add(createLoopInstSimplifyPass());
MPM.add(createLoopSimplifyCFGPass());
}
+ // Try to remove as much code from the loop header as possible,
+ // to reduce amount of IR that will have to be duplicated.
+ // TODO: Investigate promotion cap for O1.
+ MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
// Rotate Loop - disable header duplication at -Oz
MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, PrepareForLTO));
// TODO: Investigate promotion cap for O1.
@@ -441,7 +458,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
else
MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
// FIXME: We break the loop pass pipeline here in order to do full
- // simplify-cfg. Eventually loop-simplifycfg should be enhanced to replace the
+ // simplifycfg. Eventually loop-simplifycfg should be enhanced to replace the
// need for this.
MPM.add(createCFGSimplificationPass());
MPM.add(createInstructionCombiningPass());
@@ -472,7 +489,6 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
MPM.add(NewGVN ? createNewGVNPass()
: createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
}
- MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset
MPM.add(createSCCPPass()); // Constant prop with SCCP
if (EnableConstraintElimination)
@@ -488,11 +504,15 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
MPM.add(createInstructionCombiningPass());
addExtensionsToPM(EP_Peephole, MPM);
if (OptLevel > 1) {
+ if (EnableDFAJumpThreading && SizeLevel == 0)
+ MPM.add(createDFAJumpThreadingPass());
+
MPM.add(createJumpThreadingPass()); // Thread jumps
MPM.add(createCorrelatedValuePropagationPass());
}
MPM.add(createAggressiveDCEPass()); // Delete dead instructions
+ MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset
// TODO: Investigate if this is too expensive at O1.
if (OptLevel > 1) {
MPM.add(createDeadStoreEliminationPass()); // Delete dead stores
@@ -504,7 +524,9 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
if (RerollLoops)
MPM.add(createLoopRerollPass());
- MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
+ // Merge & remove BBs and sink & hoist common instructions.
+ MPM.add(createCFGSimplificationPass(
+ SimplifyCFGOptions().hoistCommonInsts(true).sinkCommonInsts(true)));
// Clean up after everything.
MPM.add(createInstructionCombiningPass());
addExtensionsToPM(EP_Peephole, MPM);
@@ -514,6 +536,120 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
MPM.add(createControlHeightReductionLegacyPass());
}
+/// FIXME: Should LTO cause any differences to this set of passes?
+void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM,
+ bool IsFullLTO) {
+ PM.add(createLoopVectorizePass(!LoopsInterleaved, !LoopVectorize));
+
+ if (IsFullLTO) {
+ // The vectorizer may have significantly shortened a loop body; unroll
+ // again. Unroll small loops to hide loop backedge latency and saturate any
+ // parallel execution resources of an out-of-order processor. We also then
+ // need to clean up redundancies and loop invariant code.
+ // FIXME: It would be really good to use a loop-integrated instruction
+ // combiner for cleanup here so that the unrolling and LICM can be pipelined
+ // across the loop nests.
+ // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
+ if (EnableUnrollAndJam && !DisableUnrollLoops)
+ PM.add(createLoopUnrollAndJamPass(OptLevel));
+ PM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops,
+ ForgetAllSCEVInLoopUnroll));
+ PM.add(createWarnMissedTransformationsPass());
+ }
+
+ if (!IsFullLTO) {
+ // Eliminate loads by forwarding stores from the previous iteration to loads
+ // of the current iteration.
+ PM.add(createLoopLoadEliminationPass());
+ }
+ // Cleanup after the loop optimization passes.
+ PM.add(createInstructionCombiningPass());
+
+ if (OptLevel > 1 && ExtraVectorizerPasses) {
+ // At higher optimization levels, try to clean up any runtime overlap and
+ // alignment checks inserted by the vectorizer. We want to track correlated
+ // runtime checks for two inner loops in the same outer loop, fold any
+ // common computations, hoist loop-invariant aspects out of any outer loop,
+ // and unswitch the runtime checks if possible. Once hoisted, we may have
+ // dead (or speculatable) control flows or more combining opportunities.
+ PM.add(createEarlyCSEPass());
+ PM.add(createCorrelatedValuePropagationPass());
+ PM.add(createInstructionCombiningPass());
+ PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+ PM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
+ PM.add(createCFGSimplificationPass());
+ PM.add(createInstructionCombiningPass());
+ }
+
+ // Now that we've formed fast to execute loop structures, we do further
+ // optimizations. These are run afterward as they might block doing complex
+ // analyses and transforms such as what are needed for loop vectorization.
+
+ // Cleanup after loop vectorization, etc. Simplification passes like CVP and
+ // GVN, loop transforms, and others have already run, so it's now better to
+ // convert to more optimized IR using more aggressive simplify CFG options.
+ // The extra sinking transform can create larger basic blocks, so do this
+ // before SLP vectorization.
+ PM.add(createCFGSimplificationPass(SimplifyCFGOptions()
+ .forwardSwitchCondToPhi(true)
+ .convertSwitchToLookupTable(true)
+ .needCanonicalLoops(false)
+ .hoistCommonInsts(true)
+ .sinkCommonInsts(true)));
+
+ if (IsFullLTO) {
+ PM.add(createSCCPPass()); // Propagate exposed constants
+ PM.add(createInstructionCombiningPass()); // Clean up again
+ PM.add(createBitTrackingDCEPass());
+ }
+
+ // Optimize parallel scalar instruction chains into SIMD instructions.
+ if (SLPVectorize) {
+ PM.add(createSLPVectorizerPass());
+ if (OptLevel > 1 && ExtraVectorizerPasses)
+ PM.add(createEarlyCSEPass());
+ }
+
+ // Enhance/cleanup vector code.
+ PM.add(createVectorCombinePass());
+
+ if (!IsFullLTO) {
+ addExtensionsToPM(EP_Peephole, PM);
+ PM.add(createInstructionCombiningPass());
+
+ if (EnableUnrollAndJam && !DisableUnrollLoops) {
+ // Unroll and Jam. We do this before unroll but need to be in a separate
+ // loop pass manager in order for the outer loop to be processed by
+ // unroll and jam before the inner loop is unrolled.
+ PM.add(createLoopUnrollAndJamPass(OptLevel));
+ }
+
+ // Unroll small loops
+ PM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops,
+ ForgetAllSCEVInLoopUnroll));
+
+ if (!DisableUnrollLoops) {
+ // LoopUnroll may generate some redundency to cleanup.
+ PM.add(createInstructionCombiningPass());
+
+ // Runtime unrolling will introduce runtime check in loop prologue. If the
+ // unrolled loop is a inner loop, then the prologue will be inside the
+ // outer loop. LICM pass can help to promote the runtime check out if the
+ // checked value is loop invariant.
+ PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+ }
+
+ PM.add(createWarnMissedTransformationsPass());
+ }
+
+ // After vectorization and unrolling, assume intrinsics may tell us more
+ // about pointer alignments.
+ PM.add(createAlignmentFromAssumptionsPass());
+
+ if (IsFullLTO)
+ PM.add(createInstructionCombiningPass());
+}
+
void PassManagerBuilder::populateModulePassManager(
legacy::PassManagerBase &MPM) {
// Whether this is a default or *LTO pre-link pipeline. The FullLTO post-link
@@ -616,6 +752,10 @@ void PassManagerBuilder::populateModulePassManager(
if (OptLevel > 2)
MPM.add(createCallSiteSplittingPass());
+ // Propage constant function arguments by specializing the functions.
+ if (OptLevel > 2 && EnableFunctionSpecialization)
+ MPM.add(createFunctionSpecializationPass());
+
MPM.add(createIPSCCPPass()); // IP SCCP
MPM.add(createCalledValuePropagationPass());
@@ -663,7 +803,7 @@ void PassManagerBuilder::populateModulePassManager(
// Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
// there are no OpenMP runtime calls present in the module.
if (OptLevel > 1)
- MPM.add(createOpenMPOptLegacyPass());
+ MPM.add(createOpenMPOptCGSCCLegacyPass());
MPM.add(createPostOrderFunctionAttrsLegacyPass());
if (OptLevel > 2)
@@ -785,88 +925,7 @@ void PassManagerBuilder::populateModulePassManager(
// llvm.loop.distribute=true or when -enable-loop-distribute is specified.
MPM.add(createLoopDistributePass());
- MPM.add(createLoopVectorizePass(!LoopsInterleaved, !LoopVectorize));
-
- // Eliminate loads by forwarding stores from the previous iteration to loads
- // of the current iteration.
- MPM.add(createLoopLoadEliminationPass());
-
- // FIXME: Because of #pragma vectorize enable, the passes below are always
- // inserted in the pipeline, even when the vectorizer doesn't run (ex. when
- // on -O1 and no #pragma is found). Would be good to have these two passes
- // as function calls, so that we can only pass them when the vectorizer
- // changed the code.
- MPM.add(createInstructionCombiningPass());
- if (OptLevel > 1 && ExtraVectorizerPasses) {
- // At higher optimization levels, try to clean up any runtime overlap and
- // alignment checks inserted by the vectorizer. We want to track correllated
- // runtime checks for two inner loops in the same outer loop, fold any
- // common computations, hoist loop-invariant aspects out of any outer loop,
- // and unswitch the runtime checks if possible. Once hoisted, we may have
- // dead (or speculatable) control flows or more combining opportunities.
- MPM.add(createEarlyCSEPass());
- MPM.add(createCorrelatedValuePropagationPass());
- MPM.add(createInstructionCombiningPass());
- MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
- MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
- MPM.add(createCFGSimplificationPass());
- MPM.add(createInstructionCombiningPass());
- }
-
- // Cleanup after loop vectorization, etc. Simplification passes like CVP and
- // GVN, loop transforms, and others have already run, so it's now better to
- // convert to more optimized IR using more aggressive simplify CFG options.
- // The extra sinking transform can create larger basic blocks, so do this
- // before SLP vectorization.
- // FIXME: study whether hoisting and/or sinking of common instructions should
- // be delayed until after SLP vectorizer.
- MPM.add(createCFGSimplificationPass(SimplifyCFGOptions()
- .forwardSwitchCondToPhi(true)
- .convertSwitchToLookupTable(true)
- .needCanonicalLoops(false)
- .hoistCommonInsts(true)
- .sinkCommonInsts(true)));
-
- if (SLPVectorize) {
- MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
- if (OptLevel > 1 && ExtraVectorizerPasses) {
- MPM.add(createEarlyCSEPass());
- }
- }
-
- // Enhance/cleanup vector code.
- MPM.add(createVectorCombinePass());
-
- addExtensionsToPM(EP_Peephole, MPM);
- MPM.add(createInstructionCombiningPass());
-
- if (EnableUnrollAndJam && !DisableUnrollLoops) {
- // Unroll and Jam. We do this before unroll but need to be in a separate
- // loop pass manager in order for the outer loop to be processed by
- // unroll and jam before the inner loop is unrolled.
- MPM.add(createLoopUnrollAndJamPass(OptLevel));
- }
-
- // Unroll small loops
- MPM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops,
- ForgetAllSCEVInLoopUnroll));
-
- if (!DisableUnrollLoops) {
- // LoopUnroll may generate some redundency to cleanup.
- MPM.add(createInstructionCombiningPass());
-
- // Runtime unrolling will introduce runtime check in loop prologue. If the
- // unrolled loop is a inner loop, then the prologue will be inside the
- // outer loop. LICM pass can help to promote the runtime check out if the
- // checked value is loop invariant.
- MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
- }
-
- MPM.add(createWarnMissedTransformationsPass());
-
- // After vectorization and unrolling, assume intrinsics may tell us more
- // about pointer alignments.
- MPM.add(createAlignmentFromAssumptionsPass());
+ addVectorPasses(MPM, /* IsFullLTO */ false);
// FIXME: We shouldn't bother with this anymore.
MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes
@@ -952,6 +1011,10 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
PM.add(
createPGOIndirectCallPromotionLegacyPass(true, !PGOSampleUse.empty()));
+ // Propage constant function arguments by specializing the functions.
+ if (EnableFunctionSpecialization)
+ PM.add(createFunctionSpecializationPass());
+
// Propagate constants at call sites into the functions they call. This
// opens opportunities for globalopt (and inlining) by substituting function
// pointers passed as arguments to direct uses of functions.
@@ -1023,7 +1086,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
// Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
// there are no OpenMP runtime calls present in the module.
if (OptLevel > 1)
- PM.add(createOpenMPOptLegacyPass());
+ PM.add(createOpenMPOptCGSCCLegacyPass());
// Optimize globals again if we ran the inliner.
if (RunInliner)
@@ -1076,35 +1139,9 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
PM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops,
ForgetAllSCEVInLoopUnroll));
PM.add(createLoopDistributePass());
- PM.add(createLoopVectorizePass(true, !LoopVectorize));
- // The vectorizer may have significantly shortened a loop body; unroll again.
- PM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops,
- ForgetAllSCEVInLoopUnroll));
-
- PM.add(createWarnMissedTransformationsPass());
-
- // Now that we've optimized loops (in particular loop induction variables),
- // we may have exposed more scalar opportunities. Run parts of the scalar
- // optimizer again at this point.
- PM.add(createInstructionCombiningPass()); // Initial cleanup
- PM.add(createCFGSimplificationPass(SimplifyCFGOptions() // if-convert
- .hoistCommonInsts(true)));
- PM.add(createSCCPPass()); // Propagate exposed constants
- PM.add(createInstructionCombiningPass()); // Clean up again
- PM.add(createBitTrackingDCEPass());
-
- // More scalar chains could be vectorized due to more alias information
- if (SLPVectorize)
- PM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
-
- PM.add(createVectorCombinePass()); // Clean up partial vectorization.
-
- // After vectorization, assume intrinsics may tell us more about pointer
- // alignments.
- PM.add(createAlignmentFromAssumptionsPass());
- // Cleanup and simplify the code after the scalar optimizations.
- PM.add(createInstructionCombiningPass());
+ addVectorPasses(PM, /* IsFullLTO */ true);
+
addExtensionsToPM(EP_Peephole, PM);
PM.add(createJumpThreadingPass(/*FreezeSelectCond*/ true));