about | summary | refs | log | tree | commit | diff
path: root/llvm/lib/Passes/PassBuilder.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Passes/PassBuilder.cpp')
-rw-r--r-- llvm/lib/Passes/PassBuilder.cpp | 74
1 file changed, 50 insertions, 24 deletions
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 1aaccb510f8c..53b7db8689c4 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -61,6 +61,7 @@
#include "llvm/IR/PassManager.h"
#include "llvm/IR/SafepointIRVerifier.h"
#include "llvm/IR/Verifier.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Regex.h"
@@ -85,6 +86,7 @@
#include "llvm/Transforms/IPO/Inliner.h"
#include "llvm/Transforms/IPO/Internalize.h"
#include "llvm/Transforms/IPO/LowerTypeTests.h"
+#include "llvm/Transforms/IPO/MergeFunctions.h"
#include "llvm/Transforms/IPO/PartialInlining.h"
#include "llvm/Transforms/IPO/SCCP.h"
#include "llvm/Transforms/IPO/SampleProfile.h"
@@ -145,6 +147,7 @@
#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
#include "llvm/Transforms/Scalar/LowerGuardIntrinsic.h"
+#include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
#include "llvm/Transforms/Scalar/LowerWidenableCondition.h"
#include "llvm/Transforms/Scalar/MakeGuardsExplicit.h"
#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
@@ -169,6 +172,7 @@
#include "llvm/Transforms/Utils/BreakCriticalEdges.h"
#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
#include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
+#include "llvm/Transforms/Utils/InjectTLIMappings.h"
#include "llvm/Transforms/Utils/LCSSA.h"
#include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
@@ -189,6 +193,11 @@ static cl::opt<bool>
cl::Hidden, cl::ZeroOrMore,
cl::desc("Run Partial inlinining pass"));
+static cl::opt<int> PreInlineThreshold(
+ "npm-preinline-threshold", cl::Hidden, cl::init(75), cl::ZeroOrMore,
+ cl::desc("Control the amount of inlining in pre-instrumentation inliner "
+ "(default = 75)"));
+
static cl::opt<bool>
RunNewGVN("enable-npm-newgvn", cl::init(false),
cl::Hidden, cl::ZeroOrMore,
@@ -398,21 +407,25 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
// Hoisting of scalars and load expressions.
- if (EnableGVNHoist)
- FPM.addPass(GVNHoistPass());
-
- // Global value numbering based sinking.
- if (EnableGVNSink) {
- FPM.addPass(GVNSinkPass());
- FPM.addPass(SimplifyCFGPass());
+ if (Level > O1) {
+ if (EnableGVNHoist)
+ FPM.addPass(GVNHoistPass());
+
+ // Global value numbering based sinking.
+ if (EnableGVNSink) {
+ FPM.addPass(GVNSinkPass());
+ FPM.addPass(SimplifyCFGPass());
+ }
}
// Speculative execution if the target has divergent branches; otherwise nop.
- FPM.addPass(SpeculativeExecutionPass());
+ if (Level > O1) {
+ FPM.addPass(SpeculativeExecutionPass());
- // Optimize based on known information about branches, and cleanup afterward.
- FPM.addPass(JumpThreadingPass());
- FPM.addPass(CorrelatedValuePropagationPass());
+ // Optimize based on known information about branches, and cleanup afterward.
+ FPM.addPass(JumpThreadingPass());
+ FPM.addPass(CorrelatedValuePropagationPass());
+ }
FPM.addPass(SimplifyCFGPass());
if (Level == O3)
FPM.addPass(AggressiveInstCombinePass());
@@ -426,10 +439,12 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
// For PGO use pipeline, try to optimize memory intrinsics such as memcpy
// using the size value profile. Don't perform this when optimizing for size.
if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
- !isOptimizingForSize(Level))
+ !isOptimizingForSize(Level) && Level > O1)
FPM.addPass(PGOMemOPSizeOpt());
- FPM.addPass(TailCallElimPass());
+ // TODO: Investigate the cost/benefit of tail call elimination on debugging.
+ if (Level > O1)
+ FPM.addPass(TailCallElimPass());
FPM.addPass(SimplifyCFGPass());
// Form canonically associated expression trees, and simplify the trees using
@@ -456,6 +471,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
// Rotate Loop - disable header duplication at -Oz
LPM1.addPass(LoopRotatePass(Level != Oz));
+ // TODO: Investigate promotion cap for O1.
LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
LPM1.addPass(SimpleLoopUnswitchPass());
LPM2.addPass(IndVarSimplifyPass());
@@ -490,6 +506,9 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
FPM.addPass(createFunctionToLoopPassAdaptor(
std::move(LPM2), /*UseMemorySSA=*/false, DebugLogging));
+ // Delete small array after loop unroll.
+ FPM.addPass(SROA());
+
// Eliminate redundancies.
if (Level != O1) {
// These passes add substantial compile time so skip them at O1.
@@ -520,18 +539,21 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
// Re-consider control flow based optimizations after redundancy elimination,
// redo DCE, etc.
- FPM.addPass(JumpThreadingPass());
- FPM.addPass(CorrelatedValuePropagationPass());
- FPM.addPass(DSEPass());
- FPM.addPass(createFunctionToLoopPassAdaptor(
- LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
- EnableMSSALoopDependency, DebugLogging));
+ if (Level > O1) {
+ FPM.addPass(JumpThreadingPass());
+ FPM.addPass(CorrelatedValuePropagationPass());
+ FPM.addPass(DSEPass());
+ FPM.addPass(createFunctionToLoopPassAdaptor(
+ LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
+ EnableMSSALoopDependency, DebugLogging));
+ }
for (auto &C : ScalarOptimizerLateEPCallbacks)
C(FPM, Level);
// Finally, do an expensive DCE pass to catch all the dead code exposed by
// the simplifications and basic cleanup after all the simplifications.
+ // TODO: Investigate if this is too expensive.
FPM.addPass(ADCEPass());
FPM.addPass(SimplifyCFGPass());
FPM.addPass(InstCombinePass());
@@ -559,8 +581,7 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,
if (!isOptimizingForSize(Level) && !IsCS) {
InlineParams IP;
- // In the old pass manager, this is a cl::opt. Should still this be one?
- IP.DefaultThreshold = 75;
+ IP.DefaultThreshold = PreInlineThreshold;
// FIXME: The hint threshold has the same value used by the regular inliner.
// This should probably be lowered after performance testing.
@@ -954,8 +975,7 @@ ModulePassManager PassBuilder::buildModuleOptimizationPipeline(
// across the loop nests.
// We do UnrollAndJam in a separate LPM to ensure it happens before unroll
if (EnableUnrollAndJam && PTO.LoopUnrolling) {
- OptimizePM.addPass(
- createFunctionToLoopPassAdaptor(LoopUnrollAndJamPass(Level)));
+ OptimizePM.addPass(LoopUnrollAndJamPass(Level));
}
OptimizePM.addPass(LoopUnrollPass(
LoopUnrollOptions(Level, /*OnlyWhenForced=*/!PTO.LoopUnrolling,
@@ -1445,7 +1465,7 @@ auto parsePassParameters(ParametersParseCallableT &&Parser, StringRef Name,
Expected<ParametersT> Result = Parser(Params);
assert((Result || Result.template errorIsA<StringError>()) &&
"Pass parameter parser can only return StringErrors.");
- return std::move(Result);
+ return Result;
}
/// Parser of parameters for LoopUnroll pass.
@@ -1887,6 +1907,12 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM,
return Error::success();
}
+ // This is consistent with old pass manager invoked via opt, but
+ // inconsistent with clang. Clang doesn't enable loop vectorization
+ // but does enable slp vectorization at Oz.
+ PTO.LoopVectorization = L > O1 && L < Oz;
+ PTO.SLPVectorization = L > O1 && L < Oz;
+
if (Matches[1] == "default") {
MPM.addPass(buildPerModuleDefaultPipeline(L, DebugLogging));
} else if (Matches[1] == "thinlto-pre-link") {