author     Dimitry Andric <dim@FreeBSD.org>  2015-06-09 19:06:30 +0000
committer  Dimitry Andric <dim@FreeBSD.org>  2015-06-09 19:06:30 +0000
commit     85d8b2bbe386bcfe669575d05b61482d7be07e5d (patch)
tree       1dc5e75ab222a9ead44c699eceafab7a6ca7b310 /lib/Transforms/Scalar
parent     5a5ac124e1efaf208671f01c46edb15f29ed2a0b (diff)
download   src-85d8b2bbe386bcfe669575d05b61482d7be07e5d.tar.gz
           src-85d8b2bbe386bcfe669575d05b61482d7be07e5d.zip
Vendor import of llvm trunk r239412 (vendor/llvm/llvm-trunk-r239412)
Notes:
svn path=/vendor/llvm/dist/; revision=284184
svn path=/vendor/llvm/llvm-trunk-r239412/; revision=284185; tag=vendor/llvm/llvm-trunk-r239412
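The vendor tag recorded above can be fetched directly from the FreeBSD Subversion mirror. A minimal sketch, assuming the standard svn.freebsd.org/base repository layout (the server URL is an assumption, not part of this commit):

  # hypothetical checkout of the tagged vendor sources at the noted revision
  svn checkout https://svn.freebsd.org/base/vendor/llvm/llvm-trunk-r239412@284185 llvm-trunk-r239412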
Diffstat (limited to 'lib/Transforms/Scalar')
 -rw-r--r--  lib/Transforms/Scalar/CorrelatedValuePropagation.cpp |   7
 -rw-r--r--  lib/Transforms/Scalar/DeadStoreElimination.cpp       |  12
 -rw-r--r--  lib/Transforms/Scalar/IndVarSimplify.cpp             | 152
 -rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp         |  10
 -rw-r--r--  lib/Transforms/Scalar/LoopUnrollPass.cpp             | 517
 -rw-r--r--  lib/Transforms/Scalar/MemCpyOptimizer.cpp            |  24
 -rw-r--r--  lib/Transforms/Scalar/MergedLoadStoreMotion.cpp      |  10
 -rw-r--r--  lib/Transforms/Scalar/NaryReassociate.cpp            |  28
 -rw-r--r--  lib/Transforms/Scalar/PlaceSafepoints.cpp            |   2
 -rw-r--r--  lib/Transforms/Scalar/Reassociate.cpp                |  58
 -rw-r--r--  lib/Transforms/Scalar/RewriteStatepointsForGC.cpp    | 131
 -rw-r--r--  lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp |   4
 -rw-r--r--  lib/Transforms/Scalar/SimplifyCFGPass.cpp            |  15
 -rw-r--r--  lib/Transforms/Scalar/Sink.cpp                       |   8
 14 files changed, 564 insertions(+), 414 deletions(-)
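For reference, the summary above can be regenerated from a git checkout of the FreeBSD src repository, using the commit and parent hashes shown in the header. A minimal sketch, assuming both commits are available locally:

  # diffstat of this import, limited to the same subtree
  git diff --stat 5a5ac124e1efaf208671f01c46edb15f29ed2a0b \
      85d8b2bbe386bcfe669575d05b61482d7be07e5d -- lib/Transforms/Scalar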
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index d1302c6e22f4..79624b2e4c47 100644 --- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -113,10 +113,11 @@ bool CorrelatedValuePropagation::processPHI(PHINode *P) { Value *Condition = SI->getCondition(); if (!Condition->getType()->isVectorTy()) { - if (Constant *C = LVI->getConstantOnEdge(Condition, P->getIncomingBlock(i), BB, P)) { - if (C == ConstantInt::getTrue(Condition->getType())) { + if (Constant *C = LVI->getConstantOnEdge( + Condition, P->getIncomingBlock(i), BB, P)) { + if (C->isOneValue()) { V = SI->getTrueValue(); - } else { + } else if (C->isZeroValue()) { V = SI->getFalseValue(); } // Once LVI learns to handle vector types, we could also add support diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index 01952cf6e8b3..eb48a766a2cf 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -197,11 +197,11 @@ static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo *TLI) { static AliasAnalysis::Location getLocForWrite(Instruction *Inst, AliasAnalysis &AA) { if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) - return AA.getLocation(SI); + return MemoryLocation::get(SI); if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(Inst)) { // memcpy/memmove/memset. - AliasAnalysis::Location Loc = AA.getLocationForDest(MI); + AliasAnalysis::Location Loc = MemoryLocation::getForDest(MI); return Loc; } @@ -231,7 +231,7 @@ getLocForRead(Instruction *Inst, AliasAnalysis &AA) { // The only instructions that both read and write are the mem transfer // instructions (memcpy/memmove). if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(Inst)) - return AA.getLocationForSource(MTI); + return MemoryLocation::getForSource(MTI); return AliasAnalysis::Location(); } @@ -815,11 +815,11 @@ bool DSE::handleEndBlock(BasicBlock &BB) { if (LoadInst *L = dyn_cast<LoadInst>(BBI)) { if (!L->isUnordered()) // Be conservative with atomic/volatile load break; - LoadedLoc = AA->getLocation(L); + LoadedLoc = MemoryLocation::get(L); } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) { - LoadedLoc = AA->getLocation(V); + LoadedLoc = MemoryLocation::get(V); } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(BBI)) { - LoadedLoc = AA->getLocationForSource(MTI); + LoadedLoc = MemoryLocation::getForSource(MTI); } else if (!BBI->mayReadFromMemory()) { // Instruction doesn't read memory. Note that stores that weren't removed // above will hit this case. 
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 600589c904c4..359a616c069d 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -68,6 +68,22 @@ static cl::opt<bool> VerifyIndvars( static cl::opt<bool> ReduceLiveIVs("liv-reduce", cl::Hidden, cl::desc("Reduce live induction variables.")); +enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, AlwaysRepl }; + +static cl::opt<ReplaceExitVal> ReplaceExitValue( + "replexitval", cl::Hidden, cl::init(OnlyCheapRepl), + cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), + cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), + clEnumValN(OnlyCheapRepl, "cheap", + "only replace exit value when the cost is cheap"), + clEnumValN(AlwaysRepl, "always", + "always replace exit value whenever possible"), + clEnumValEnd)); + +namespace { +struct RewritePhi; +} + namespace { class IndVarSimplify : public LoopPass { LoopInfo *LI; @@ -112,6 +128,7 @@ namespace { void SimplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LPPassManager &LPM); + bool CanLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet); void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter); Value *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, @@ -464,6 +481,21 @@ void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) { SE->forgetLoop(L); } +namespace { +// Collect information about PHI nodes which can be transformed in +// RewriteLoopExitValues. +struct RewritePhi { + PHINode *PN; + unsigned Ith; // Ith incoming value. + Value *Val; // Exit value after expansion. + bool HighCost; // High Cost when expansion. + bool SafePhi; // LCSSASafePhiForRAUW. + + RewritePhi(PHINode *P, unsigned I, Value *V, bool H, bool S) + : PN(P), Ith(I), Val(V), HighCost(H), SafePhi(S) {} +}; +} + //===----------------------------------------------------------------------===// // RewriteLoopExitValues - Optimize IV users outside the loop. // As a side effect, reduces the amount of IV processing within the loop. @@ -486,6 +518,7 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { SmallVector<BasicBlock*, 8> ExitBlocks; L->getUniqueExitBlocks(ExitBlocks); + SmallVector<RewritePhi, 8> RewritePhiSet; // Find all values that are computed inside the loop, but used outside of it. // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan // the exit blocks of the loop to find them. @@ -604,23 +637,44 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { DeadInsts.push_back(ExitVal); continue; } - Changed = true; - ++NumReplaced; + bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L); - PN->setIncomingValue(i, ExitVal); + // Collect all the candidate PHINodes to be rewritten. + RewritePhiSet.push_back( + RewritePhi(PN, i, ExitVal, HighCost, LCSSASafePhiForRAUW)); + } + } + } - // If this instruction is dead now, delete it. Don't do it now to avoid - // invalidating iterators. - if (isInstructionTriviallyDead(Inst, TLI)) - DeadInsts.push_back(Inst); + bool LoopCanBeDel = CanLoopBeDeleted(L, RewritePhiSet); - // If we determined that this PHI is safe to replace even if an LCSSA - // PHI, do so. - if (LCSSASafePhiForRAUW) { - PN->replaceAllUsesWith(ExitVal); - PN->eraseFromParent(); - } - } + // Transformation. + for (const RewritePhi &Phi : RewritePhiSet) { + PHINode *PN = Phi.PN; + Value *ExitVal = Phi.Val; + + // Only do the rewrite when the ExitValue can be expanded cheaply. 
+ // If LoopCanBeDel is true, rewrite exit value aggressively. + if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) { + DeadInsts.push_back(ExitVal); + continue; + } + + Changed = true; + ++NumReplaced; + Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith)); + PN->setIncomingValue(Phi.Ith, ExitVal); + + // If this instruction is dead now, delete it. Don't do it now to avoid + // invalidating iterators. + if (isInstructionTriviallyDead(Inst, TLI)) + DeadInsts.push_back(Inst); + + // If we determined that this PHI is safe to replace even if an LCSSA + // PHI, do so. + if (Phi.SafePhi) { + PN->replaceAllUsesWith(ExitVal); + PN->eraseFromParent(); } } @@ -629,6 +683,65 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { Rewriter.clearInsertPoint(); } +/// CanLoopBeDeleted - Check whether it is possible to delete the loop after +/// rewriting exit value. If it is possible, ignore ReplaceExitValue and +/// do rewriting aggressively. +bool IndVarSimplify::CanLoopBeDeleted( + Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) { + + BasicBlock *Preheader = L->getLoopPreheader(); + // If there is no preheader, the loop will not be deleted. + if (!Preheader) + return false; + + // In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1. + // We obviate multiple ExitingBlocks case for simplicity. + // TODO: If we see testcase with multiple ExitingBlocks can be deleted + // after exit value rewriting, we can enhance the logic here. + SmallVector<BasicBlock *, 4> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + SmallVector<BasicBlock *, 8> ExitBlocks; + L->getUniqueExitBlocks(ExitBlocks); + if (ExitBlocks.size() > 1 || ExitingBlocks.size() > 1) + return false; + + BasicBlock *ExitBlock = ExitBlocks[0]; + BasicBlock::iterator BI = ExitBlock->begin(); + while (PHINode *P = dyn_cast<PHINode>(BI)) { + Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]); + + // If the Incoming value of P is found in RewritePhiSet, we know it + // could be rewritten to use a loop invariant value in transformation + // phase later. Skip it in the loop invariant check below. + bool found = false; + for (const RewritePhi &Phi : RewritePhiSet) { + unsigned i = Phi.Ith; + if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) { + found = true; + break; + } + } + + Instruction *I; + if (!found && (I = dyn_cast<Instruction>(Incoming))) + if (!L->hasLoopInvariantOperands(I)) + return false; + + ++BI; + } + + for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end(); + LI != LE; ++LI) { + for (BasicBlock::iterator BI = (*LI)->begin(), BE = (*LI)->end(); BI != BE; + ++BI) { + if (BI->mayHaveSideEffects()) + return false; + } + } + + return true; +} + //===----------------------------------------------------------------------===// // IV Widening - Extend the width of an IV to cover its widest uses. 
//===----------------------------------------------------------------------===// @@ -989,7 +1102,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { IRBuilder<> Builder(WidePhi->getParent()->getFirstInsertionPt()); Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType()); UsePhi->replaceAllUsesWith(Trunc); - DeadInsts.push_back(UsePhi); + DeadInsts.emplace_back(UsePhi); DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to " << *WidePhi << "\n"); } @@ -1022,7 +1135,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { << " replaced by " << *DU.WideDef << "\n"); ++NumElimExt; DU.NarrowUse->replaceAllUsesWith(NewDef); - DeadInsts.push_back(DU.NarrowUse); + DeadInsts.emplace_back(DU.NarrowUse); } // Now that the extend is gone, we want to expose it's uses for potential // further simplification. We don't need to directly inform SimplifyIVUsers @@ -1075,7 +1188,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { if (WideAddRec != SE->getSCEV(WideUse)) { DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec << "\n"); - DeadInsts.push_back(WideUse); + DeadInsts.emplace_back(WideUse); return nullptr; } @@ -1172,7 +1285,7 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) { // WidenIVUse may have removed the def-use edge. if (DU.NarrowDef->use_empty()) - DeadInsts.push_back(DU.NarrowDef); + DeadInsts.emplace_back(DU.NarrowDef); } return WidePhi; } @@ -1867,7 +1980,8 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // loop into any instructions outside of the loop that use the final values of // the current expressions. // - if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount)) + if (ReplaceExitValue != NeverRepl && + !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) RewriteLoopExitValues(L, Rewriter); // Eliminate redundant IV cycles. diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 584c7aee7f1d..4b59f3d2f6cc 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -811,7 +811,7 @@ DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) { if (Instruction *U = dyn_cast<Instruction>(O)) { O = nullptr; if (U->use_empty()) - DeadInsts.push_back(U); + DeadInsts.emplace_back(U); } I->eraseFromParent(); @@ -2917,7 +2917,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy, "lsr.chain"); } Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper); - DeadInsts.push_back(Inc.IVOperand); + DeadInsts.emplace_back(Inc.IVOperand); } // If LSR created a new, wider phi, we may also replace its postinc. We only // do this if we also found a wide value for the head of the chain. @@ -2939,7 +2939,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain"); } Phi->replaceUsesOfWith(PostIncV, IVOper); - DeadInsts.push_back(PostIncV); + DeadInsts.emplace_back(PostIncV); } } } @@ -4594,7 +4594,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF, // form, update the ICmp's other operand. 
if (LU.Kind == LSRUse::ICmpZero) { ICmpInst *CI = cast<ICmpInst>(LF.UserInst); - DeadInsts.push_back(CI->getOperand(1)); + DeadInsts.emplace_back(CI->getOperand(1)); assert(!F.BaseGV && "ICmp does not support folding a global value and " "a scale at the same time!"); if (F.Scale == -1) { @@ -4737,7 +4737,7 @@ void LSRInstance::Rewrite(const LSRFixup &LF, LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV); } - DeadInsts.push_back(LF.OperandValToReplace); + DeadInsts.emplace_back(LF.OperandValToReplace); } /// ImplementSolution - Rewrite all the fixup locations with new values, diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index ccafd100ef0f..4ccbfc953e0c 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -38,25 +38,25 @@ using namespace llvm; #define DEBUG_TYPE "loop-unroll" static cl::opt<unsigned> -UnrollThreshold("unroll-threshold", cl::init(150), cl::Hidden, - cl::desc("The cut-off point for automatic loop unrolling")); + UnrollThreshold("unroll-threshold", cl::init(150), cl::Hidden, + cl::desc("The baseline cost threshold for loop unrolling")); + +static cl::opt<unsigned> UnrollPercentDynamicCostSavedThreshold( + "unroll-percent-dynamic-cost-saved-threshold", cl::init(20), cl::Hidden, + cl::desc("The percentage of estimated dynamic cost which must be saved by " + "unrolling to allow unrolling up to the max threshold.")); + +static cl::opt<unsigned> UnrollDynamicCostSavingsDiscount( + "unroll-dynamic-cost-savings-discount", cl::init(2000), cl::Hidden, + cl::desc("This is the amount discounted from the total unroll cost when " + "the unrolled form has a high dynamic cost savings (triggered by " + "the '-unroll-perecent-dynamic-cost-saved-threshold' flag).")); static cl::opt<unsigned> UnrollMaxIterationsCountToAnalyze( "unroll-max-iteration-count-to-analyze", cl::init(0), cl::Hidden, cl::desc("Don't allow loop unrolling to simulate more than this number of" "iterations when checking full unroll profitability")); -static cl::opt<unsigned> UnrollMinPercentOfOptimized( - "unroll-percent-of-optimized-for-complete-unroll", cl::init(20), cl::Hidden, - cl::desc("If complete unrolling could trigger further optimizations, and, " - "by that, remove the given percent of instructions, perform the " - "complete unroll even if it's beyond the threshold")); - -static cl::opt<unsigned> UnrollAbsoluteThreshold( - "unroll-absolute-threshold", cl::init(2000), cl::Hidden, - cl::desc("Don't unroll if the unrolled size is bigger than this threshold," - " even if we can remove big portion of instructions later.")); - static cl::opt<unsigned> UnrollCount("unroll-count", cl::init(0), cl::Hidden, cl::desc("Use this unroll count for all loops including those with " @@ -82,16 +82,18 @@ namespace { static char ID; // Pass ID, replacement for typeid LoopUnroll(int T = -1, int C = -1, int P = -1, int R = -1) : LoopPass(ID) { CurrentThreshold = (T == -1) ? UnrollThreshold : unsigned(T); - CurrentAbsoluteThreshold = UnrollAbsoluteThreshold; - CurrentMinPercentOfOptimized = UnrollMinPercentOfOptimized; + CurrentPercentDynamicCostSavedThreshold = + UnrollPercentDynamicCostSavedThreshold; + CurrentDynamicCostSavingsDiscount = UnrollDynamicCostSavingsDiscount; CurrentCount = (C == -1) ? UnrollCount : unsigned(C); CurrentAllowPartial = (P == -1) ? UnrollAllowPartial : (bool)P; CurrentRuntime = (R == -1) ? 
UnrollRuntime : (bool)R; UserThreshold = (T != -1) || (UnrollThreshold.getNumOccurrences() > 0); - UserAbsoluteThreshold = (UnrollAbsoluteThreshold.getNumOccurrences() > 0); - UserPercentOfOptimized = - (UnrollMinPercentOfOptimized.getNumOccurrences() > 0); + UserPercentDynamicCostSavedThreshold = + (UnrollPercentDynamicCostSavedThreshold.getNumOccurrences() > 0); + UserDynamicCostSavingsDiscount = + (UnrollDynamicCostSavingsDiscount.getNumOccurrences() > 0); UserAllowPartial = (P != -1) || (UnrollAllowPartial.getNumOccurrences() > 0); UserRuntime = (R != -1) || (UnrollRuntime.getNumOccurrences() > 0); @@ -115,18 +117,18 @@ namespace { unsigned CurrentCount; unsigned CurrentThreshold; - unsigned CurrentAbsoluteThreshold; - unsigned CurrentMinPercentOfOptimized; - bool CurrentAllowPartial; - bool CurrentRuntime; - bool UserCount; // CurrentCount is user-specified. - bool UserThreshold; // CurrentThreshold is user-specified. - bool UserAbsoluteThreshold; // CurrentAbsoluteThreshold is - // user-specified. - bool UserPercentOfOptimized; // CurrentMinPercentOfOptimized is - // user-specified. - bool UserAllowPartial; // CurrentAllowPartial is user-specified. - bool UserRuntime; // CurrentRuntime is user-specified. + unsigned CurrentPercentDynamicCostSavedThreshold; + unsigned CurrentDynamicCostSavingsDiscount; + bool CurrentAllowPartial; + bool CurrentRuntime; + + // Flags for whether the 'current' settings are user-specified. + bool UserCount; + bool UserThreshold; + bool UserPercentDynamicCostSavedThreshold; + bool UserDynamicCostSavingsDiscount; + bool UserAllowPartial; + bool UserRuntime; bool runOnLoop(Loop *L, LPPassManager &LPM) override; @@ -156,8 +158,9 @@ namespace { void getUnrollingPreferences(Loop *L, const TargetTransformInfo &TTI, TargetTransformInfo::UnrollingPreferences &UP) { UP.Threshold = CurrentThreshold; - UP.AbsoluteThreshold = CurrentAbsoluteThreshold; - UP.MinPercentOfOptimized = CurrentMinPercentOfOptimized; + UP.PercentDynamicCostSavedThreshold = + CurrentPercentDynamicCostSavedThreshold; + UP.DynamicCostSavingsDiscount = CurrentDynamicCostSavingsDiscount; UP.OptSizeThreshold = OptSizeUnrollThreshold; UP.PartialThreshold = CurrentThreshold; UP.PartialOptSizeThreshold = OptSizeUnrollThreshold; @@ -186,8 +189,8 @@ namespace { void selectThresholds(const Loop *L, bool HasPragma, const TargetTransformInfo::UnrollingPreferences &UP, unsigned &Threshold, unsigned &PartialThreshold, - unsigned &AbsoluteThreshold, - unsigned &PercentOfOptimizedForCompleteUnroll) { + unsigned &PercentDynamicCostSavedThreshold, + unsigned &DynamicCostSavingsDiscount) { // Determine the current unrolling threshold. While this is // normally set from UnrollThreshold, it is overridden to a // smaller value if the current function is marked as @@ -195,11 +198,13 @@ namespace { // specified. Threshold = UserThreshold ? CurrentThreshold : UP.Threshold; PartialThreshold = UserThreshold ? CurrentThreshold : UP.PartialThreshold; - AbsoluteThreshold = UserAbsoluteThreshold ? CurrentAbsoluteThreshold - : UP.AbsoluteThreshold; - PercentOfOptimizedForCompleteUnroll = UserPercentOfOptimized - ? CurrentMinPercentOfOptimized - : UP.MinPercentOfOptimized; + PercentDynamicCostSavedThreshold = + UserPercentDynamicCostSavedThreshold + ? CurrentPercentDynamicCostSavedThreshold + : UP.PercentDynamicCostSavedThreshold; + DynamicCostSavingsDiscount = UserDynamicCostSavingsDiscount + ? 
CurrentDynamicCostSavingsDiscount + : UP.DynamicCostSavingsDiscount; if (!UserThreshold && L->getHeader()->getParent()->hasFnAttribute( @@ -220,9 +225,9 @@ namespace { } } bool canUnrollCompletely(Loop *L, unsigned Threshold, - unsigned AbsoluteThreshold, uint64_t UnrolledSize, - unsigned NumberOfOptimizedInstructions, - unsigned PercentOfOptimizedForCompleteUnroll); + unsigned PercentDynamicCostSavedThreshold, + unsigned DynamicCostSavingsDiscount, + uint64_t UnrolledCost, uint64_t RolledDynamicCost); }; } @@ -246,187 +251,6 @@ Pass *llvm::createSimpleLoopUnrollPass() { } namespace { -/// \brief SCEV expressions visitor used for finding expressions that would -/// become constants if the loop L is unrolled. -struct FindConstantPointers { - /// \brief Shows whether the expression is ConstAddress+Constant or not. - bool IndexIsConstant; - - /// \brief Used for filtering out SCEV expressions with two or more AddRec - /// subexpressions. - /// - /// Used to filter out complicated SCEV expressions, having several AddRec - /// sub-expressions. We don't handle them, because unrolling one loop - /// would help to replace only one of these inductions with a constant, and - /// consequently, the expression would remain non-constant. - bool HaveSeenAR; - - /// \brief If the SCEV expression becomes ConstAddress+Constant, this value - /// holds ConstAddress. Otherwise, it's nullptr. - Value *BaseAddress; - - /// \brief The loop, which we try to completely unroll. - const Loop *L; - - ScalarEvolution &SE; - - FindConstantPointers(const Loop *L, ScalarEvolution &SE) - : IndexIsConstant(true), HaveSeenAR(false), BaseAddress(nullptr), - L(L), SE(SE) {} - - /// Examine the given expression S and figure out, if it can be a part of an - /// expression, that could become a constant after the loop is unrolled. - /// The routine sets IndexIsConstant and HaveSeenAR according to the analysis - /// results. - /// \returns true if we need to examine subexpressions, and false otherwise. - bool follow(const SCEV *S) { - if (const SCEVUnknown *SC = dyn_cast<SCEVUnknown>(S)) { - // We've reached the leaf node of SCEV, it's most probably just a - // variable. - // If it's the only one SCEV-subexpression, then it might be a base - // address of an index expression. - // If we've already recorded base address, then just give up on this SCEV - // - it's too complicated. - if (BaseAddress) { - IndexIsConstant = false; - return false; - } - BaseAddress = SC->getValue(); - return false; - } - if (isa<SCEVConstant>(S)) - return false; - if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { - // If the current SCEV expression is AddRec, and its loop isn't the loop - // we are about to unroll, then we won't get a constant address after - // unrolling, and thus, won't be able to eliminate the load. - if (AR->getLoop() != L) { - IndexIsConstant = false; - return false; - } - // We don't handle multiple AddRecs here, so give up in this case. - if (HaveSeenAR) { - IndexIsConstant = false; - return false; - } - HaveSeenAR = true; - } - - // Continue traversal. - return true; - } - bool isDone() const { return !IndexIsConstant; } -}; -} // End anonymous namespace. - -namespace { -/// \brief A cache of SCEV results used to optimize repeated queries to SCEV on -/// the same set of instructions. -/// -/// The primary cost this saves is the cost of checking the validity of a SCEV -/// every time it is looked up. 
However, in some cases we can provide a reduced -/// and especially useful model for an instruction based upon SCEV that is -/// non-trivial to compute but more useful to clients. -class SCEVCache { -public: - /// \brief Struct to represent a GEP whose start and step are known fixed - /// offsets from a base address due to SCEV's analysis. - struct GEPDescriptor { - Value *BaseAddr = nullptr; - unsigned Start = 0; - unsigned Step = 0; - }; - - Optional<GEPDescriptor> getGEPDescriptor(GetElementPtrInst *GEP); - - SCEVCache(const Loop &L, ScalarEvolution &SE) : L(L), SE(SE) {} - -private: - const Loop &L; - ScalarEvolution &SE; - - SmallDenseMap<GetElementPtrInst *, GEPDescriptor> GEPDescriptors; -}; -} // End anonymous namespace. - -/// \brief Get a simplified descriptor for a GEP instruction. -/// -/// Where possible, this produces a simplified descriptor for a GEP instruction -/// using SCEV analysis of the containing loop. If this isn't possible, it -/// returns an empty optional. -/// -/// The model is a base address, an initial offset, and a per-iteration step. -/// This fits very common patterns of GEPs inside loops and is something we can -/// use to simulate the behavior of a particular iteration of a loop. -/// -/// This is a cached interface. The first call may do non-trivial work to -/// compute the result, but all subsequent calls will return a fast answer -/// based on a cached result. This includes caching negative results. -Optional<SCEVCache::GEPDescriptor> -SCEVCache::getGEPDescriptor(GetElementPtrInst *GEP) { - decltype(GEPDescriptors)::iterator It; - bool Inserted; - - std::tie(It, Inserted) = GEPDescriptors.insert({GEP, {}}); - - if (!Inserted) { - if (!It->second.BaseAddr) - return None; - - return It->second; - } - - // We've inserted a new record into the cache, so compute the GEP descriptor - // if possible. - Value *V = cast<Value>(GEP); - if (!SE.isSCEVable(V->getType())) - return None; - const SCEV *S = SE.getSCEV(V); - - // FIXME: It'd be nice if the worklist and set used by the - // SCEVTraversal could be re-used between loop iterations, but the - // interface doesn't support that. There is no way to clear the visited - // sets between uses. - FindConstantPointers Visitor(&L, SE); - SCEVTraversal<FindConstantPointers> T(Visitor); - - // Try to find (BaseAddress+Step+Offset) tuple. - // If succeeded, save it to the cache - it might help in folding - // loads. - T.visitAll(S); - if (!Visitor.IndexIsConstant || !Visitor.BaseAddress) - return None; - - const SCEV *BaseAddrSE = SE.getSCEV(Visitor.BaseAddress); - if (BaseAddrSE->getType() != S->getType()) - return None; - const SCEV *OffSE = SE.getMinusSCEV(S, BaseAddrSE); - const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OffSE); - - if (!AR) - return None; - - const SCEVConstant *StepSE = - dyn_cast<SCEVConstant>(AR->getStepRecurrence(SE)); - const SCEVConstant *StartSE = dyn_cast<SCEVConstant>(AR->getStart()); - if (!StepSE || !StartSE) - return None; - - // Check and skip caching if doing so would require lots of bits to - // avoid overflow. - APInt Start = StartSE->getValue()->getValue(); - APInt Step = StepSE->getValue()->getValue(); - if (Start.getActiveBits() > 32 || Step.getActiveBits() > 32) - return None; - - // We found a cacheable SCEV model for the GEP. 
- It->second.BaseAddr = Visitor.BaseAddress; - It->second.Start = Start.getLimitedValue(); - It->second.Step = Step.getLimitedValue(); - return It->second; -} - -namespace { // This class is used to get an estimate of the optimization effects that we // could get from complete loop unrolling. It comes from the fact that some // loads might be replaced with concrete constant values and that could trigger @@ -446,17 +270,31 @@ namespace { class UnrolledInstAnalyzer : private InstVisitor<UnrolledInstAnalyzer, bool> { typedef InstVisitor<UnrolledInstAnalyzer, bool> Base; friend class InstVisitor<UnrolledInstAnalyzer, bool>; + struct SimplifiedAddress { + Value *Base = nullptr; + ConstantInt *Offset = nullptr; + }; public: UnrolledInstAnalyzer(unsigned Iteration, DenseMap<Value *, Constant *> &SimplifiedValues, - SCEVCache &SC) - : Iteration(Iteration), SimplifiedValues(SimplifiedValues), SC(SC) {} + const Loop *L, ScalarEvolution &SE) + : Iteration(Iteration), SimplifiedValues(SimplifiedValues), L(L), SE(SE) { + IterationNumber = SE.getConstant(APInt(64, Iteration)); + } // Allow access to the initial visit method. using Base::visit; private: + /// \brief A cache of pointer bases and constant-folded offsets corresponding + /// to GEP (or derived from GEP) instructions. + /// + /// In order to find the base pointer one needs to perform non-trivial + /// traversal of the corresponding SCEV expression, so it's good to have the + /// results saved. + DenseMap<Value *, SimplifiedAddress> SimplifiedAddresses; + /// \brief Number of currently simulated iteration. /// /// If an expression is ConstAddress+Constant, then the Constant is @@ -464,18 +302,71 @@ private: /// SCEVGEPCache. unsigned Iteration; - // While we walk the loop instructions, we we build up and maintain a mapping - // of simplified values specific to this iteration. The idea is to propagate - // any special information we have about loads that can be replaced with - // constants after complete unrolling, and account for likely simplifications - // post-unrolling. + /// \brief SCEV expression corresponding to number of currently simulated + /// iteration. + const SCEV *IterationNumber; + + /// \brief A Value->Constant map for keeping values that we managed to + /// constant-fold on the given iteration. + /// + /// While we walk the loop instructions, we build up and maintain a mapping + /// of simplified values specific to this iteration. The idea is to propagate + /// any special information we have about loads that can be replaced with + /// constants after complete unrolling, and account for likely simplifications + /// post-unrolling. DenseMap<Value *, Constant *> &SimplifiedValues; - // We use a cache to wrap all our SCEV queries. - SCEVCache &SC; + const Loop *L; + ScalarEvolution &SE; + + /// \brief Try to simplify instruction \param I using its SCEV expression. + /// + /// The idea is that some AddRec expressions become constants, which then + /// could trigger folding of other instructions. However, that only happens + /// for expressions whose start value is also constant, which isn't always the + /// case. In another common and important case the start value is just some + /// address (i.e. SCEVUnknown) - in this case we compute the offset and save + /// it along with the base address instead. 
+ bool simplifyInstWithSCEV(Instruction *I) { + if (!SE.isSCEVable(I->getType())) + return false; + + const SCEV *S = SE.getSCEV(I); + if (auto *SC = dyn_cast<SCEVConstant>(S)) { + SimplifiedValues[I] = SC->getValue(); + return true; + } + + auto *AR = dyn_cast<SCEVAddRecExpr>(S); + if (!AR) + return false; + + const SCEV *ValueAtIteration = AR->evaluateAtIteration(IterationNumber, SE); + // Check if the AddRec expression becomes a constant. + if (auto *SC = dyn_cast<SCEVConstant>(ValueAtIteration)) { + SimplifiedValues[I] = SC->getValue(); + return true; + } + + // Check if the offset from the base address becomes a constant. + auto *Base = dyn_cast<SCEVUnknown>(SE.getPointerBase(S)); + if (!Base) + return false; + auto *Offset = + dyn_cast<SCEVConstant>(SE.getMinusSCEV(ValueAtIteration, Base)); + if (!Offset) + return false; + SimplifiedAddress Address; + Address.Base = Base->getValue(); + Address.Offset = Offset->getValue(); + SimplifiedAddresses[I] = Address; + return true; + } /// Base case for the instruction visitor. - bool visitInstruction(Instruction &I) { return false; }; + bool visitInstruction(Instruction &I) { + return simplifyInstWithSCEV(&I); + } /// TODO: Add visitors for other instruction types, e.g. ZExt, SExt. @@ -492,6 +383,7 @@ private: if (!isa<Constant>(RHS)) if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS)) RHS = SimpleRHS; + Value *SimpleV = nullptr; const DataLayout &DL = I.getModule()->getDataLayout(); if (auto FI = dyn_cast<FPMathOperator>(&I)) @@ -503,24 +395,21 @@ private: if (Constant *C = dyn_cast_or_null<Constant>(SimpleV)) SimplifiedValues[&I] = C; - return SimpleV; + if (SimpleV) + return true; + return Base::visitBinaryOperator(I); } /// Try to fold load I. bool visitLoad(LoadInst &I) { Value *AddrOp = I.getPointerOperand(); - if (!isa<Constant>(AddrOp)) - if (Constant *SimplifiedAddrOp = SimplifiedValues.lookup(AddrOp)) - AddrOp = SimplifiedAddrOp; - auto *GEP = dyn_cast<GetElementPtrInst>(AddrOp); - if (!GEP) - return false; - auto OptionalGEPDesc = SC.getGEPDescriptor(GEP); - if (!OptionalGEPDesc) + auto AddressIt = SimplifiedAddresses.find(AddrOp); + if (AddressIt == SimplifiedAddresses.end()) return false; + ConstantInt *SimplifiedAddrOp = AddressIt->second.Offset; - auto GV = dyn_cast<GlobalVariable>(OptionalGEPDesc->BaseAddr); + auto *GV = dyn_cast<GlobalVariable>(AddressIt->second.Base); // We're only interested in loads that can be completely folded to a // constant. if (!GV || !GV->hasInitializer()) @@ -531,13 +420,10 @@ private: if (!CDS) return false; - // This calculation should never overflow because we bound Iteration quite - // low and both the start and step are 32-bit integers. We use signed - // integers so that UBSan will catch if a bug sneaks into the code. int ElemSize = CDS->getElementType()->getPrimitiveSizeInBits() / 8U; - int64_t Index = ((int64_t)OptionalGEPDesc->Start + - (int64_t)OptionalGEPDesc->Step * (int64_t)Iteration) / - ElemSize; + assert(SimplifiedAddrOp->getValue().getActiveBits() < 64 && + "Unexpectedly large index value."); + int64_t Index = SimplifiedAddrOp->getSExtValue() / ElemSize; if (Index >= CDS->getNumElements()) { // FIXME: For now we conservatively ignore out of bound accesses, but // we're allowed to perform the optimization in this case. @@ -556,11 +442,12 @@ private: namespace { struct EstimatedUnrollCost { - /// \brief Count the number of optimized instructions. - unsigned NumberOfOptimizedInstructions; + /// \brief The estimated cost after unrolling. 
+ unsigned UnrolledCost; - /// \brief Count the total number of instructions. - unsigned UnrolledLoopSize; + /// \brief The estimated dynamic cost of executing the instructions in the + /// rolled form. + unsigned RolledDynamicCost; }; } @@ -593,12 +480,15 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE, SmallSetVector<BasicBlock *, 16> BBWorklist; DenseMap<Value *, Constant *> SimplifiedValues; - // Use a cache to access SCEV expressions so that we don't pay the cost on - // each iteration. This cache is lazily self-populating. - SCEVCache SC(*L, SE); - - unsigned NumberOfOptimizedInstructions = 0; - unsigned UnrolledLoopSize = 0; + // The estimated cost of the unrolled form of the loop. We try to estimate + // this by simplifying as much as we can while computing the estimate. + unsigned UnrolledCost = 0; + // We also track the estimated dynamic (that is, actually executed) cost in + // the rolled form. This helps identify cases when the savings from unrolling + // aren't just exposing dead control flows, but actual reduced dynamic + // instructions due to the simplifications which we expect to occur after + // unrolling. + unsigned RolledDynamicCost = 0; // Simulate execution of each iteration of the loop counting instructions, // which would be simplified. @@ -606,7 +496,7 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE, // we literally have to go through all loop's iterations. for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) { SimplifiedValues.clear(); - UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SC); + UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, L, SE); BBWorklist.clear(); BBWorklist.insert(L->getHeader()); @@ -618,17 +508,20 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE, // it. We don't change the actual IR, just count optimization // opportunities. for (Instruction &I : *BB) { - UnrolledLoopSize += TTI.getUserCost(&I); + unsigned InstCost = TTI.getUserCost(&I); // Visit the instruction to analyze its loop cost after unrolling, - // and if the visitor returns true, then we can optimize this - // instruction away. - if (Analyzer.visit(I)) - NumberOfOptimizedInstructions += TTI.getUserCost(&I); + // and if the visitor returns false, include this instruction in the + // unrolled cost. + if (!Analyzer.visit(I)) + UnrolledCost += InstCost; + + // Also track this instructions expected cost when executing the rolled + // loop form. + RolledDynamicCost += InstCost; // If unrolled body turns out to be too big, bail out. - if (UnrolledLoopSize - NumberOfOptimizedInstructions > - MaxUnrolledLoopSize) + if (UnrolledCost > MaxUnrolledLoopSize) return None; } @@ -640,10 +533,10 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE, // If we found no optimization opportunities on the first iteration, we // won't find them on later ones too. - if (!NumberOfOptimizedInstructions) + if (UnrolledCost == RolledDynamicCost) return None; } - return {{NumberOfOptimizedInstructions, UnrolledLoopSize}}; + return {{UnrolledCost, RolledDynamicCost}}; } /// ApproximateLoopSize - Approximate the size of the loop. 
@@ -749,46 +642,56 @@ static void SetLoopAlreadyUnrolled(Loop *L) { L->setLoopID(NewLoopID); } -bool LoopUnroll::canUnrollCompletely( - Loop *L, unsigned Threshold, unsigned AbsoluteThreshold, - uint64_t UnrolledSize, unsigned NumberOfOptimizedInstructions, - unsigned PercentOfOptimizedForCompleteUnroll) { +bool LoopUnroll::canUnrollCompletely(Loop *L, unsigned Threshold, + unsigned PercentDynamicCostSavedThreshold, + unsigned DynamicCostSavingsDiscount, + uint64_t UnrolledCost, + uint64_t RolledDynamicCost) { if (Threshold == NoThreshold) { DEBUG(dbgs() << " Can fully unroll, because no threshold is set.\n"); return true; } - if (UnrolledSize <= Threshold) { - DEBUG(dbgs() << " Can fully unroll, because unrolled size: " - << UnrolledSize << "<" << Threshold << "\n"); + if (UnrolledCost <= Threshold) { + DEBUG(dbgs() << " Can fully unroll, because unrolled cost: " + << UnrolledCost << "<" << Threshold << "\n"); return true; } - assert(UnrolledSize && "UnrolledSize can't be 0 at this point."); - unsigned PercentOfOptimizedInstructions = - (uint64_t)NumberOfOptimizedInstructions * 100ull / UnrolledSize; - - if (UnrolledSize <= AbsoluteThreshold && - PercentOfOptimizedInstructions >= PercentOfOptimizedForCompleteUnroll) { - DEBUG(dbgs() << " Can fully unroll, because unrolling will help removing " - << PercentOfOptimizedInstructions - << "% instructions (threshold: " - << PercentOfOptimizedForCompleteUnroll << "%)\n"); - DEBUG(dbgs() << " Unrolled size (" << UnrolledSize - << ") is less than the threshold (" << AbsoluteThreshold - << ").\n"); + assert(UnrolledCost && "UnrolledCost can't be 0 at this point."); + assert(RolledDynamicCost >= UnrolledCost && + "Cannot have a higher unrolled cost than a rolled cost!"); + + // Compute the percentage of the dynamic cost in the rolled form that is + // saved when unrolled. If unrolling dramatically reduces the estimated + // dynamic cost of the loop, we use a higher threshold to allow more + // unrolling. 
+ unsigned PercentDynamicCostSaved = + (uint64_t)(RolledDynamicCost - UnrolledCost) * 100ull / RolledDynamicCost; + + if (PercentDynamicCostSaved >= PercentDynamicCostSavedThreshold && + (int64_t)UnrolledCost - (int64_t)DynamicCostSavingsDiscount <= + (int64_t)Threshold) { + DEBUG(dbgs() << " Can fully unroll, because unrolling will reduce the " + "expected dynamic cost by " << PercentDynamicCostSaved + << "% (threshold: " << PercentDynamicCostSavedThreshold + << "%)\n" + << " and the unrolled cost (" << UnrolledCost + << ") is less than the max threshold (" + << DynamicCostSavingsDiscount << ").\n"); return true; } DEBUG(dbgs() << " Too large to fully unroll:\n"); - DEBUG(dbgs() << " Unrolled size: " << UnrolledSize << "\n"); - DEBUG(dbgs() << " Estimated number of optimized instructions: " - << NumberOfOptimizedInstructions << "\n"); - DEBUG(dbgs() << " Absolute threshold: " << AbsoluteThreshold << "\n"); - DEBUG(dbgs() << " Minimum percent of removed instructions: " - << PercentOfOptimizedForCompleteUnroll << "\n"); - DEBUG(dbgs() << " Threshold for small loops: " << Threshold << "\n"); + DEBUG(dbgs() << " Threshold: " << Threshold << "\n"); + DEBUG(dbgs() << " Max threshold: " << DynamicCostSavingsDiscount << "\n"); + DEBUG(dbgs() << " Percent cost saved threshold: " + << PercentDynamicCostSavedThreshold << "%\n"); + DEBUG(dbgs() << " Unrolled cost: " << UnrolledCost << "\n"); + DEBUG(dbgs() << " Rolled dynamic cost: " << RolledDynamicCost << "\n"); + DEBUG(dbgs() << " Percent cost saved: " << PercentDynamicCostSaved + << "\n"); return false; } @@ -899,9 +802,11 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { } unsigned Threshold, PartialThreshold; - unsigned AbsoluteThreshold, PercentOfOptimizedForCompleteUnroll; + unsigned PercentDynamicCostSavedThreshold; + unsigned DynamicCostSavingsDiscount; selectThresholds(L, HasPragma, UP, Threshold, PartialThreshold, - AbsoluteThreshold, PercentOfOptimizedForCompleteUnroll); + PercentDynamicCostSavedThreshold, + DynamicCostSavingsDiscount); // Given Count, TripCount and thresholds determine the type of // unrolling which is to be performed. @@ -910,20 +815,18 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { if (TripCount && Count == TripCount) { Unrolling = Partial; // If the loop is really small, we don't need to run an expensive analysis. - if (canUnrollCompletely( - L, Threshold, AbsoluteThreshold, - UnrolledSize, 0, 100)) { + if (canUnrollCompletely(L, Threshold, 100, DynamicCostSavingsDiscount, + UnrolledSize, UnrolledSize)) { Unrolling = Full; } else { // The loop isn't that small, but we still can fully unroll it if that // helps to remove a significant number of instructions. // To check that, run additional analysis on the loop. 
- if (Optional<EstimatedUnrollCost> Cost = - analyzeLoopUnrollCost(L, TripCount, *SE, TTI, AbsoluteThreshold)) - if (canUnrollCompletely(L, Threshold, AbsoluteThreshold, - Cost->UnrolledLoopSize, - Cost->NumberOfOptimizedInstructions, - PercentOfOptimizedForCompleteUnroll)) { + if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost( + L, TripCount, *SE, TTI, Threshold + DynamicCostSavingsDiscount)) + if (canUnrollCompletely(L, Threshold, PercentDynamicCostSavedThreshold, + DynamicCostSavingsDiscount, Cost->UnrolledCost, + Cost->RolledDynamicCost)) { Unrolling = Full; } } diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 66d6ac6f3a09..2bdf670f67e3 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -510,7 +510,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { // Check that nothing touches the dest of the "copy" between // the call and the store. AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); - AliasAnalysis::Location StoreLoc = AA.getLocation(SI); + AliasAnalysis::Location StoreLoc = MemoryLocation::get(SI); for (BasicBlock::iterator I = --BasicBlock::iterator(SI), E = C; I != E; --I) { if (AA.getModRefInfo(&*I, StoreLoc) != AliasAnalysis::NoModRef) { @@ -802,9 +802,8 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep) { // // NOTE: This is conservative, it will stop on any read from the source loc, // not just the defining memcpy. - MemDepResult SourceDep = - MD->getPointerDependencyFrom(AA.getLocationForSource(MDep), - false, M, M->getParent()); + MemDepResult SourceDep = MD->getPointerDependencyFrom( + MemoryLocation::getForSource(MDep), false, M, M->getParent()); if (!SourceDep.isClobber() || SourceDep.getInst() != MDep) return false; @@ -812,7 +811,8 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep) { // source and dest might overlap. We still want to eliminate the intermediate // value, but we have to generate a memmove instead of memcpy. bool UseMemMove = false; - if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(MDep))) + if (!AA.isNoAlias(MemoryLocation::getForDest(M), + MemoryLocation::getForSource(MDep))) UseMemMove = true; // If all checks passed, then we can transform M. @@ -860,9 +860,8 @@ bool MemCpyOpt::processMemSetMemCpyDependence(MemCpyInst *MemCpy, return false; // Check that there are no other dependencies on the memset destination. - MemDepResult DstDepInfo = - MD->getPointerDependencyFrom(AliasAnalysis::getLocationForDest(MemSet), - false, MemCpy, MemCpy->getParent()); + MemDepResult DstDepInfo = MD->getPointerDependencyFrom( + MemoryLocation::getForDest(MemSet), false, MemCpy, MemCpy->getParent()); if (DstDepInfo.getInst() != MemSet) return false; @@ -998,7 +997,7 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) { } } - AliasAnalysis::Location SrcLoc = AliasAnalysis::getLocationForSource(M); + AliasAnalysis::Location SrcLoc = MemoryLocation::getForSource(M); MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(SrcLoc, true, M, M->getParent()); @@ -1047,7 +1046,8 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) { return false; // See if the pointers alias. 
- if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(M))) + if (!AA.isNoAlias(MemoryLocation::getForDest(M), + MemoryLocation::getForSource(M))) return false; DEBUG(dbgs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n"); @@ -1121,8 +1121,8 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) { // NOTE: This is conservative, it will stop on any read from the source loc, // not just the defining memcpy. MemDepResult SourceDep = - MD->getPointerDependencyFrom(AliasAnalysis::getLocationForSource(MDep), - false, CS.getInstruction(), MDep->getParent()); + MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false, + CS.getInstruction(), MDep->getParent()); if (!SourceDep.isClobber() || SourceDep.getInst() != MDep) return false; diff --git a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp index 611a941b0b21..776dfb4d487f 100644 --- a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp +++ b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp @@ -241,7 +241,7 @@ bool MergedLoadStoreMotion::isDiamondHead(BasicBlock *BB) { bool MergedLoadStoreMotion::isLoadHoistBarrierInRange(const Instruction& Start, const Instruction& End, LoadInst* LI) { - AliasAnalysis::Location Loc = AA->getLocation(LI); + AliasAnalysis::Location Loc = MemoryLocation::get(LI); return AA->canInstructionRangeModRef(Start, End, Loc, AliasAnalysis::Mod); } @@ -266,8 +266,8 @@ LoadInst *MergedLoadStoreMotion::canHoistFromBlock(BasicBlock *BB1, LoadInst *Load1 = dyn_cast<LoadInst>(Inst); BasicBlock *BB0 = Load0->getParent(); - AliasAnalysis::Location Loc0 = AA->getLocation(Load0); - AliasAnalysis::Location Loc1 = AA->getLocation(Load1); + AliasAnalysis::Location Loc0 = MemoryLocation::get(Load0); + AliasAnalysis::Location Loc1 = MemoryLocation::get(Load1); if (AA->isMustAlias(Loc0, Loc1) && Load0->isSameOperationAs(Load1) && !isLoadHoistBarrierInRange(BB1->front(), *Load1, Load1) && !isLoadHoistBarrierInRange(BB0->front(), *Load0, Load0)) { @@ -425,8 +425,8 @@ StoreInst *MergedLoadStoreMotion::canSinkFromBlock(BasicBlock *BB1, StoreInst *Store1 = cast<StoreInst>(Inst); - AliasAnalysis::Location Loc0 = AA->getLocation(Store0); - AliasAnalysis::Location Loc1 = AA->getLocation(Store1); + AliasAnalysis::Location Loc0 = MemoryLocation::get(Store0); + AliasAnalysis::Location Loc1 = MemoryLocation::get(Store1); if (AA->isMustAlias(Loc0, Loc1) && Store0->isSameOperationAs(Store1) && !isStoreSinkBarrierInRange(*(std::next(BasicBlock::iterator(Store1))), BB1->back(), Loc1) && diff --git a/lib/Transforms/Scalar/NaryReassociate.cpp b/lib/Transforms/Scalar/NaryReassociate.cpp index 5b370e04088f..4cf68b00da0a 100644 --- a/lib/Transforms/Scalar/NaryReassociate.cpp +++ b/lib/Transforms/Scalar/NaryReassociate.cpp @@ -234,6 +234,7 @@ bool NaryReassociate::doOneIteration(Function &F) { BasicBlock *BB = Node->getBlock(); for (auto I = BB->begin(); I != BB->end(); ++I) { if (SE->isSCEVable(I->getType()) && isPotentiallyNaryReassociable(I)) { + const SCEV *OldSCEV = SE->getSCEV(I); if (Instruction *NewI = tryReassociate(I)) { Changed = true; SE->forgetValue(I); @@ -243,7 +244,28 @@ bool NaryReassociate::doOneIteration(Function &F) { } // Add the rewritten instruction to SeenExprs; the original instruction // is deleted. - SeenExprs[SE->getSCEV(I)].push_back(I); + const SCEV *NewSCEV = SE->getSCEV(I); + SeenExprs[NewSCEV].push_back(I); + // Ideally, NewSCEV should equal OldSCEV because tryReassociate(I) + // is equivalent to I. 
However, ScalarEvolution::getSCEV may + // weaken nsw causing NewSCEV not to equal OldSCEV. For example, suppose + // we reassociate + // I = &a[sext(i +nsw j)] // assuming sizeof(a[0]) = 4 + // to + // NewI = &a[sext(i)] + sext(j). + // + // ScalarEvolution computes + // getSCEV(I) = a + 4 * sext(i + j) + // getSCEV(newI) = a + 4 * sext(i) + 4 * sext(j) + // which are different SCEVs. + // + // To alleviate this issue of ScalarEvolution not always capturing + // equivalence, we add I to SeenExprs[OldSCEV] as well so that we can + // map both SCEV before and after tryReassociate(I) to I. + // + // This improvement is exercised in @reassociate_gep_nsw in nary-gep.ll. + if (NewSCEV != OldSCEV) + SeenExprs[OldSCEV].push_back(I); } } } @@ -295,8 +317,10 @@ static bool isGEPFoldable(GetElementPtrInst *GEP, BaseOffset += DL->getStructLayout(STy)->getElementOffset(Field); } } + + unsigned AddrSpace = GEP->getPointerAddressSpace(); return TTI->isLegalAddressingMode(GEP->getType()->getElementType(), BaseGV, - BaseOffset, HasBaseReg, Scale); + BaseOffset, HasBaseReg, Scale, AddrSpace); } Instruction *NaryReassociate::tryReassociateGEP(GetElementPtrInst *GEP) { diff --git a/lib/Transforms/Scalar/PlaceSafepoints.cpp b/lib/Transforms/Scalar/PlaceSafepoints.cpp index 3e7deeba9f21..9ecaf102574a 100644 --- a/lib/Transforms/Scalar/PlaceSafepoints.cpp +++ b/lib/Transforms/Scalar/PlaceSafepoints.cpp @@ -496,7 +496,7 @@ template <typename T> static void unique_unsorted(std::vector<T> &vec) { } } -static std::string GCSafepointPollName("gc.safepoint_poll"); +static const char *const GCSafepointPollName = "gc.safepoint_poll"; static bool isGCSafepointPoll(Function &F) { return F.getName().equals(GCSafepointPollName); diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index b677523d7032..6c66b58729e9 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -733,7 +733,7 @@ static bool LinearizeExprTree(BinaryOperator *I, if (Ops.empty()) { Constant *Identity = ConstantExpr::getBinOpIdentity(Opcode, I->getType()); assert(Identity && "Associative operation without identity!"); - Ops.push_back(std::make_pair(Identity, APInt(Bitwidth, 1))); + Ops.emplace_back(Identity, APInt(Bitwidth, 1)); } return Changed; @@ -1966,38 +1966,35 @@ Instruction *Reassociate::canonicalizeNegConstExpr(Instruction *I) { if (!I->hasOneUse() || I->getType()->isVectorTy()) return nullptr; - // Must be a mul, fmul, or fdiv instruction. + // Must be a fmul or fdiv instruction. unsigned Opcode = I->getOpcode(); - if (Opcode != Instruction::Mul && Opcode != Instruction::FMul && - Opcode != Instruction::FDiv) + if (Opcode != Instruction::FMul && Opcode != Instruction::FDiv) return nullptr; - // Must have at least one constant operand. - Constant *C0 = dyn_cast<Constant>(I->getOperand(0)); - Constant *C1 = dyn_cast<Constant>(I->getOperand(1)); - if (!C0 && !C1) + auto *C0 = dyn_cast<ConstantFP>(I->getOperand(0)); + auto *C1 = dyn_cast<ConstantFP>(I->getOperand(1)); + + // Both operands are constant, let it get constant folded away. + if (C0 && C1) return nullptr; - // Must be a negative ConstantInt or ConstantFP. - Constant *C = C0 ? C0 : C1; - unsigned ConstIdx = C0 ? 0 : 1; - if (auto *CI = dyn_cast<ConstantInt>(C)) { - if (!CI->isNegative() || CI->isMinValue(true)) - return nullptr; - } else if (auto *CF = dyn_cast<ConstantFP>(C)) { - if (!CF->isNegative()) - return nullptr; - } else + ConstantFP *CF = C0 ? C0 : C1; + + // Must have one constant operand. 
+ if (!CF) + return nullptr; + + // Must be a negative ConstantFP. + if (!CF->isNegative()) return nullptr; // User must be a binary operator with one or more uses. Instruction *User = I->user_back(); - if (!isa<BinaryOperator>(User) || !User->getNumUses()) + if (!isa<BinaryOperator>(User) || !User->hasNUsesOrMore(1)) return nullptr; unsigned UserOpcode = User->getOpcode(); - if (UserOpcode != Instruction::Add && UserOpcode != Instruction::FAdd && - UserOpcode != Instruction::Sub && UserOpcode != Instruction::FSub) + if (UserOpcode != Instruction::FAdd && UserOpcode != Instruction::FSub) return nullptr; // Subtraction is not commutative. Explicitly, the following transform is @@ -2006,14 +2003,9 @@ Instruction *Reassociate::canonicalizeNegConstExpr(Instruction *I) { return nullptr; // Change the sign of the constant. - if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) - I->setOperand(ConstIdx, ConstantInt::get(CI->getContext(), -CI->getValue())); - else { - ConstantFP *CF = cast<ConstantFP>(C); - APFloat Val = CF->getValueAPF(); - Val.changeSign(); - I->setOperand(ConstIdx, ConstantFP::get(CF->getContext(), Val)); - } + APFloat Val = CF->getValueAPF(); + Val.changeSign(); + I->setOperand(C0 ? 0 : 1, ConstantFP::get(CF->getContext(), Val)); // Canonicalize I to RHS to simplify the next bit of logic. E.g., // ((-Const*y) + x) -> (x + (-Const*y)). @@ -2023,15 +2015,9 @@ Instruction *Reassociate::canonicalizeNegConstExpr(Instruction *I) { Value *Op0 = User->getOperand(0); Value *Op1 = User->getOperand(1); BinaryOperator *NI; - switch(UserOpcode) { + switch (UserOpcode) { default: llvm_unreachable("Unexpected Opcode!"); - case Instruction::Add: - NI = BinaryOperator::CreateSub(Op0, Op1); - break; - case Instruction::Sub: - NI = BinaryOperator::CreateAdd(Op0, Op1); - break; case Instruction::FAdd: NI = BinaryOperator::CreateFSub(Op0, Op1); NI->setFastMathFlags(cast<FPMathOperator>(User)->getFastMathFlags()); diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index 6cf765a8438c..6f6ba72c6e6f 100644 --- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -30,6 +30,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/Statepoint.h" #include "llvm/IR/Value.h" #include "llvm/IR/Verifier.h" @@ -74,13 +75,27 @@ static cl::opt<bool, true> ClobberNonLiveOverride("rs4gc-clobber-non-live", cl::Hidden); namespace { -struct RewriteStatepointsForGC : public FunctionPass { +struct RewriteStatepointsForGC : public ModulePass { static char ID; // Pass identification, replacement for typeid - RewriteStatepointsForGC() : FunctionPass(ID) { + RewriteStatepointsForGC() : ModulePass(ID) { initializeRewriteStatepointsForGCPass(*PassRegistry::getPassRegistry()); } - bool runOnFunction(Function &F) override; + bool runOnFunction(Function &F); + bool runOnModule(Module &M) override { + bool Changed = false; + for (Function &F : M) + Changed |= runOnFunction(F); + + if (Changed) { + // stripDereferenceabilityInfo asserts that shouldRewriteStatepointsIn + // returns true for at least one function in the module. Since at least + // one function changed, we know that the precondition is satisfied. 
+ stripDereferenceabilityInfo(M); + } + + return Changed; + } void getAnalysisUsage(AnalysisUsage &AU) const override { // We add and rewrite a bunch of instructions, but don't really do much @@ -88,12 +103,26 @@ struct RewriteStatepointsForGC : public FunctionPass { AU.addRequired<DominatorTreeWrapperPass>(); AU.addRequired<TargetTransformInfoWrapperPass>(); } + + /// The IR fed into RewriteStatepointsForGC may have had attributes implying + /// dereferenceability that are no longer valid/correct after + /// RewriteStatepointsForGC has run. This is because semantically, after + /// RewriteStatepointsForGC runs, all calls to gc.statepoint "free" the entire + /// heap. stripDereferenceabilityInfo (conservatively) restores correctness + /// by erasing all attributes in the module that externally imply + /// dereferenceability. + /// + void stripDereferenceabilityInfo(Module &M); + + // Helpers for stripDereferenceabilityInfo + void stripDereferenceabilityInfoFromBody(Function &F); + void stripDereferenceabilityInfoFromPrototype(Function &F); }; } // namespace char RewriteStatepointsForGC::ID = 0; -FunctionPass *llvm::createRewriteStatepointsForGCPass() { +ModulePass *llvm::createRewriteStatepointsForGCPass() { return new RewriteStatepointsForGC(); } @@ -1031,14 +1060,11 @@ static void recomputeLiveInValues( // goes through the statepoint. We might need to split an edge to make this // possible. static BasicBlock * -normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent, Pass *P) { - DominatorTree *DT = nullptr; - if (auto *DTP = P->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) - DT = &DTP->getDomTree(); - +normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent, + DominatorTree &DT) { BasicBlock *Ret = BB; if (!BB->getUniquePredecessor()) { - Ret = SplitBlockPredecessors(BB, InvokeParent, "", nullptr, DT); + Ret = SplitBlockPredecessors(BB, InvokeParent, "", nullptr, &DT); } // Now that 'ret' has unique predecessor we can safely remove all phi nodes @@ -2016,9 +2042,9 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P, continue; InvokeInst *invoke = cast<InvokeInst>(CS.getInstruction()); normalizeForInvokeSafepoint(invoke->getNormalDest(), invoke->getParent(), - P); + DT); normalizeForInvokeSafepoint(invoke->getUnwindDest(), invoke->getParent(), - P); + DT); } // A list of dummy calls added to the IR to keep various values obviously @@ -2197,6 +2223,72 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P, return !records.empty(); } +// Handles both return values and arguments for Functions and CallSites. 
diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index 3a782d159dab..4a875311881a 100644
--- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -852,9 +852,11 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
     TargetTransformInfo &TTI =
         getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
             *GEP->getParent()->getParent());
+    unsigned AddrSpace = GEP->getPointerAddressSpace();
     if (!TTI.isLegalAddressingMode(GEP->getType()->getElementType(),
                                    /*BaseGV=*/nullptr, AccumulativeByteOffset,
-                                   /*HasBaseReg=*/true, /*Scale=*/0)) {
+                                   /*HasBaseReg=*/true, /*Scale=*/0,
+                                   AddrSpace)) {
       return Changed;
     }
   }
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 8566cd9736d3..f0e3ffdb95ac 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -193,11 +193,18 @@ namespace {
 struct CFGSimplifyPass : public FunctionPass {
   static char ID; // Pass identification, replacement for typeid
   unsigned BonusInstThreshold;
-  CFGSimplifyPass(int T = -1) : FunctionPass(ID) {
+  std::function<bool(const Function &)> PredicateFtor;
+
+  CFGSimplifyPass(int T = -1,
+                  std::function<bool(const Function &)> Ftor = nullptr)
+      : FunctionPass(ID), PredicateFtor(Ftor) {
     BonusInstThreshold = (T == -1) ? UserBonusInstThreshold : unsigned(T);
     initializeCFGSimplifyPassPass(*PassRegistry::getPassRegistry());
   }
   bool runOnFunction(Function &F) override {
+    if (PredicateFtor && !PredicateFtor(F))
+      return false;
+
     if (skipOptnoneFunction(F))
       return false;
 
@@ -224,7 +231,9 @@ INITIALIZE_PASS_END(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false,
                     false)
 
 // Public interface to the CFGSimplification pass
-FunctionPass *llvm::createCFGSimplificationPass(int Threshold) {
-  return new CFGSimplifyPass(Threshold);
+FunctionPass *
+llvm::createCFGSimplificationPass(int Threshold,
+                                  std::function<bool(const Function &)> Ftor) {
+  return new CFGSimplifyPass(Threshold, Ftor);
 }
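The predicate functor added to CFGSimplifyPass above gives callers a per-function opt-out. A hedged usage sketch, assuming the declaration in llvm/Transforms/Scalar.h gained matching parameters; the "keep-cfg" attribute name is invented for illustration.

// Sketch: only simplify functions that do not carry a custom opt-out.
#include "llvm/IR/Function.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Transforms/Scalar.h"

void addSelectiveSimplifyCFG(llvm::legacy::PassManagerBase &PM) {
  PM.add(llvm::createCFGSimplificationPass(
      /*Threshold=*/-1, [](const llvm::Function &F) {
        // Skip functions tagged with the (hypothetical) "keep-cfg" attribute.
        return !F.hasFnAttribute("keep-cfg");
      }));
}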
diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp
index b169d5612f00..078c6a921a08 100644
--- a/lib/Transforms/Scalar/Sink.cpp
+++ b/lib/Transforms/Scalar/Sink.cpp
@@ -163,7 +163,7 @@ static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA,
   }
 
   if (LoadInst *L = dyn_cast<LoadInst>(Inst)) {
-    AliasAnalysis::Location Loc = AA->getLocation(L);
+    AliasAnalysis::Location Loc = MemoryLocation::get(L);
     for (Instruction *S : Stores)
       if (AA->getModRefInfo(S, Loc) & AliasAnalysis::Mod)
         return false;
@@ -172,6 +172,12 @@ static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA,
   if (isa<TerminatorInst>(Inst) || isa<PHINode>(Inst))
     return false;
 
+  // Convergent operations can only be moved to control equivalent blocks.
+  if (auto CS = CallSite(Inst)) {
+    if (CS.hasFnAttr(Attribute::Convergent))
+      return false;
+  }
+
   return true;
 }
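The Sink.cpp hunk rejects convergent calls because their correctness depends on which set of threads reaches them together (GPU barriers are the canonical case), so they may only move between control-equivalent blocks. A hedged restatement of the check in isolation; mayBeSunk is an invented name, and the CallSite and attribute APIs are the 3.7-era ones used in the hunk above.

// Sketch: the convergence check from isSafeToMove, factored out.
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Instruction.h"

static bool mayBeSunk(llvm::Instruction *Inst) {
  if (auto CS = llvm::CallSite(Inst))              // call or invoke?
    if (CS.hasFnAttr(llvm::Attribute::Convergent))
      return false; // moving it could change which threads converge
  return true;      // the real pass performs further memory and PHI checks
}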