author    Dimitry Andric <dim@FreeBSD.org>    2015-07-05 14:21:36 +0000
committer Dimitry Andric <dim@FreeBSD.org>    2015-07-05 14:21:36 +0000
commit    1a82d4c088707c791c792f6822f611b47a12bdfe (patch)
tree      7c411f9b5d807f7f204fdd16965d8925a82b6d18 /lib/Transforms/Scalar
parent    3a0822f094b578157263e04114075ad7df81db41 (diff)
download  src-1a82d4c088707c791c792f6822f611b47a12bdfe.tar.gz
          src-1a82d4c088707c791c792f6822f611b47a12bdfe.zip

Vendor import of llvm trunk r241361 (tag: vendor/llvm/llvm-trunk-r241361)

Notes:
    svn path=/vendor/llvm/dist/; revision=285163
    svn path=/vendor/llvm/llvm-trunk-r241361/; revision=285164; tag=vendor/llvm/llvm-trunk-r241361
Diffstat (limited to 'lib/Transforms/Scalar')
-rw-r--r--  lib/Transforms/Scalar/ADCE.cpp                              |   2
-rw-r--r--  lib/Transforms/Scalar/AlignmentFromAssumptions.cpp          |   2
-rw-r--r--  lib/Transforms/Scalar/BDCE.cpp                              |   2
-rw-r--r--  lib/Transforms/Scalar/ConstantHoisting.cpp                  |   2
-rw-r--r--  lib/Transforms/Scalar/ConstantProp.cpp                      |   2
-rw-r--r--  lib/Transforms/Scalar/CorrelatedValuePropagation.cpp        |   2
-rw-r--r--  lib/Transforms/Scalar/DCE.cpp                               |   4
-rw-r--r--  lib/Transforms/Scalar/DeadStoreElimination.cpp              |   2
-rw-r--r--  lib/Transforms/Scalar/EarlyCSE.cpp                          |  12
-rw-r--r--  lib/Transforms/Scalar/FlattenCFGPass.cpp                    |   2
-rw-r--r--  lib/Transforms/Scalar/Float2Int.cpp                         |   2
-rw-r--r--  lib/Transforms/Scalar/GVN.cpp                               |  20
-rw-r--r--  lib/Transforms/Scalar/IndVarSimplify.cpp                    |  15
-rw-r--r--  lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp    |   4
-rw-r--r--  lib/Transforms/Scalar/JumpThreading.cpp                     |   2
-rw-r--r--  lib/Transforms/Scalar/LICM.cpp                              |   4
-rw-r--r--  lib/Transforms/Scalar/LoadCombine.cpp                       |   2
-rw-r--r--  lib/Transforms/Scalar/LoopDeletion.cpp                      |   2
-rw-r--r--  lib/Transforms/Scalar/LoopDistribute.cpp                    |  54
-rw-r--r--  lib/Transforms/Scalar/LoopIdiomRecognize.cpp                |   2
-rw-r--r--  lib/Transforms/Scalar/LoopInstSimplify.cpp                  |   2
-rw-r--r--  lib/Transforms/Scalar/LoopRerollPass.cpp                    |   2
-rw-r--r--  lib/Transforms/Scalar/LoopRotation.cpp                      |   2
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp                |  18
-rw-r--r--  lib/Transforms/Scalar/LoopUnrollPass.cpp                    |   2
-rw-r--r--  lib/Transforms/Scalar/LoopUnswitch.cpp                      | 194
-rw-r--r--  lib/Transforms/Scalar/LowerAtomic.cpp                       |   2
-rw-r--r--  lib/Transforms/Scalar/LowerExpectIntrinsic.cpp              |   2
-rw-r--r--  lib/Transforms/Scalar/MemCpyOptimizer.cpp                   |   6
-rw-r--r--  lib/Transforms/Scalar/MergedLoadStoreMotion.cpp             |   2
-rw-r--r--  lib/Transforms/Scalar/NaryReassociate.cpp                   |  69
-rw-r--r--  lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp           |   2
-rw-r--r--  lib/Transforms/Scalar/PlaceSafepoints.cpp                   |   4
-rw-r--r--  lib/Transforms/Scalar/Reassociate.cpp                       |  14
-rw-r--r--  lib/Transforms/Scalar/Reg2Mem.cpp                           |   2
-rw-r--r--  lib/Transforms/Scalar/RewriteStatepointsForGC.cpp           | 183
-rw-r--r--  lib/Transforms/Scalar/SCCP.cpp                              |   2
-rw-r--r--  lib/Transforms/Scalar/SROA.cpp                              |  12
-rw-r--r--  lib/Transforms/Scalar/SampleProfile.cpp                     |   2
-rw-r--r--  lib/Transforms/Scalar/ScalarReplAggregates.cpp              |   4
-rw-r--r--  lib/Transforms/Scalar/SimplifyCFGPass.cpp                   |  13
-rw-r--r--  lib/Transforms/Scalar/StraightLineStrengthReduce.cpp        |   6
-rw-r--r--  lib/Transforms/Scalar/TailRecursionElimination.cpp          |   4
43 files changed, 442 insertions(+), 246 deletions(-)
diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp
index fe0224bb56c7..d6fc91641588 100644
--- a/lib/Transforms/Scalar/ADCE.cpp
+++ b/lib/Transforms/Scalar/ADCE.cpp
@@ -44,7 +44,7 @@ struct ADCE : public FunctionPass {
AU.setPreservesCFG();
}
};
-} // namespace
+}
char ADCE::ID = 0;
INITIALIZE_PASS(ADCE, "adce", "Aggressive Dead Code Elimination", false, false)
diff --git a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index a4e5446a2b12..8918909f484a 100644
--- a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -76,7 +76,7 @@ struct AlignmentFromAssumptions : public FunctionPass {
const SCEV *&OffSCEV);
bool processAssumption(CallInst *I);
};
-} // namespace
+}
char AlignmentFromAssumptions::ID = 0;
static const char aip_name[] = "Alignment from assumptions";
diff --git a/lib/Transforms/Scalar/BDCE.cpp b/lib/Transforms/Scalar/BDCE.cpp
index 8ffbacddda68..09c605e76737 100644
--- a/lib/Transforms/Scalar/BDCE.cpp
+++ b/lib/Transforms/Scalar/BDCE.cpp
@@ -66,7 +66,7 @@ struct BDCE : public FunctionPass {
AssumptionCache *AC;
DominatorTree *DT;
};
-} // namespace
+}
char BDCE::ID = 0;
INITIALIZE_PASS_BEGIN(BDCE, "bdce", "Bit-Tracking Dead Code Elimination",
diff --git a/lib/Transforms/Scalar/ConstantHoisting.cpp b/lib/Transforms/Scalar/ConstantHoisting.cpp
index cc1dc9435a05..4288742dd3eb 100644
--- a/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -171,7 +171,7 @@ private:
void deleteDeadCastInst() const;
bool optimizeConstants(Function &Fn);
};
-} // namespace
+}
char ConstantHoisting::ID = 0;
INITIALIZE_PASS_BEGIN(ConstantHoisting, "consthoist", "Constant Hoisting",
diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp
index e3df86ecf169..c974ebb9456f 100644
--- a/lib/Transforms/Scalar/ConstantProp.cpp
+++ b/lib/Transforms/Scalar/ConstantProp.cpp
@@ -47,7 +47,7 @@ namespace {
AU.addRequired<TargetLibraryInfoWrapperPass>();
}
};
-} // namespace
+}
char ConstantPropagation::ID = 0;
INITIALIZE_PASS_BEGIN(ConstantPropagation, "constprop",
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index b1809b7fae08..79624b2e4c47 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -56,7 +56,7 @@ namespace {
AU.addRequired<LazyValueInfo>();
}
};
-} // namespace
+}
char CorrelatedValuePropagation::ID = 0;
INITIALIZE_PASS_BEGIN(CorrelatedValuePropagation, "correlated-propagation",
diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp
index aa628e5aca81..3b262a23091f 100644
--- a/lib/Transforms/Scalar/DCE.cpp
+++ b/lib/Transforms/Scalar/DCE.cpp
@@ -60,7 +60,7 @@ namespace {
AU.setPreservesCFG();
}
};
-} // namespace
+}
char DeadInstElimination::ID = 0;
INITIALIZE_PASS(DeadInstElimination, "die",
@@ -87,7 +87,7 @@ namespace {
AU.setPreservesCFG();
}
};
-} // namespace
+}
char DCE::ID = 0;
INITIALIZE_PASS(DCE, "dce", "Dead Code Elimination", false, false)
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index c99dc5fc8445..c50558434da2 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -92,7 +92,7 @@ namespace {
AU.addPreserved<MemoryDependenceAnalysis>();
}
};
-} // namespace
+}
char DSE::ID = 0;
INITIALIZE_PASS_BEGIN(DSE, "dse", "Dead Store Elimination", false, false)
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index 8b629eaca9d4..d536a937dce1 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -72,7 +72,7 @@ struct SimpleValue {
isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst);
}
};
-} // namespace
+}
namespace llvm {
template <> struct DenseMapInfo<SimpleValue> {
@@ -85,7 +85,7 @@ template <> struct DenseMapInfo<SimpleValue> {
static unsigned getHashValue(SimpleValue Val);
static bool isEqual(SimpleValue LHS, SimpleValue RHS);
};
-} // namespace llvm
+}
unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
Instruction *Inst = Val.Inst;
@@ -219,7 +219,7 @@ struct CallValue {
return true;
}
};
-} // namespace
+}
namespace llvm {
template <> struct DenseMapInfo<CallValue> {
@@ -232,7 +232,7 @@ template <> struct DenseMapInfo<CallValue> {
static unsigned getHashValue(CallValue Val);
static bool isEqual(CallValue LHS, CallValue RHS);
};
-} // namespace llvm
+}
unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
Instruction *Inst = Val.Inst;
@@ -447,7 +447,7 @@ private:
ExpectedType);
}
};
-} // namespace
+}
bool EarlyCSE::processNode(DomTreeNode *Node) {
BasicBlock *BB = Node->getBlock();
@@ -764,7 +764,7 @@ public:
AU.setPreservesCFG();
}
};
-} // namespace
+}
char EarlyCSELegacyPass::ID = 0;
diff --git a/lib/Transforms/Scalar/FlattenCFGPass.cpp b/lib/Transforms/Scalar/FlattenCFGPass.cpp
index dd6ea8d455c5..0430c1898c8d 100644
--- a/lib/Transforms/Scalar/FlattenCFGPass.cpp
+++ b/lib/Transforms/Scalar/FlattenCFGPass.cpp
@@ -36,7 +36,7 @@ public:
private:
AliasAnalysis *AA;
};
-} // namespace
+}
char FlattenCFGPass::ID = 0;
INITIALIZE_PASS_BEGIN(FlattenCFGPass, "flattencfg", "Flatten the CFG", false,
diff --git a/lib/Transforms/Scalar/Float2Int.cpp b/lib/Transforms/Scalar/Float2Int.cpp
index bb90c5f73239..c9314229c38b 100644
--- a/lib/Transforms/Scalar/Float2Int.cpp
+++ b/lib/Transforms/Scalar/Float2Int.cpp
@@ -79,7 +79,7 @@ namespace {
MapVector<Instruction*, Value*> ConvertedInsts;
LLVMContext *Ctx;
};
-} // namespace
+}
char Float2Int::ID = 0;
INITIALIZE_PASS(Float2Int, "float2int", "Float to int", false, false)
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index d9308c4e3710..60903c8b4aaf 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -138,7 +138,7 @@ namespace {
uint32_t getNextUnusedValueNumber() { return nextValueNumber; }
void verifyRemoved(const Value *) const;
};
-} // namespace
+}
namespace llvm {
template <> struct DenseMapInfo<Expression> {
@@ -159,7 +159,7 @@ template <> struct DenseMapInfo<Expression> {
}
};
-} // namespace llvm
+}
//===----------------------------------------------------------------------===//
// ValueTable Internal Functions
@@ -723,7 +723,7 @@ namespace {
};
char GVN::ID = 0;
-} // namespace
+}
// The public interface to this file...
FunctionPass *llvm::createGVNPass(bool NoLoads) {
@@ -1783,13 +1783,9 @@ static void patchReplacementInstruction(Instruction *I, Value *Repl) {
// being replaced.
BinaryOperator *Op = dyn_cast<BinaryOperator>(I);
BinaryOperator *ReplOp = dyn_cast<BinaryOperator>(Repl);
- if (Op && ReplOp && isa<OverflowingBinaryOperator>(Op) &&
- isa<OverflowingBinaryOperator>(ReplOp)) {
- if (ReplOp->hasNoSignedWrap() && !Op->hasNoSignedWrap())
- ReplOp->setHasNoSignedWrap(false);
- if (ReplOp->hasNoUnsignedWrap() && !Op->hasNoUnsignedWrap())
- ReplOp->setHasNoUnsignedWrap(false);
- }
+ if (Op && ReplOp)
+ ReplOp->andIRFlags(Op);
+
if (Instruction *ReplInst = dyn_cast<Instruction>(Repl)) {
// FIXME: If both the original and replacement value are part of the
// same control-flow region (meaning that the execution of one
@@ -2808,6 +2804,10 @@ bool GVN::processFoldableCondBr(BranchInst *BI) {
if (!BI || BI->isUnconditional())
return false;
+ // If a branch has two identical successors, we cannot declare either dead.
+ if (BI->getSuccessor(0) == BI->getSuccessor(1))
+ return false;
+
ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
if (!Cond)
return false;
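
Two functional changes ride along with the mechanical namespace-comment churn in GVN.cpp: patchReplacementInstruction now intersects the IR flags of the replacement and the replaced instruction via andIRFlags rather than hand-clearing only the wrap flags, and processFoldableCondBr now refuses branches whose two successors are the same block, since neither edge of such a branch can be declared dead. A minimal sketch of the wrap-flag part of that intersection, as a hypothetical standalone helper rather than the in-tree method (andIRFlags also intersects the exact and fast-math flags):

    #include "llvm/IR/InstrTypes.h"

    // ReplOp will stand in for Op at Op's program point, so it may only keep
    // a poison-generating flag that both instructions guarantee.
    static void intersectWrapFlags(llvm::BinaryOperator *ReplOp,
                                   const llvm::BinaryOperator *Op) {
      ReplOp->setHasNoSignedWrap(ReplOp->hasNoSignedWrap() &&
                                 Op->hasNoSignedWrap());
      ReplOp->setHasNoUnsignedWrap(ReplOp->hasNoUnsignedWrap() &&
                                   Op->hasNoUnsignedWrap());
    }
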
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index e931382ea98f..6f0375487af6 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -136,7 +136,7 @@ namespace {
void SinkUnusedInvariants(Loop *L);
};
-} // namespace
+}
char IndVarSimplify::ID = 0;
INITIALIZE_PASS_BEGIN(IndVarSimplify, "indvars",
@@ -494,7 +494,7 @@ struct RewritePhi {
RewritePhi(PHINode *P, unsigned I, Value *V, bool H, bool S)
: PN(P), Ith(I), Val(V), HighCost(H), SafePhi(S) {}
};
-} // namespace
+}
//===----------------------------------------------------------------------===//
// RewriteLoopExitValues - Optimize IV users outside the loop.
@@ -758,7 +758,7 @@ namespace {
WideIVInfo() : NarrowIV(nullptr), WidestNativeType(nullptr),
IsSigned(false) {}
};
-} // namespace
+}
/// visitCast - Update information about the induction variable that is
/// extended by this sign or zero extend operation. This is used to determine
@@ -1321,7 +1321,7 @@ namespace {
// Implement the interface used by simplifyUsersOfIV.
void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
};
-} // namespace
+}
/// SimplifyAndExtend - Iteratively perform simplification on a worklist of IV
/// users. Each successive simplification may push more users which may
@@ -2013,11 +2013,10 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// Now that we're done iterating through lists, clean up any instructions
// which are now dead.
- while (!DeadInsts.empty()) {
- Value *V = static_cast<Value *>(DeadInsts.pop_back_val());
- if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
+ while (!DeadInsts.empty())
+ if (Instruction *Inst =
+ dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
- }
// The Rewriter may not be used from this point on.
diff --git a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index ce1a0ca8c7d9..cbdacad8f28b 100644
--- a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -222,7 +222,7 @@ public:
};
char InductiveRangeCheckElimination::ID = 0;
-} // namespace
+}
INITIALIZE_PASS(InductiveRangeCheckElimination, "irce",
"Inductive range check elimination", false, false)
@@ -618,7 +618,7 @@ public:
bool run();
};
-} // namespace
+}
void LoopConstrainer::replacePHIBlock(PHINode *PN, BasicBlock *Block,
BasicBlock *ReplaceBy) {
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 7316db6ca02c..1130d228acb8 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -138,7 +138,7 @@ namespace {
bool SimplifyPartiallyRedundantLoad(LoadInst *LI);
bool TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB);
};
-} // namespace
+}
char JumpThreading::ID = 0;
INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading",
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index e5019463bb5f..f0e6d641b180 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -156,7 +156,7 @@ namespace {
/// Simple Analysis hook. Delete loop L from alias set map.
void deleteAnalysisLoop(Loop *L) override;
};
-} // namespace
+}
char LICM::ID = 0;
INITIALIZE_PASS_BEGIN(LICM, "licm", "Loop Invariant Code Motion", false, false)
@@ -777,7 +777,7 @@ namespace {
AST.deleteValue(I);
}
};
-} // namespace
+} // end anon namespace
/// Try to promote memory values to scalars by sinking stores out of the
/// loop and moving loads to before the loop. We do this by looping over
diff --git a/lib/Transforms/Scalar/LoadCombine.cpp b/lib/Transforms/Scalar/LoadCombine.cpp
index 3dbf6ac6ed08..c19cd19059b2 100644
--- a/lib/Transforms/Scalar/LoadCombine.cpp
+++ b/lib/Transforms/Scalar/LoadCombine.cpp
@@ -77,7 +77,7 @@ private:
bool aggregateLoads(SmallVectorImpl<LoadPOPPair> &);
bool combineLoads(SmallVectorImpl<LoadPOPPair> &);
};
-} // namespace
+}
bool LoadCombine::doInitialization(Function &F) {
DEBUG(dbgs() << "LoadCombine function: " << F.getName() << "\n");
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index 02760ffe2c68..98b068edf582 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -57,7 +57,7 @@ namespace {
bool &Changed, BasicBlock *Preheader);
};
-} // namespace
+}
char LoopDeletion::ID = 0;
INITIALIZE_PASS_BEGIN(LoopDeletion, "loop-deletion",
diff --git a/lib/Transforms/Scalar/LoopDistribute.cpp b/lib/Transforms/Scalar/LoopDistribute.cpp
index d21a7db48c51..0325d268c325 100644
--- a/lib/Transforms/Scalar/LoopDistribute.cpp
+++ b/lib/Transforms/Scalar/LoopDistribute.cpp
@@ -635,10 +635,11 @@ public:
LoopVersioning(const LoopAccessInfo &LAI, Loop *L, LoopInfo *LI,
DominatorTree *DT,
const SmallVector<int, 8> *PtrToPartition = nullptr)
- : OrigLoop(L), NonDistributedLoop(nullptr),
+ : VersionedLoop(L), NonVersionedLoop(nullptr),
PtrToPartition(PtrToPartition), LAI(LAI), LI(LI), DT(DT) {}
- /// \brief Returns true if we need memchecks to distribute the loop.
+ /// \brief Returns true if we need memchecks to disambiguate may-aliasing
+ /// accesses.
bool needsRuntimeChecks() const {
return LAI.getRuntimePointerCheck()->needsAnyChecking(PtrToPartition);
}
@@ -649,49 +650,51 @@ public:
Instruction *FirstCheckInst;
Instruction *MemRuntimeCheck;
// Add the memcheck in the original preheader (this is empty initially).
- BasicBlock *MemCheckBB = OrigLoop->getLoopPreheader();
+ BasicBlock *MemCheckBB = VersionedLoop->getLoopPreheader();
std::tie(FirstCheckInst, MemRuntimeCheck) =
LAI.addRuntimeCheck(MemCheckBB->getTerminator(), PtrToPartition);
assert(MemRuntimeCheck && "called even though needsAnyChecking = false");
// Rename the block to make the IR more readable.
- MemCheckBB->setName(OrigLoop->getHeader()->getName() + ".ldist.memcheck");
+ MemCheckBB->setName(VersionedLoop->getHeader()->getName() +
+ ".lver.memcheck");
// Create empty preheader for the loop (and after cloning for the
- // original/nondist loop).
+ // non-versioned loop).
BasicBlock *PH =
SplitBlock(MemCheckBB, MemCheckBB->getTerminator(), DT, LI);
- PH->setName(OrigLoop->getHeader()->getName() + ".ph");
+ PH->setName(VersionedLoop->getHeader()->getName() + ".ph");
// Clone the loop including the preheader.
//
// FIXME: This does not currently preserve SimplifyLoop because the exit
// block is a join between the two loops.
- SmallVector<BasicBlock *, 8> NonDistributedLoopBlocks;
- NonDistributedLoop =
- cloneLoopWithPreheader(PH, MemCheckBB, OrigLoop, VMap, ".ldist.nondist",
- LI, DT, NonDistributedLoopBlocks);
- remapInstructionsInLoop(NonDistributedLoopBlocks, VMap);
+ SmallVector<BasicBlock *, 8> NonVersionedLoopBlocks;
+ NonVersionedLoop =
+ cloneLoopWithPreheader(PH, MemCheckBB, VersionedLoop, VMap,
+ ".lver.orig", LI, DT, NonVersionedLoopBlocks);
+ remapInstructionsInLoop(NonVersionedLoopBlocks, VMap);
// Insert the conditional branch based on the result of the memchecks.
Instruction *OrigTerm = MemCheckBB->getTerminator();
- BranchInst::Create(NonDistributedLoop->getLoopPreheader(),
- OrigLoop->getLoopPreheader(), MemRuntimeCheck, OrigTerm);
+ BranchInst::Create(NonVersionedLoop->getLoopPreheader(),
+ VersionedLoop->getLoopPreheader(), MemRuntimeCheck,
+ OrigTerm);
OrigTerm->eraseFromParent();
// The loops merge in the original exit block. This is now dominated by the
// memchecking block.
- DT->changeImmediateDominator(OrigLoop->getExitBlock(), MemCheckBB);
+ DT->changeImmediateDominator(VersionedLoop->getExitBlock(), MemCheckBB);
}
/// \brief Adds the necessary PHI nodes for the versioned loops based on the
/// loop-defined values used outside of the loop.
void addPHINodes(const SmallVectorImpl<Instruction *> &DefsUsedOutside) {
- BasicBlock *PHIBlock = OrigLoop->getExitBlock();
+ BasicBlock *PHIBlock = VersionedLoop->getExitBlock();
assert(PHIBlock && "No single successor to loop exit block");
for (auto *Inst : DefsUsedOutside) {
- auto *NonDistInst = cast<Instruction>(VMap[Inst]);
+ auto *NonVersionedLoopInst = cast<Instruction>(VMap[Inst]);
PHINode *PN;
// First see if we have a single-operand PHI with the value defined by the
@@ -704,24 +707,25 @@ public:
}
// If not create it.
if (!PN) {
- PN = PHINode::Create(Inst->getType(), 2, Inst->getName() + ".ldist",
+ PN = PHINode::Create(Inst->getType(), 2, Inst->getName() + ".lver",
PHIBlock->begin());
for (auto *User : Inst->users())
- if (!OrigLoop->contains(cast<Instruction>(User)->getParent()))
+ if (!VersionedLoop->contains(cast<Instruction>(User)->getParent()))
User->replaceUsesOfWith(Inst, PN);
- PN->addIncoming(Inst, OrigLoop->getExitingBlock());
+ PN->addIncoming(Inst, VersionedLoop->getExitingBlock());
}
- // Add the new incoming value from the non-distributed loop.
- PN->addIncoming(NonDistInst, NonDistributedLoop->getExitingBlock());
+ // Add the new incoming value from the non-versioned loop.
+ PN->addIncoming(NonVersionedLoopInst,
+ NonVersionedLoop->getExitingBlock());
}
}
private:
/// \brief The original loop. This becomes the "versioned" one, i.e. control
/// goes if the memchecks all pass.
- Loop *OrigLoop;
+ Loop *VersionedLoop;
/// \brief The fall-back loop, i.e. if any of the memchecks fail.
- Loop *NonDistributedLoop;
+ Loop *NonVersionedLoop;
/// \brief For each memory pointer it contains the partitionId it is used in.
/// If nullptr, no partitioning is used.
@@ -730,8 +734,8 @@ private:
/// If the pointer is used in multiple partitions the entry is set to -1.
const SmallVector<int, 8> *PtrToPartition;
- /// \brief This maps the instructions from OrigLoop to their counterpart in
- /// NonDistributedLoop.
+ /// \brief This maps the instructions from VersionedLoop to their counterpart
+ /// in NonVersionedLoop.
ValueToValueMapTy VMap;
/// \brief Analyses used.
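
The renames in LoopDistribute.cpp (OrigLoop to VersionedLoop, ".ldist" to ".lver" block suffixes) recast the embedded helper as generic loop versioning: an optimized copy of the loop guarded by runtime memchecks, with an unmodified clone as the fall-back. A source-level picture of the shape it creates, in illustrative C++ only; the simple overlap test stands in for the pointer checks that LoopAccessInfo actually emits:

    #include <cstddef>

    // Take the fast loop only when a runtime check rules out aliasing
    // ("lver.memcheck"); otherwise run the conservative original clone
    // ("lver.orig"). The raw pointer comparison is purely illustrative.
    void axpy(float *a, const float *b, std::size_t n) {
      if (a + n <= b || b + n <= a) {
        for (std::size_t i = 0; i < n; ++i)  // versioned loop: no-alias assumed
          a[i] += b[i];
      } else {
        for (std::size_t i = 0; i < n; ++i)  // non-versioned fall-back
          a[i] += b[i];
      }
    }
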
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 3de1333a7c98..714ce914a8b3 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -209,7 +209,7 @@ namespace {
bool runOnNoncountableLoop();
bool runOnCountableLoop();
};
-} // namespace
+}
char LoopIdiomRecognize::ID = 0;
INITIALIZE_PASS_BEGIN(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp
index 4c40f249ce1d..e12502654751 100644
--- a/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -52,7 +52,7 @@ namespace {
AU.addRequired<TargetLibraryInfoWrapperPass>();
}
};
-} // namespace
+}
char LoopInstSimplify::ID = 0;
INITIALIZE_PASS_BEGIN(LoopInstSimplify, "loop-instsimplify",
diff --git a/lib/Transforms/Scalar/LoopRerollPass.cpp b/lib/Transforms/Scalar/LoopRerollPass.cpp
index f6db9b114e3f..ed103e6b8ed6 100644
--- a/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ b/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -438,7 +438,7 @@ namespace {
bool reroll(Instruction *IV, Loop *L, BasicBlock *Header, const SCEV *IterCount,
ReductionTracker &Reductions);
};
-} // namespace
+}
char LoopReroll::ID = 0;
INITIALIZE_PASS_BEGIN(LoopReroll, "loop-reroll", "Reroll loops", false, false)
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index 2ba70ad1f1a7..a675e1289baf 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -79,7 +79,7 @@ namespace {
AssumptionCache *AC;
DominatorTree *DT;
};
-} // namespace
+}
char LoopRotate::ID = 0;
INITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index ee7248691992..4b59f3d2f6cc 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -116,7 +116,7 @@ public:
void dump() const;
};
-} // namespace
+}
void RegSortData::print(raw_ostream &OS) const {
OS << "[NumUses=" << UsedByIndices.count() << ']';
@@ -157,7 +157,7 @@ public:
const_iterator end() const { return RegSequence.end(); }
};
-} // namespace
+}
void
RegUseTracker::CountRegister(const SCEV *Reg, size_t LUIdx) {
@@ -281,7 +281,7 @@ struct Formula {
void dump() const;
};
-} // namespace
+}
/// DoInitialMatch - Recursion helper for InitialMatch.
static void DoInitialMatch(const SCEV *S, Loop *L,
@@ -903,7 +903,7 @@ private:
SmallPtrSetImpl<const SCEV *> *LoserRegs);
};
-} // namespace
+}
/// RateRegister - Tally up interesting quantities from the given register.
void Cost::RateRegister(const SCEV *Reg,
@@ -1102,7 +1102,7 @@ struct LSRFixup {
void dump() const;
};
-} // namespace
+}
LSRFixup::LSRFixup()
: UserInst(nullptr), OperandValToReplace(nullptr), LUIdx(~size_t(0)),
@@ -1252,7 +1252,7 @@ public:
void dump() const;
};
-} // namespace
+}
/// HasFormula - Test whether this use as a formula which has the same
/// registers as the given formula.
@@ -1791,7 +1791,7 @@ public:
void dump() const;
};
-} // namespace
+}
/// OptimizeShadowIV - If IV is used in a int-to-float cast
/// inside the loop then try to eliminate the cast operation.
@@ -3644,7 +3644,7 @@ struct WorkItem {
void dump() const;
};
-} // namespace
+}
void WorkItem::print(raw_ostream &OS) const {
OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
@@ -4949,7 +4949,7 @@ private:
void getAnalysisUsage(AnalysisUsage &AU) const override;
};
-} // namespace
+}
char LoopStrengthReduce::ID = 0;
INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index d702dc0b4ee9..9e7558d9c45f 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -229,7 +229,7 @@ namespace {
unsigned DynamicCostSavingsDiscount,
uint64_t UnrolledCost, uint64_t RolledDynamicCost);
};
-} // namespace
+}
char LoopUnroll::ID = 0;
INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 5bdc2ec88d4a..cbc563bd8998 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -43,6 +43,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -80,6 +81,7 @@ namespace {
struct LoopProperties {
unsigned CanBeUnswitchedCount;
+ unsigned WasUnswitchedCount;
unsigned SizeEstimation;
UnswitchedValsMap UnswitchedVals;
};
@@ -93,37 +95,52 @@ namespace {
UnswitchedValsMap *CurLoopInstructions;
LoopProperties *CurrentLoopProperties;
- // Max size of code we can produce on remained iterations.
+ // A loop unswitching with an estimated cost above this threshold
+ // is not performed. MaxSize is turned into unswitching quota for
+ // the current loop, and reduced correspondingly, though note that
+ // the quota is returned by releaseMemory() when the loop has been
+ // processed, so that MaxSize will return to its previous
+ // value. So in most cases MaxSize will equal the Threshold flag
+ // when a new loop is processed. An exception to that is that
+ // MaxSize will have a smaller value while processing nested loops
+ // that were introduced due to loop unswitching of an outer loop.
+ //
+ // FIXME: The way that MaxSize works is subtle and depends on the
+ // pass manager processing loops and calling releaseMemory() in a
+ // specific order. It would be good to find a more straightforward
+ // way of doing what MaxSize does.
unsigned MaxSize;
- public:
-
- LUAnalysisCache() :
- CurLoopInstructions(nullptr), CurrentLoopProperties(nullptr),
- MaxSize(Threshold)
- {}
-
- // Analyze loop. Check its size, calculate is it possible to unswitch
- // it. Returns true if we can unswitch this loop.
- bool countLoop(const Loop *L, const TargetTransformInfo &TTI,
- AssumptionCache *AC);
-
- // Clean all data related to given loop.
- void forgetLoop(const Loop *L);
-
- // Mark case value as unswitched.
- // Since SI instruction can be partly unswitched, in order to avoid
- // extra unswitching in cloned loops keep track all unswitched values.
- void setUnswitched(const SwitchInst *SI, const Value *V);
-
- // Check was this case value unswitched before or not.
- bool isUnswitched(const SwitchInst *SI, const Value *V);
-
- // Clone all loop-unswitch related loop properties.
- // Redistribute unswitching quotas.
- // Note, that new loop data is stored inside the VMap.
- void cloneData(const Loop *NewLoop, const Loop *OldLoop,
- const ValueToValueMapTy &VMap);
+ public:
+ LUAnalysisCache()
+ : CurLoopInstructions(nullptr), CurrentLoopProperties(nullptr),
+ MaxSize(Threshold) {}
+
+ // Analyze loop. Check its size, calculate is it possible to unswitch
+ // it. Returns true if we can unswitch this loop.
+ bool countLoop(const Loop *L, const TargetTransformInfo &TTI,
+ AssumptionCache *AC);
+
+ // Clean all data related to given loop.
+ void forgetLoop(const Loop *L);
+
+ // Mark case value as unswitched.
+ // Since SI instruction can be partly unswitched, in order to avoid
+ // extra unswitching in cloned loops keep track all unswitched values.
+ void setUnswitched(const SwitchInst *SI, const Value *V);
+
+ // Check was this case value unswitched before or not.
+ bool isUnswitched(const SwitchInst *SI, const Value *V);
+
+ // Returns true if another unswitching could be done within the cost
+ // threshold.
+ bool CostAllowsUnswitching();
+
+ // Clone all loop-unswitch related loop properties.
+ // Redistribute unswitching quotas.
+ // Note, that new loop data is stored inside the VMap.
+ void cloneData(const Loop *NewLoop, const Loop *OldLoop,
+ const ValueToValueMapTy &VMap);
};
class LoopUnswitch : public LoopPass {
@@ -195,10 +212,12 @@ namespace {
/// Update the appropriate Phi nodes as we do so.
void SplitExitEdges(Loop *L, const SmallVectorImpl<BasicBlock *> &ExitBlocks);
- bool UnswitchIfProfitable(Value *LoopCond, Constant *Val);
+ bool UnswitchIfProfitable(Value *LoopCond, Constant *Val,
+ TerminatorInst *TI = nullptr);
void UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
- BasicBlock *ExitBlock);
- void UnswitchNontrivialCondition(Value *LIC, Constant *OnVal, Loop *L);
+ BasicBlock *ExitBlock, TerminatorInst *TI);
+ void UnswitchNontrivialCondition(Value *LIC, Constant *OnVal, Loop *L,
+ TerminatorInst *TI);
void RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
Constant *Val, bool isEqual);
@@ -206,14 +225,15 @@ namespace {
void EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
BasicBlock *TrueDest,
BasicBlock *FalseDest,
- Instruction *InsertPt);
+ Instruction *InsertPt,
+ TerminatorInst *TI);
void SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L);
bool IsTrivialUnswitchCondition(Value *Cond, Constant **Val = nullptr,
BasicBlock **LoopExit = nullptr);
};
-} // namespace
+}
// Analyze loop. Check its size, calculate is it possible to unswitch
// it. Returns true if we can unswitch this loop.
@@ -242,12 +262,13 @@ bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI,
// consideration code simplification opportunities and code that can
// be shared by the resultant unswitched loops.
CodeMetrics Metrics;
- for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
- I != E; ++I)
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E;
+ ++I)
Metrics.analyzeBasicBlock(*I, TTI, EphValues);
- Props.SizeEstimation = std::min(Metrics.NumInsts, Metrics.NumBlocks * 5);
+ Props.SizeEstimation = Metrics.NumInsts;
Props.CanBeUnswitchedCount = MaxSize / (Props.SizeEstimation);
+ Props.WasUnswitchedCount = 0;
MaxSize -= Props.SizeEstimation * Props.CanBeUnswitchedCount;
if (Metrics.notDuplicatable) {
@@ -258,13 +279,6 @@ bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI,
}
}
- if (!Props.CanBeUnswitchedCount) {
- DEBUG(dbgs() << "NOT unswitching loop %"
- << L->getHeader()->getName() << ", cost too high: "
- << L->getBlocks().size() << "\n");
- return false;
- }
-
// Be careful. This links are good only before new loop addition.
CurrentLoopProperties = &Props;
CurLoopInstructions = &Props.UnswitchedVals;
@@ -279,7 +293,8 @@ void LUAnalysisCache::forgetLoop(const Loop *L) {
if (LIt != LoopsProperties.end()) {
LoopProperties &Props = LIt->second;
- MaxSize += Props.CanBeUnswitchedCount * Props.SizeEstimation;
+ MaxSize += (Props.CanBeUnswitchedCount + Props.WasUnswitchedCount) *
+ Props.SizeEstimation;
LoopsProperties.erase(LIt);
}
@@ -299,6 +314,10 @@ bool LUAnalysisCache::isUnswitched(const SwitchInst *SI, const Value *V) {
return (*CurLoopInstructions)[SI].count(V);
}
+bool LUAnalysisCache::CostAllowsUnswitching() {
+ return CurrentLoopProperties->CanBeUnswitchedCount > 0;
+}
+
// Clone all loop-unswitch related loop properties.
// Redistribute unswitching quotas.
// Note, that new loop data is stored inside the VMap.
@@ -312,6 +331,8 @@ void LUAnalysisCache::cloneData(const Loop *NewLoop, const Loop *OldLoop,
// Reallocate "can-be-unswitched quota"
--OldLoopProps.CanBeUnswitchedCount;
+ ++OldLoopProps.WasUnswitchedCount;
+ NewLoopProps.WasUnswitchedCount = 0;
unsigned Quota = OldLoopProps.CanBeUnswitchedCount;
NewLoopProps.CanBeUnswitchedCount = Quota / 2;
OldLoopProps.CanBeUnswitchedCount = Quota - Quota / 2;
@@ -453,8 +474,8 @@ bool LoopUnswitch::processCurrentLoop() {
// unswitch on it if we desire.
Value *LoopCond = FindLIVLoopCondition(BI->getCondition(),
currentLoop, Changed);
- if (LoopCond && UnswitchIfProfitable(LoopCond,
- ConstantInt::getTrue(Context))) {
+ if (LoopCond &&
+ UnswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context), TI)) {
++NumBranches;
return true;
}
@@ -643,7 +664,8 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
/// UnswitchIfProfitable - We have found that we can unswitch currentLoop when
/// LoopCond == Val to simplify the loop. If we decide that this is profitable,
/// unswitch the loop, reprocess the pieces, then return true.
-bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) {
+bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val,
+ TerminatorInst *TI) {
Function *F = loopHeader->getParent();
Constant *CondVal = nullptr;
BasicBlock *ExitBlock = nullptr;
@@ -651,17 +673,25 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) {
if (IsTrivialUnswitchCondition(LoopCond, &CondVal, &ExitBlock)) {
// If the condition is trivial, always unswitch. There is no code growth
// for this case.
- UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, ExitBlock);
+ UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, ExitBlock, TI);
return true;
}
// Check to see if it would be profitable to unswitch current loop.
+ if (!BranchesInfo.CostAllowsUnswitching()) {
+ DEBUG(dbgs() << "NOT unswitching loop %"
+ << currentLoop->getHeader()->getName()
+ << " at non-trivial condition '" << *Val
+ << "' == " << *LoopCond << "\n"
+ << ". Cost too high.\n");
+ return false;
+ }
// Do not do non-trivial unswitch while optimizing for size.
if (OptimizeForSize || F->hasFnAttribute(Attribute::OptimizeForSize))
return false;
- UnswitchNontrivialCondition(LoopCond, Val, currentLoop);
+ UnswitchNontrivialCondition(LoopCond, Val, currentLoop, TI);
return true;
}
@@ -685,25 +715,65 @@ static Loop *CloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
return New;
}
+static void copyMetadata(Instruction *DstInst, const Instruction *SrcInst,
+ bool Swapped) {
+ if (!SrcInst || !SrcInst->hasMetadata())
+ return;
+
+ SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
+ SrcInst->getAllMetadata(MDs);
+ for (auto &MD : MDs) {
+ switch (MD.first) {
+ default:
+ break;
+ case LLVMContext::MD_prof:
+ if (Swapped && MD.second->getNumOperands() == 3 &&
+ isa<MDString>(MD.second->getOperand(0))) {
+ MDString *MDName = cast<MDString>(MD.second->getOperand(0));
+ if (MDName->getString() == "branch_weights") {
+ auto *ValT = cast_or_null<ConstantAsMetadata>(
+ MD.second->getOperand(1))->getValue();
+ auto *ValF = cast_or_null<ConstantAsMetadata>(
+ MD.second->getOperand(2))->getValue();
+ assert(ValT && ValF && "Invalid Operands of branch_weights");
+ auto NewMD =
+ MDBuilder(DstInst->getParent()->getContext())
+ .createBranchWeights(cast<ConstantInt>(ValF)->getZExtValue(),
+ cast<ConstantInt>(ValT)->getZExtValue());
+ MD.second = NewMD;
+ }
+ }
+ // fallthrough.
+ case LLVMContext::MD_dbg:
+ DstInst->setMetadata(MD.first, MD.second);
+ }
+ }
+}
+
/// EmitPreheaderBranchOnCondition - Emit a conditional branch on two values
/// if LIC == Val, branch to TrueDst, otherwise branch to FalseDest. Insert the
/// code immediately before InsertPt.
void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
BasicBlock *TrueDest,
BasicBlock *FalseDest,
- Instruction *InsertPt) {
+ Instruction *InsertPt,
+ TerminatorInst *TI) {
// Insert a conditional branch on LIC to the two preheaders. The original
// code is the true version and the new code is the false version.
Value *BranchVal = LIC;
+ bool Swapped = false;
if (!isa<ConstantInt>(Val) ||
Val->getType() != Type::getInt1Ty(LIC->getContext()))
BranchVal = new ICmpInst(InsertPt, ICmpInst::ICMP_EQ, LIC, Val);
- else if (Val != ConstantInt::getTrue(Val->getContext()))
+ else if (Val != ConstantInt::getTrue(Val->getContext())) {
// We want to enter the new loop when the condition is true.
std::swap(TrueDest, FalseDest);
+ Swapped = true;
+ }
// Insert the new branch.
BranchInst *BI = BranchInst::Create(TrueDest, FalseDest, BranchVal, InsertPt);
+ copyMetadata(BI, TI, Swapped);
// If either edge is critical, split it. This helps preserve LoopSimplify
// form for enclosing loops.
@@ -717,13 +787,14 @@ void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
/// where the path through the loop that doesn't execute its body has no
/// side-effects), unswitch it. This doesn't involve any code duplication, just
/// moving the conditional branch outside of the loop and updating loop info.
-void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond,
- Constant *Val,
- BasicBlock *ExitBlock) {
+void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
+ BasicBlock *ExitBlock,
+ TerminatorInst *TI) {
DEBUG(dbgs() << "loop-unswitch: Trivial-Unswitch loop %"
- << loopHeader->getName() << " [" << L->getBlocks().size()
- << " blocks] in Function " << L->getHeader()->getParent()->getName()
- << " on cond: " << *Val << " == " << *Cond << "\n");
+ << loopHeader->getName() << " [" << L->getBlocks().size()
+ << " blocks] in Function "
+ << L->getHeader()->getParent()->getName() << " on cond: " << *Val
+ << " == " << *Cond << "\n");
// First step, split the preheader, so that we know that there is a safe place
// to insert the conditional branch. We will change loopPreheader to have a
@@ -744,7 +815,7 @@ void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond,
// Okay, now we have a position to branch from and a position to branch to,
// insert the new conditional branch.
EmitPreheaderBranchOnCondition(Cond, Val, NewExit, NewPH,
- loopPreheader->getTerminator());
+ loopPreheader->getTerminator(), TI);
LPM->deleteSimpleAnalysisValue(loopPreheader->getTerminator(), L);
loopPreheader->getTerminator()->eraseFromParent();
@@ -780,7 +851,7 @@ void LoopUnswitch::SplitExitEdges(Loop *L,
/// to unswitch when LIC equal Val. Split it into loop versions and test the
/// condition outside of either loop. Return the loops created as Out1/Out2.
void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
- Loop *L) {
+ Loop *L, TerminatorInst *TI) {
Function *F = loopHeader->getParent();
DEBUG(dbgs() << "loop-unswitch: Unswitching loop %"
<< loopHeader->getName() << " [" << L->getBlocks().size()
@@ -897,7 +968,8 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
"Preheader splitting did not work correctly!");
// Emit the new branch that selects between the two versions of this loop.
- EmitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR);
+ EmitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR,
+ TI);
LPM->deleteSimpleAnalysisValue(OldBR, L);
OldBR->eraseFromParent();
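
Beyond the reworked unswitching quota (WasUnswitchedCount plus CostAllowsUnswitching replacing the early size bail-out), the key functional addition in LoopUnswitch.cpp is copyMetadata: when EmitPreheaderBranchOnCondition swaps TrueDest and FalseDest, the branch_weights profile metadata must be swapped with them, or the hot/cold annotation on the new branch is inverted. A minimal sketch of just the swap, assuming the two weights have already been extracted from the source branch's !prof node:

    #include <cstdint>
    #include "llvm/IR/MDBuilder.h"

    // createBranchWeights(T, F) attaches T to the true edge, so passing the
    // weights exchanged matches the exchanged successor order.
    static llvm::MDNode *swappedBranchWeights(llvm::LLVMContext &Ctx,
                                              uint32_t TrueWeight,
                                              uint32_t FalseWeight) {
      return llvm::MDBuilder(Ctx).createBranchWeights(FalseWeight, TrueWeight);
    }
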
diff --git a/lib/Transforms/Scalar/LowerAtomic.cpp b/lib/Transforms/Scalar/LowerAtomic.cpp
index b8b35d4249f0..3314e1ed41ab 100644
--- a/lib/Transforms/Scalar/LowerAtomic.cpp
+++ b/lib/Transforms/Scalar/LowerAtomic.cpp
@@ -138,7 +138,7 @@ namespace {
return Changed;
}
};
-} // namespace
+}
char LowerAtomic::ID = 0;
INITIALIZE_PASS(LowerAtomic, "loweratomic",
diff --git a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
index b845c038e67e..0c47cbd5bfda 100644
--- a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
+++ b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
@@ -181,7 +181,7 @@ public:
bool runOnFunction(Function &F) override { return lowerExpectIntrinsic(F); }
};
-} // namespace
+}
char LowerExpectIntrinsic::ID = 0;
INITIALIZE_PASS(LowerExpectIntrinsic, "lower-expect",
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 2c9f93513ae2..85012afc80ac 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -153,7 +153,7 @@ struct MemsetRange {
bool isProfitableToUseMemset(const DataLayout &DL) const;
};
-} // namespace
+} // end anon namespace
bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
// If we found more than 4 stores to merge or 16 bytes, use memset.
@@ -237,7 +237,7 @@ public:
};
-} // namespace
+} // end anon namespace
/// addRange - Add a new store to the MemsetRanges data structure. This adds a
@@ -355,7 +355,7 @@ namespace {
};
char MemCpyOpt::ID = 0;
-} // namespace
+}
// createMemCpyOptPass - The public interface to this file...
FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOpt(); }
diff --git a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
index 886b6f5b0a2c..243db8d70ca2 100644
--- a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
+++ b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -156,7 +156,7 @@ private:
};
char MergedLoadStoreMotion::ID = 0;
-} // namespace
+}
///
/// \brief createMergedLoadStoreMotionPass - The public interface to this file.
diff --git a/lib/Transforms/Scalar/NaryReassociate.cpp b/lib/Transforms/Scalar/NaryReassociate.cpp
index 4cf68b00da0a..f42f8306fccc 100644
--- a/lib/Transforms/Scalar/NaryReassociate.cpp
+++ b/lib/Transforms/Scalar/NaryReassociate.cpp
@@ -74,21 +74,18 @@
// 1) We only considers n-ary adds for now. This should be extended and
// generalized.
//
-// 2) Besides arithmetic operations, similar reassociation can be applied to
-// GEPs. For example, if
-// X = &arr[a]
-// dominates
-// Y = &arr[a + b]
-// we may rewrite Y into X + b.
-//
//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -115,6 +112,7 @@ public:
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<ScalarEvolution>();
AU.addPreserved<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<ScalarEvolution>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
@@ -163,12 +161,18 @@ private:
// GEP's pointer size, i.e., whether Index needs to be sign-extended in order
// to be an index of GEP.
bool requiresSignExtension(Value *Index, GetElementPtrInst *GEP);
+ // Returns whether V is known to be non-negative at context \c Ctxt.
+ bool isKnownNonNegative(Value *V, Instruction *Ctxt);
+ // Returns whether AO may sign overflow at context \c Ctxt. It computes a
+ // conservative result -- it answers true when not sure.
+ bool maySignOverflow(AddOperator *AO, Instruction *Ctxt);
+ AssumptionCache *AC;
+ const DataLayout *DL;
DominatorTree *DT;
ScalarEvolution *SE;
TargetLibraryInfo *TLI;
TargetTransformInfo *TTI;
- const DataLayout *DL;
// A lookup table quickly telling which instructions compute the given SCEV.
// Note that there can be multiple instructions at different locations
// computing to the same SCEV, so we map a SCEV to an instruction list. For
@@ -185,6 +189,7 @@ private:
char NaryReassociate::ID = 0;
INITIALIZE_PASS_BEGIN(NaryReassociate, "nary-reassociate", "Nary reassociation",
false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
@@ -200,6 +205,7 @@ bool NaryReassociate::runOnFunction(Function &F) {
if (skipOptnoneFunction(F))
return false;
+ AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
SE = &getAnalysis<ScalarEvolution>();
TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
@@ -346,18 +352,44 @@ bool NaryReassociate::requiresSignExtension(Value *Index,
return cast<IntegerType>(Index->getType())->getBitWidth() < PointerSizeInBits;
}
+bool NaryReassociate::isKnownNonNegative(Value *V, Instruction *Ctxt) {
+ bool NonNegative, Negative;
+ // TODO: ComputeSignBits is expensive. Consider caching the results.
+ ComputeSignBit(V, NonNegative, Negative, *DL, 0, AC, Ctxt, DT);
+ return NonNegative;
+}
+
+bool NaryReassociate::maySignOverflow(AddOperator *AO, Instruction *Ctxt) {
+ if (AO->hasNoSignedWrap())
+ return false;
+
+ Value *LHS = AO->getOperand(0), *RHS = AO->getOperand(1);
+ // If LHS or RHS has the same sign as the sum, AO doesn't sign overflow.
+ // TODO: handle the negative case as well.
+ if (isKnownNonNegative(AO, Ctxt) &&
+ (isKnownNonNegative(LHS, Ctxt) || isKnownNonNegative(RHS, Ctxt)))
+ return false;
+
+ return true;
+}
+
GetElementPtrInst *
NaryReassociate::tryReassociateGEPAtIndex(GetElementPtrInst *GEP, unsigned I,
Type *IndexedType) {
Value *IndexToSplit = GEP->getOperand(I + 1);
- if (SExtInst *SExt = dyn_cast<SExtInst>(IndexToSplit))
+ if (SExtInst *SExt = dyn_cast<SExtInst>(IndexToSplit)) {
IndexToSplit = SExt->getOperand(0);
+ } else if (ZExtInst *ZExt = dyn_cast<ZExtInst>(IndexToSplit)) {
+ // zext can be treated as sext if the source is non-negative.
+ if (isKnownNonNegative(ZExt->getOperand(0), GEP))
+ IndexToSplit = ZExt->getOperand(0);
+ }
if (AddOperator *AO = dyn_cast<AddOperator>(IndexToSplit)) {
// If the I-th index needs sext and the underlying add is not equipped with
// nsw, we cannot split the add because
// sext(LHS + RHS) != sext(LHS) + sext(RHS).
- if (requiresSignExtension(IndexToSplit, GEP) && !AO->hasNoSignedWrap())
+ if (requiresSignExtension(IndexToSplit, GEP) && maySignOverflow(AO, GEP))
return nullptr;
Value *LHS = AO->getOperand(0), *RHS = AO->getOperand(1);
// IndexToSplit = LHS + RHS.
@@ -373,10 +405,9 @@ NaryReassociate::tryReassociateGEPAtIndex(GetElementPtrInst *GEP, unsigned I,
return nullptr;
}
-GetElementPtrInst *
-NaryReassociate::tryReassociateGEPAtIndex(GetElementPtrInst *GEP, unsigned I,
- Value *LHS, Value *RHS,
- Type *IndexedType) {
+GetElementPtrInst *NaryReassociate::tryReassociateGEPAtIndex(
+ GetElementPtrInst *GEP, unsigned I, Value *LHS, Value *RHS,
+ Type *IndexedType) {
// Look for GEP's closest dominator that has the same SCEV as GEP except that
// the I-th index is replaced with LHS.
SmallVector<const SCEV *, 4> IndexExprs;
@@ -384,6 +415,16 @@ NaryReassociate::tryReassociateGEPAtIndex(GetElementPtrInst *GEP, unsigned I,
IndexExprs.push_back(SE->getSCEV(*Index));
// Replace the I-th index with LHS.
IndexExprs[I] = SE->getSCEV(LHS);
+ if (isKnownNonNegative(LHS, GEP) &&
+ DL->getTypeSizeInBits(LHS->getType()) <
+ DL->getTypeSizeInBits(GEP->getOperand(I)->getType())) {
+ // Zero-extend LHS if it is non-negative. InstCombine canonicalizes sext to
+ // zext if the source operand is proved non-negative. We should do that
+ // consistently so that CandidateExpr is more likely to appear before. See
+ // @reassociate_gep_assume for an example of this canonicalization.
+ IndexExprs[I] =
+ SE->getZeroExtendExpr(IndexExprs[I], GEP->getOperand(I)->getType());
+ }
const SCEV *CandidateExpr = SE->getGEPExpr(
GEP->getSourceElementType(), SE->getSCEV(GEP->getPointerOperand()),
IndexExprs, GEP->isInBounds());
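
The NaryReassociate changes hinge on one arithmetic fact: sext(A + B) equals sext(A) + sext(B) only when the narrow add cannot sign-overflow, which is what maySignOverflow conservatively checks, and a zext may be treated as a sext once the source is known non-negative. A self-contained 8-bit demonstration of the failure mode the nsw guard prevents (hypothetical example, not part of the patch):

    #include <cstdint>
    #include <cstdio>

    // With a = b = 100, the 8-bit sum wraps to -56, so sign-extending the
    // wrapped sum differs from summing the sign-extended operands. The add
    // may be split across the extension only when this cannot happen.
    int main() {
      int8_t a = 100, b = 100;
      int8_t narrow = static_cast<int8_t>(a + b);   // 200 wraps to -56
      int32_t sextOfSum = narrow;                   // -56
      int32_t sumOfSext = int32_t{a} + int32_t{b};  // 200
      std::printf("sext(a+b) = %d, sext(a)+sext(b) = %d\n",
                  sextOfSum, sumOfSext);
      return 0;
    }
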
diff --git a/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
index 5423499723f7..31d7df39c781 100644
--- a/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
+++ b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
@@ -46,7 +46,7 @@ namespace {
};
char PartiallyInlineLibCalls::ID = 0;
-} // namespace
+}
INITIALIZE_PASS(PartiallyInlineLibCalls, "partially-inline-libcalls",
"Partially inline calls to library functions", false, false)
diff --git a/lib/Transforms/Scalar/PlaceSafepoints.cpp b/lib/Transforms/Scalar/PlaceSafepoints.cpp
index 670dcd24f75c..9ecaf102574a 100644
--- a/lib/Transforms/Scalar/PlaceSafepoints.cpp
+++ b/lib/Transforms/Scalar/PlaceSafepoints.cpp
@@ -160,7 +160,7 @@ struct PlaceBackedgeSafepointsImpl : public FunctionPass {
AU.setPreservesAll();
}
};
-} // namespace
+}
static cl::opt<bool> NoEntry("spp-no-entry", cl::Hidden, cl::init(false));
static cl::opt<bool> NoCall("spp-no-call", cl::Hidden, cl::init(false));
@@ -181,7 +181,7 @@ struct PlaceSafepoints : public FunctionPass {
// if that was worth doing
}
};
-} // namespace
+}
// Insert a safepoint poll immediately before the given instruction. Does
// not handle the parsability of state at the runtime call, that's the
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index 9842fd7bb6c7..d1acf785d07e 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -154,7 +154,7 @@ namespace {
unsigned SymbolicRank;
bool isOr;
};
-} // namespace
+}
namespace {
class Reassociate : public FunctionPass {
@@ -197,7 +197,7 @@ namespace {
void OptimizeInst(Instruction *I);
Instruction *canonicalizeNegConstExpr(Instruction *I);
};
-} // namespace
+}
XorOpnd::XorOpnd(Value *V) {
assert(!isa<ConstantInt>(V) && "No ConstantInt");
@@ -936,6 +936,10 @@ static Value *NegateValue(Value *V, Instruction *BI) {
// Push the negates through the add.
I->setOperand(0, NegateValue(I->getOperand(0), BI));
I->setOperand(1, NegateValue(I->getOperand(1), BI));
+ if (I->getOpcode() == Instruction::Add) {
+ I->setHasNoUnsignedWrap(false);
+ I->setHasNoSignedWrap(false);
+ }
// We must move the add instruction here, because the neg instructions do
// not dominate the old add instruction in general. By moving it, we are
@@ -976,6 +980,12 @@ static Value *NegateValue(Value *V, Instruction *BI) {
InsertPt = TheNeg->getParent()->getParent()->getEntryBlock().begin();
}
TheNeg->moveBefore(InsertPt);
+ if (TheNeg->getOpcode() == Instruction::Sub) {
+ TheNeg->setHasNoUnsignedWrap(false);
+ TheNeg->setHasNoSignedWrap(false);
+ } else {
+ TheNeg->andIRFlags(BI);
+ }
return TheNeg;
}
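
The Reassociate hunks clear nsw/nuw wherever NegateValue pushes a negation through an add or repurposes an existing sub, because a wrap-free sum does not imply a wrap-free negated sum. An 8-bit illustration (hypothetical example; the narrowing conversion is two's-complement modular on mainstream targets):

    #include <cstdint>
    #include <cstdio>

    // a + b = -128 fits in int8_t, so nsw would hold for the original add,
    // but (-a) + (-b) = 128 does not fit: the rewritten add must drop nsw.
    int main() {
      int8_t a = -100, b = -28;
      int8_t sum = static_cast<int8_t>(a + b);       // -128, no wrap
      int8_t negSum = static_cast<int8_t>(-a + -b);  // 128 wraps to -128
      std::printf("a+b = %d, -(a+b) computed as (-a)+(-b) = %d\n",
                  sum, negSum);
      return 0;
    }
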
diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp
index 2ff56e67c9c6..1b46727c17bb 100644
--- a/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -58,7 +58,7 @@ namespace {
bool runOnFunction(Function &F) override;
};
-} // namespace
+}
char RegToMem::ID = 0;
INITIALIZE_PASS_BEGIN(RegToMem, "reg2mem", "Demote all values to stack slots",
diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index c15bc1bd7eca..ae2ae3af0c7a 100644
--- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -183,7 +183,7 @@ struct PartiallyConstructedSafepointRecord {
/// Maps rematerialized copy to it's original value.
RematerializedValueMapTy RematerializedValues;
};
-} // namespace
+}
/// Compute the live-in set for every basic block in the function
static void computeLiveInValues(DominatorTree &DT, Function &F,
@@ -294,12 +294,17 @@ static void analyzeParsePointLiveness(
static Value *findBaseDefiningValue(Value *I);
-/// If we can trivially determine that the index specified in the given vector
-/// is a base pointer, return it. In cases where the entire vector is known to
-/// consist of base pointers, the entire vector will be returned. This
-/// indicates that the relevant extractelement is a valid base pointer and
-/// should be used directly.
-static Value *findBaseOfVector(Value *I, Value *Index) {
+/// Return a base defining value for the 'Index' element of the given vector
+/// instruction 'I'. If Index is null, returns a BDV for the entire vector
+/// 'I'. As an optimization, this method will try to determine when the
+/// element is known to already be a base pointer. If this can be established,
+/// the second value in the returned pair will be true. Note that either a
+/// vector or a pointer typed value can be returned. For the former, the
+/// vector returned is a BDV (and possibly a base) of the entire vector 'I'.
+/// If the latter, the returned pointer is a BDV (or possibly a base) for
+/// the particular element in 'I'.
+static std::pair<Value *, bool>
+findBaseDefiningValueOfVector(Value *I, Value *Index = nullptr) {
assert(I->getType()->isVectorTy() &&
cast<VectorType>(I->getType())->getElementType()->isPointerTy() &&
"Illegal to ask for the base pointer of a non-pointer type");
@@ -309,7 +314,7 @@ static Value *findBaseOfVector(Value *I, Value *Index) {
if (isa<Argument>(I))
// An incoming argument to the function is a base pointer
- return I;
+ return std::make_pair(I, true);
// We shouldn't see the address of a global as a vector value?
assert(!isa<GlobalVariable>(I) &&
@@ -320,7 +325,7 @@ static Value *findBaseOfVector(Value *I, Value *Index) {
if (isa<UndefValue>(I))
// utterly meaningless, but useful for dealing with partially optimized
// code.
- return I;
+ return std::make_pair(I, true);
// Due to inheritance, this must be _after_ the global variable and undef
// checks
@@ -328,38 +333,56 @@ static Value *findBaseOfVector(Value *I, Value *Index) {
assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) &&
"order of checks wrong!");
assert(Con->isNullValue() && "null is the only case which makes sense");
- return Con;
+ return std::make_pair(Con, true);
}
-
+
if (isa<LoadInst>(I))
- return I;
-
+ return std::make_pair(I, true);
+
// For an insert element, we might be able to look through it if we know
- // something about the indexes, but if the indices are arbitrary values, we
- // can't without much more extensive scalarization.
+ // something about the indexes.
if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(I)) {
- Value *InsertIndex = IEI->getOperand(2);
- // This index is inserting the value, look for it's base
- if (InsertIndex == Index)
- return findBaseDefiningValue(IEI->getOperand(1));
- // Both constant, and can't be equal per above. This insert is definitely
- // not relevant, look back at the rest of the vector and keep trying.
- if (isa<ConstantInt>(Index) && isa<ConstantInt>(InsertIndex))
- return findBaseOfVector(IEI->getOperand(0), Index);
- }
-
- // Note: This code is currently rather incomplete. We are essentially only
- // handling cases where the vector element is trivially a base pointer. We
- // need to update the entire base pointer construction algorithm to know how
- // to track vector elements and potentially scalarize, but the case which
- // would motivate the work hasn't shown up in real workloads yet.
- llvm_unreachable("no base found for vector element");
+ if (Index) {
+ Value *InsertIndex = IEI->getOperand(2);
+ // This index is inserting the value, look for its BDV
+ if (InsertIndex == Index)
+ return std::make_pair(findBaseDefiningValue(IEI->getOperand(1)), false);
+ // Both constant, and can't be equal per above. This insert is definitely
+ // not relevant, look back at the rest of the vector and keep trying.
+ if (isa<ConstantInt>(Index) && isa<ConstantInt>(InsertIndex))
+ return findBaseDefiningValueOfVector(IEI->getOperand(0), Index);
+ }
+
+ // We don't know whether this vector contains entirely base pointers or
+ // not. To be conservatively correct, we treat it as a BDV and will
+ // duplicate code as needed to construct a parallel vector of bases.
+ return std::make_pair(IEI, false);
+ }
+
+ if (isa<ShuffleVectorInst>(I))
+ // We don't know whether this vector contains entirely base pointers or
+ // not. To be conservatively correct, we treat it as a BDV and will
+ // duplicate code as needed to construct a parallel vector of bases.
+ // TODO: There are a number of local optimizations which could be applied
+ // here for particular shufflevector patterns.
+ return std::make_pair(I, false);
+
+ // A PHI or Select is a base defining value. The outer findBasePointer
+ // algorithm is responsible for constructing a base value for this BDV.
+ assert((isa<SelectInst>(I) || isa<PHINode>(I)) &&
+ "unknown vector instruction - no base found for vector element");
+ return std::make_pair(I, false);
}
+static bool isKnownBaseResult(Value *V);
+
/// Helper function for findBasePointer - Will return a value which either a)
/// defines the base pointer for the input or b) blocks the simple search
/// (i.e. a PHI or Select of two derived pointers)
static Value *findBaseDefiningValue(Value *I) {
+ if (I->getType()->isVectorTy())
+ return findBaseDefiningValueOfVector(I).first;
+
assert(I->getType()->isPointerTy() &&
"Illegal to ask for the base pointer of a non-pointer type");
@@ -370,16 +393,39 @@ static Value *findBaseDefiningValue(Value *I) {
if (auto *EEI = dyn_cast<ExtractElementInst>(I)) {
Value *VectorOperand = EEI->getVectorOperand();
Value *Index = EEI->getIndexOperand();
- Value *VectorBase = findBaseOfVector(VectorOperand, Index);
- // If the result returned is a vector, we know the entire vector must
- // contain base pointers. In that case, the extractelement is a valid base
- // for this value.
- if (VectorBase->getType()->isVectorTy())
- return EEI;
- // Otherwise, we needed to look through the vector to find the base for
- // this particular element.
- assert(VectorBase->getType()->isPointerTy());
- return VectorBase;
+ std::pair<Value *, bool> pair =
+ findBaseDefiningValueOfVector(VectorOperand, Index);
+ Value *VectorBase = pair.first;
+ if (VectorBase->getType()->isPointerTy())
+ // We found a BDV for this specific element within the vector. This is an
+ // optimization, but in practice it covers most of the useful cases
+ // created via scalarization.
+ return VectorBase;
+ else {
+ assert(VectorBase->getType()->isVectorTy());
+ if (pair.second)
+ // If the vector returned is known to consist entirely of base pointers,
+ // then the extractelement is a valid base for this value.
+ return EEI;
+ else {
+ // Otherwise, we have an instruction which potentially produces a
+ // derived pointer and we need findBasePointers to clone code for us
+ // such that we can create an instruction which produces the
+ // accompanying base pointer.
+ // Note: This code is currently rather incomplete. We don't currently
+ // support the general form of shufflevector or insertelement.
+ // Conceptually, these are just 'base defining values' of the same
+ // variety as phi or select instructions. We need to update the
+ // findBasePointers algorithm to insert new 'base-only' versions of the
+ // original instructions. This is relatively straightforward to do, but
+ // the case which would motivate the work hasn't shown up in real
+ // workloads yet.
+ assert((isa<PHINode>(VectorBase) || isa<SelectInst>(VectorBase)) &&
+ "need to extend findBasePointers for generic vector"
+ "instruction cases");
+ return VectorBase;
+ }
+ }
}
if (isa<Argument>(I))
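The extractelement handling above reduces to a three-way decision on the answer obtained for the source vector. A small standalone sketch of just that decision, with Ty, V, Result, and baseForExtract as hypothetical stand-ins for the LLVM types (the assert on PHI/Select is omitted):

#include <cassert>

enum class Ty { Pointer, Vector };
struct V { Ty Type; };
struct Result { V *BDV; bool KnownBase; };

// Given the (BDV, known-base) answer for the source vector, pick the base
// defining value for the extractelement EE itself.
V *baseForExtract(V *EE, Result VecAnswer) {
  if (VecAnswer.BDV->Type == Ty::Pointer)
    return VecAnswer.BDV; // the walk found a per-lane BDV
  if (VecAnswer.KnownBase)
    return EE;            // every lane is a base, so the extract is one too
  return VecAnswer.BDV;   // defer to findBasePointer to construct bases
}

int main() {
  V EE{Ty::Pointer}, Lane{Ty::Pointer}, Vec{Ty::Vector};
  assert(baseForExtract(&EE, {&Lane, false}) == &Lane);
  assert(baseForExtract(&EE, {&Vec, true}) == &EE);
  assert(baseForExtract(&EE, {&Vec, false}) == &Vec);
}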
@@ -646,7 +692,7 @@ private:
llvm_unreachable("only three states!");
}
};
-} // namespace
+}
/// For a given value or instruction, figure out what base ptr it's derived
/// from. For gc objects, this is simply itself. On success, returns a value
/// which is the base pointer. (This is reliable and can be used for
@@ -1712,7 +1758,9 @@ static void findLiveReferences(
/// slightly non-trivial since it requires a format change. Given how rare
/// such cases are (for the moment?), scalarizing is an acceptable compromise.
static void splitVectorValues(Instruction *StatepointInst,
- StatepointLiveSetTy &LiveSet, DominatorTree &DT) {
+ StatepointLiveSetTy &LiveSet,
+ DenseMap<Value *, Value *>& PointerToBase,
+ DominatorTree &DT) {
SmallVector<Value *, 16> ToSplit;
for (Value *V : LiveSet)
if (isa<VectorType>(V->getType()))
@@ -1721,14 +1769,14 @@ static void splitVectorValues(Instruction *StatepointInst,
if (ToSplit.empty())
return;
+ DenseMap<Value *, SmallVector<Value *, 16>> ElementMapping;
+
Function &F = *(StatepointInst->getParent()->getParent());
DenseMap<Value *, AllocaInst *> AllocaMap;
// First is normal return, second is exceptional return (invoke only)
DenseMap<Value *, std::pair<Value *, Value *>> Replacements;
for (Value *V : ToSplit) {
- LiveSet.erase(V);
-
AllocaInst *Alloca =
new AllocaInst(V->getType(), "", F.getEntryBlock().getFirstNonPHI());
AllocaMap[V] = Alloca;
@@ -1738,7 +1786,7 @@ static void splitVectorValues(Instruction *StatepointInst,
SmallVector<Value *, 16> Elements;
for (unsigned i = 0; i < VT->getNumElements(); i++)
Elements.push_back(Builder.CreateExtractElement(V, Builder.getInt32(i)));
- LiveSet.insert(Elements.begin(), Elements.end());
+ ElementMapping[V] = Elements;
auto InsertVectorReform = [&](Instruction *IP) {
Builder.SetInsertPoint(IP);
@@ -1771,6 +1819,7 @@ static void splitVectorValues(Instruction *StatepointInst,
Replacements[V].second = InsertVectorReform(IP);
}
}
+
for (Value *V : ToSplit) {
AllocaInst *Alloca = AllocaMap[V];
@@ -1814,6 +1863,25 @@ static void splitVectorValues(Instruction *StatepointInst,
for (Value *V : ToSplit)
Allocas.push_back(AllocaMap[V]);
PromoteMemToReg(Allocas, DT);
+
+ // Update our tracking of live pointers and base mappings to account for the
+ // changes we just made.
+ for (Value *V : ToSplit) {
+ auto &Elements = ElementMapping[V];
+
+ LiveSet.erase(V);
+ LiveSet.insert(Elements.begin(), Elements.end());
+ // We need to update the base mapping as well.
+ assert(PointerToBase.count(V));
+ Value *OldBase = PointerToBase[V];
+ auto &BaseElements = ElementMapping[OldBase];
+ PointerToBase.erase(V);
+ assert(Elements.size() == BaseElements.size());
+ for (unsigned i = 0; i < Elements.size(); i++) {
+ Value *Elem = Elements[i];
+ PointerToBase[Elem] = BaseElements[i];
+ }
+ }
}
// Helper function for the "rematerializeLiveValues". It walks use chain
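The new bookkeeping at the end of splitVectorValues keeps PointerToBase consistent after scalarization: lane i of a live vector must map to lane i of its base vector. A compilable toy model of that update, using strings in place of llvm::Value and ordinary containers in place of the pass's structures (all names illustrative):

#include <cassert>
#include <map>
#include <set>
#include <string>
#include <vector>

using Val = std::string;

int main() {
  // V is a live vector whose base vector is B; both have been scalarized.
  std::set<Val> LiveSet = {"V"};
  std::map<Val, Val> PointerToBase = {{"V", "B"}};
  std::map<Val, std::vector<Val>> ElementMapping = {
      {"V", {"v0", "v1"}}, {"B", {"b0", "b1"}}};

  // Swap the vector for its elements and map each element to the matching
  // base element, as in the loop added above.
  const Val V = "V";
  auto &Elements = ElementMapping[V];
  LiveSet.erase(V);
  LiveSet.insert(Elements.begin(), Elements.end());
  assert(PointerToBase.count(V));
  Val OldBase = PointerToBase[V];
  auto &BaseElements = ElementMapping[OldBase];
  PointerToBase.erase(V);
  assert(Elements.size() == BaseElements.size());
  for (size_t i = 0; i < Elements.size(); ++i)
    PointerToBase[Elements[i]] = BaseElements[i]; // lane-wise base mapping

  assert(PointerToBase["v0"] == "b0" && PointerToBase["v1"] == "b1");
  assert(LiveSet.count("v0") && !LiveSet.count("V"));
}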
@@ -2075,17 +2143,6 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
// site.
findLiveReferences(F, DT, P, toUpdate, records);
- // Do a limited scalarization of any live at safepoint vector values which
- // contain pointers. This enables this pass to run after vectorization at
- // the cost of some possible performance loss. TODO: it would be nice to
- // natively support vectors all the way through the backend so we don't need
- // to scalarize here.
- for (size_t i = 0; i < records.size(); i++) {
- struct PartiallyConstructedSafepointRecord &info = records[i];
- Instruction *statepoint = toUpdate[i].getInstruction();
- splitVectorValues(cast<Instruction>(statepoint), info.liveset, DT);
- }
-
// B) Find the base pointers for each live pointer
/* scope for caching */ {
// Cache the 'defining value' relation used in the computation and
@@ -2146,6 +2203,18 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
}
holders.clear();
+ // Do a limited scalarization of any live at safepoint vector values which
+ // contain pointers. This enables this pass to run after vectorization at
+ // the cost of some possible performance loss. TODO: it would be nice to
+ // natively support vectors all the way through the backend so we don't need
+ // to scalarize here.
+ for (size_t i = 0; i < records.size(); i++) {
+ struct PartiallyConstructedSafepointRecord &info = records[i];
+ Instruction *statepoint = toUpdate[i].getInstruction();
+ splitVectorValues(cast<Instruction>(statepoint), info.liveset,
+ info.PointerToBase, DT);
+ }
+
// In order to reduce the live set of a statepoint we might choose to rematerialize
// some values instead of relocating them. This is purely an optimization and
// does not influence correctness.
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index bc068f78c576..305175ff8f73 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -1055,7 +1055,7 @@ void SCCPSolver::visitLoadInst(LoadInst &I) {
// load null -> undef
if (isa<ConstantPointerNull>(Ptr) && I.getPointerAddressSpace() == 0)
- return markConstant(IV, &I, Constant::getNullValue(I.getType()));
+ return markConstant(IV, &I, UndefValue::get(I.getType()));
// Transform load (constant global) into the value loaded.
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
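For context on why undef is the better fold here: a load through null in address space 0 is undefined behavior, so the solver may assume any result, which constrains later folds less than pinning the result to zero would. A tiny standalone model of the guard, not the real SCCPSolver API (visitLoad and Lattice are invented for illustration):

#include <cassert>

enum class Lattice { Unknown, Constant, Undef };

// Hypothetical transfer function for this model only.
Lattice visitLoad(bool PtrIsNull, unsigned AddrSpace) {
  if (PtrIsNull && AddrSpace == 0)
    return Lattice::Undef; // UB: the most permissive lattice value
  return Lattice::Unknown; // no folding otherwise in this model
}

int main() {
  assert(visitLoad(true, 0) == Lattice::Undef);
  assert(visitLoad(true, 1) == Lattice::Unknown); // non-zero AS: null may be valid
}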
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index f38b2b1dbf96..056dd11b5ab3 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -127,7 +127,7 @@ typedef llvm::IRBuilder<true, ConstantFolder, IRBuilderPrefixedInserter<true>>
typedef llvm::IRBuilder<false, ConstantFolder, IRBuilderPrefixedInserter<false>>
IRBuilderTy;
#endif
-} // namespace
+}
namespace {
/// \brief A used slice of an alloca.
@@ -595,7 +595,7 @@ private:
/// the alloca.
SmallVector<Use *, 8> DeadOperands;
};
-} // namespace
+}
static Value *foldSelectInst(SelectInst &SI) {
// If the condition being selected on is a constant or the same value is
@@ -1173,7 +1173,7 @@ public:
}
}
};
-} // namespace
+} // end anon namespace
namespace {
/// \brief An optimization pass providing Scalar Replacement of Aggregates.
@@ -1268,7 +1268,7 @@ private:
void deleteDeadInstructions(SmallPtrSetImpl<AllocaInst *> &DeletedAllocas);
bool promoteAllocas(Function &F);
};
-} // namespace
+}
char SROA::ID = 0;
@@ -3119,7 +3119,7 @@ private:
return true;
}
};
-} // namespace
+}
namespace {
/// \brief Visitor to rewrite aggregate loads and stores as scalar.
@@ -3327,7 +3327,7 @@ private:
return false;
}
};
-} // namespace
+}
/// \brief Strip aggregate type wrapping.
///
diff --git a/lib/Transforms/Scalar/SampleProfile.cpp b/lib/Transforms/Scalar/SampleProfile.cpp
index 69e3a67aa8c1..c8dfa54a4aa0 100644
--- a/lib/Transforms/Scalar/SampleProfile.cpp
+++ b/lib/Transforms/Scalar/SampleProfile.cpp
@@ -174,7 +174,7 @@ protected:
/// \brief Flag indicating whether the profile input loaded successfully.
bool ProfileIsValid;
};
-} // namespace
+}
/// \brief Print the weight of edge \p E on stream \p OS.
///
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index e42c3daab8d7..d955da7ce75d 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -221,7 +221,7 @@ namespace {
}
};
-} // namespace
+}
char SROA_DT::ID = 0;
char SROA_SSAUp::ID = 0;
@@ -1123,7 +1123,7 @@ public:
}
}
};
-} // namespace
+} // end anon namespace
/// isSafeSelectToSpeculate - Select instructions that use an alloca and are
/// subsequently loaded can be rewritten to load both input pointers and then
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 0733daf40f39..231411a16c05 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -48,8 +48,8 @@ UserBonusInstThreshold("bonus-inst-threshold", cl::Hidden, cl::init(1),
STATISTIC(NumSimpl, "Number of blocks simplified");
-/// mergeEmptyReturnBlocks - If we have more than one empty (other than phi
-/// node) return blocks, merge them together to promote recursive block merging.
+/// If we have more than one empty (other than phi node) return block,
+/// merge them together to promote recursive block merging.
static bool mergeEmptyReturnBlocks(Function &F) {
bool Changed = false;
@@ -124,7 +124,7 @@ static bool mergeEmptyReturnBlocks(Function &F) {
return Changed;
}
-/// iterativelySimplifyCFG - Call SimplifyCFG on all the blocks in the function,
+/// Call SimplifyCFG on all the blocks in the function,
/// iterating until no more changes are made.
static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
AssumptionCache *AC,
@@ -134,8 +134,7 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
while (LocalChange) {
LocalChange = false;
- // Loop over all of the basic blocks and remove them if they are unneeded...
- //
+ // Loop over all of the basic blocks and remove them if they are unneeded.
for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) {
if (SimplifyCFG(BBIt++, TTI, BonusInstThreshold, AC)) {
LocalChange = true;
@@ -159,7 +158,7 @@ static bool simplifyFunctionCFG(Function &F, const TargetTransformInfo &TTI,
// iterativelySimplifyCFG can (rarely) make some loops dead. If this happens,
// removeUnreachableBlocks is needed to nuke them, which means we should
// iterate between the two optimizations. We structure the code like this to
- // avoid reruning iterativelySimplifyCFG if the second pass of
+ // avoid rerunning iterativelySimplifyCFG if the second pass of
// removeUnreachableBlocks doesn't do anything.
if (!removeUnreachableBlocks(F))
return true;
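The iteration scheme described in this comment can be sketched as a standalone fixed-point driver; simplifyToFixedPoint and the lambdas below are illustrative stand-ins for simplifyFunctionCFG and its two callees, not the pass's real signatures.

#include <functional>

// Returns true iff anything changed; Simplify / RemoveUnreachable stand in
// for iterativelySimplifyCFG / removeUnreachableBlocks.
bool simplifyToFixedPoint(std::function<bool()> Simplify,
                          std::function<bool()> RemoveUnreachable) {
  bool EverChanged = RemoveUnreachable();
  EverChanged |= Simplify();
  if (!EverChanged)
    return false;
  // Simplification can (rarely) leave dead loops behind; if removing
  // unreachable blocks finds nothing, skip rerunning the simplifier.
  if (!RemoveUnreachable())
    return true;
  bool Changed;
  do {
    Changed = Simplify();
    Changed |= RemoveUnreachable();
  } while (Changed);
  return true;
}

int main() {
  int SimplifyBudget = 2, DeadBlocks = 1;
  (void)simplifyToFixedPoint([&] { return SimplifyBudget-- > 0; },
                             [&] { return DeadBlocks-- > 0; });
}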
@@ -220,7 +219,7 @@ struct CFGSimplifyPass : public FunctionPass {
AU.addRequired<TargetTransformInfoWrapperPass>();
}
};
-} // namespace
+}
char CFGSimplifyPass::ID = 0;
INITIALIZE_PASS_BEGIN(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false,
diff --git a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
index f32769c24110..6d9d417ef943 100644
--- a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -224,11 +224,13 @@ FunctionPass *llvm::createStraightLineStrengthReducePass() {
bool StraightLineStrengthReduce::isBasisFor(const Candidate &Basis,
const Candidate &C) {
return (Basis.Ins != C.Ins && // skip the same instruction
+ // They must have the same type too. Basis.Base == C.Base doesn't
+ // guarantee their types are the same (PR23975).
+ Basis.Ins->getType() == C.Ins->getType() &&
// Basis must dominate C in order to rewrite C with respect to Basis.
DT->dominates(Basis.Ins->getParent(), C.Ins->getParent()) &&
// They share the same base, stride, and candidate kind.
- Basis.Base == C.Base &&
- Basis.Stride == C.Stride &&
+ Basis.Base == C.Base && Basis.Stride == C.Stride &&
Basis.CandidateKind == C.CandidateKind);
}
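The fix adds a type check because two candidates can agree on Base and Stride as SCEVs while producing values of different IR types (PR23975), and rewriting one in terms of the other would then be ill-typed. A standalone model of the predicate, with Candidate flattened to plain fields and dominance modeled as program order (all stand-ins, not the pass's real types):

#include <cassert>
#include <string>

struct Candidate {
  int Ins;                // stand-in for the instruction's identity
  std::string Type;       // stand-in for Ins->getType()
  int Base, Stride, Kind; // stand-ins for the SCEV keys
  bool Dominates(const Candidate &C) const { return Ins < C.Ins; }
};

bool isBasisFor(const Candidate &Basis, const Candidate &C) {
  return Basis.Ins != C.Ins &&   // skip the same instruction
         Basis.Type == C.Type && // the PR23975 fix: types must match
         Basis.Dominates(C) &&   // Basis must dominate C
         Basis.Base == C.Base && Basis.Stride == C.Stride &&
         Basis.Kind == C.Kind;
}

int main() {
  Candidate A{0, "i32", 1, 4, 0}, B{1, "i32", 1, 4, 0}, W{2, "i64", 1, 4, 0};
  assert(isBasisFor(A, B));  // same type, same keys: valid basis
  assert(!isBasisFor(A, W)); // same keys but i64 vs i32: rejected
}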
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index d23f5153c188..c7de2e2965c7 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -120,7 +120,7 @@ namespace {
bool CanMoveAboveCall(Instruction *I, CallInst *CI);
Value *CanTransformAccumulatorRecursion(Instruction *I, CallInst *CI);
};
-} // namespace
+}
char TailCallElim::ID = 0;
INITIALIZE_PASS_BEGIN(TailCallElim, "tailcallelim",
@@ -246,7 +246,7 @@ struct AllocaDerivedValueTracker {
SmallPtrSet<Instruction *, 32> AllocaUsers;
SmallPtrSet<Instruction *, 32> EscapePoints;
};
-} // namespace
+}
bool TailCallElim::markTails(Function &F, bool &AllCallsAreTailCalls) {
if (F.callsFunctionThatReturnsTwice())