From 1a82d4c088707c791c792f6822f611b47a12bdfe Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Sun, 5 Jul 2015 14:21:36 +0000 Subject: Vendor import of llvm trunk r241361: https://llvm.org/svn/llvm-project/llvm/trunk@241361 --- lib/Transforms/Scalar/ADCE.cpp | 2 +- lib/Transforms/Scalar/AlignmentFromAssumptions.cpp | 2 +- lib/Transforms/Scalar/BDCE.cpp | 2 +- lib/Transforms/Scalar/ConstantHoisting.cpp | 2 +- lib/Transforms/Scalar/ConstantProp.cpp | 2 +- .../Scalar/CorrelatedValuePropagation.cpp | 2 +- lib/Transforms/Scalar/DCE.cpp | 4 +- lib/Transforms/Scalar/DeadStoreElimination.cpp | 2 +- lib/Transforms/Scalar/EarlyCSE.cpp | 12 +- lib/Transforms/Scalar/FlattenCFGPass.cpp | 2 +- lib/Transforms/Scalar/Float2Int.cpp | 2 +- lib/Transforms/Scalar/GVN.cpp | 20 +-- lib/Transforms/Scalar/IndVarSimplify.cpp | 15 +- .../Scalar/InductiveRangeCheckElimination.cpp | 4 +- lib/Transforms/Scalar/JumpThreading.cpp | 2 +- lib/Transforms/Scalar/LICM.cpp | 4 +- lib/Transforms/Scalar/LoadCombine.cpp | 2 +- lib/Transforms/Scalar/LoopDeletion.cpp | 2 +- lib/Transforms/Scalar/LoopDistribute.cpp | 54 +++--- lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 2 +- lib/Transforms/Scalar/LoopInstSimplify.cpp | 2 +- lib/Transforms/Scalar/LoopRerollPass.cpp | 2 +- lib/Transforms/Scalar/LoopRotation.cpp | 2 +- lib/Transforms/Scalar/LoopStrengthReduce.cpp | 18 +- lib/Transforms/Scalar/LoopUnrollPass.cpp | 2 +- lib/Transforms/Scalar/LoopUnswitch.cpp | 194 ++++++++++++++------- lib/Transforms/Scalar/LowerAtomic.cpp | 2 +- lib/Transforms/Scalar/LowerExpectIntrinsic.cpp | 2 +- lib/Transforms/Scalar/MemCpyOptimizer.cpp | 6 +- lib/Transforms/Scalar/MergedLoadStoreMotion.cpp | 2 +- lib/Transforms/Scalar/NaryReassociate.cpp | 69 ++++++-- lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp | 2 +- lib/Transforms/Scalar/PlaceSafepoints.cpp | 4 +- lib/Transforms/Scalar/Reassociate.cpp | 14 +- lib/Transforms/Scalar/Reg2Mem.cpp | 2 +- lib/Transforms/Scalar/RewriteStatepointsForGC.cpp | 183 +++++++++++++------ lib/Transforms/Scalar/SCCP.cpp | 2 +- lib/Transforms/Scalar/SROA.cpp | 12 +- lib/Transforms/Scalar/SampleProfile.cpp | 2 +- lib/Transforms/Scalar/ScalarReplAggregates.cpp | 4 +- lib/Transforms/Scalar/SimplifyCFGPass.cpp | 13 +- .../Scalar/StraightLineStrengthReduce.cpp | 6 +- lib/Transforms/Scalar/TailRecursionElimination.cpp | 4 +- 43 files changed, 442 insertions(+), 246 deletions(-) (limited to 'lib/Transforms/Scalar') diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp index fe0224bb56c7..d6fc91641588 100644 --- a/lib/Transforms/Scalar/ADCE.cpp +++ b/lib/Transforms/Scalar/ADCE.cpp @@ -44,7 +44,7 @@ struct ADCE : public FunctionPass { AU.setPreservesCFG(); } }; -} // namespace +} char ADCE::ID = 0; INITIALIZE_PASS(ADCE, "adce", "Aggressive Dead Code Elimination", false, false) diff --git a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp index a4e5446a2b12..8918909f484a 100644 --- a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp +++ b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp @@ -76,7 +76,7 @@ struct AlignmentFromAssumptions : public FunctionPass { const SCEV *&OffSCEV); bool processAssumption(CallInst *I); }; -} // namespace +} char AlignmentFromAssumptions::ID = 0; static const char aip_name[] = "Alignment from assumptions"; diff --git a/lib/Transforms/Scalar/BDCE.cpp b/lib/Transforms/Scalar/BDCE.cpp index 8ffbacddda68..09c605e76737 100644 --- a/lib/Transforms/Scalar/BDCE.cpp +++ b/lib/Transforms/Scalar/BDCE.cpp @@ -66,7 +66,7 @@ 
struct BDCE : public FunctionPass { AssumptionCache *AC; DominatorTree *DT; }; -} // namespace +} char BDCE::ID = 0; INITIALIZE_PASS_BEGIN(BDCE, "bdce", "Bit-Tracking Dead Code Elimination", diff --git a/lib/Transforms/Scalar/ConstantHoisting.cpp b/lib/Transforms/Scalar/ConstantHoisting.cpp index cc1dc9435a05..4288742dd3eb 100644 --- a/lib/Transforms/Scalar/ConstantHoisting.cpp +++ b/lib/Transforms/Scalar/ConstantHoisting.cpp @@ -171,7 +171,7 @@ private: void deleteDeadCastInst() const; bool optimizeConstants(Function &Fn); }; -} // namespace +} char ConstantHoisting::ID = 0; INITIALIZE_PASS_BEGIN(ConstantHoisting, "consthoist", "Constant Hoisting", diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp index e3df86ecf169..c974ebb9456f 100644 --- a/lib/Transforms/Scalar/ConstantProp.cpp +++ b/lib/Transforms/Scalar/ConstantProp.cpp @@ -47,7 +47,7 @@ namespace { AU.addRequired(); } }; -} // namespace +} char ConstantPropagation::ID = 0; INITIALIZE_PASS_BEGIN(ConstantPropagation, "constprop", diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index b1809b7fae08..79624b2e4c47 100644 --- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -56,7 +56,7 @@ namespace { AU.addRequired(); } }; -} // namespace +} char CorrelatedValuePropagation::ID = 0; INITIALIZE_PASS_BEGIN(CorrelatedValuePropagation, "correlated-propagation", diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp index aa628e5aca81..3b262a23091f 100644 --- a/lib/Transforms/Scalar/DCE.cpp +++ b/lib/Transforms/Scalar/DCE.cpp @@ -60,7 +60,7 @@ namespace { AU.setPreservesCFG(); } }; -} // namespace +} char DeadInstElimination::ID = 0; INITIALIZE_PASS(DeadInstElimination, "die", @@ -87,7 +87,7 @@ namespace { AU.setPreservesCFG(); } }; -} // namespace +} char DCE::ID = 0; INITIALIZE_PASS(DCE, "dce", "Dead Code Elimination", false, false) diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index c99dc5fc8445..c50558434da2 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -92,7 +92,7 @@ namespace { AU.addPreserved(); } }; -} // namespace +} char DSE::ID = 0; INITIALIZE_PASS_BEGIN(DSE, "dse", "Dead Store Elimination", false, false) diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index 8b629eaca9d4..d536a937dce1 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -72,7 +72,7 @@ struct SimpleValue { isa(Inst) || isa(Inst); } }; -} // namespace +} namespace llvm { template <> struct DenseMapInfo { @@ -85,7 +85,7 @@ template <> struct DenseMapInfo { static unsigned getHashValue(SimpleValue Val); static bool isEqual(SimpleValue LHS, SimpleValue RHS); }; -} // namespace llvm +} unsigned DenseMapInfo::getHashValue(SimpleValue Val) { Instruction *Inst = Val.Inst; @@ -219,7 +219,7 @@ struct CallValue { return true; } }; -} // namespace +} namespace llvm { template <> struct DenseMapInfo { @@ -232,7 +232,7 @@ template <> struct DenseMapInfo { static unsigned getHashValue(CallValue Val); static bool isEqual(CallValue LHS, CallValue RHS); }; -} // namespace llvm +} unsigned DenseMapInfo::getHashValue(CallValue Val) { Instruction *Inst = Val.Inst; @@ -447,7 +447,7 @@ private: ExpectedType); } }; -} // namespace +} bool EarlyCSE::processNode(DomTreeNode *Node) { 
BasicBlock *BB = Node->getBlock(); @@ -764,7 +764,7 @@ public: AU.setPreservesCFG(); } }; -} // namespace +} char EarlyCSELegacyPass::ID = 0; diff --git a/lib/Transforms/Scalar/FlattenCFGPass.cpp b/lib/Transforms/Scalar/FlattenCFGPass.cpp index dd6ea8d455c5..0430c1898c8d 100644 --- a/lib/Transforms/Scalar/FlattenCFGPass.cpp +++ b/lib/Transforms/Scalar/FlattenCFGPass.cpp @@ -36,7 +36,7 @@ public: private: AliasAnalysis *AA; }; -} // namespace +} char FlattenCFGPass::ID = 0; INITIALIZE_PASS_BEGIN(FlattenCFGPass, "flattencfg", "Flatten the CFG", false, diff --git a/lib/Transforms/Scalar/Float2Int.cpp b/lib/Transforms/Scalar/Float2Int.cpp index bb90c5f73239..c9314229c38b 100644 --- a/lib/Transforms/Scalar/Float2Int.cpp +++ b/lib/Transforms/Scalar/Float2Int.cpp @@ -79,7 +79,7 @@ namespace { MapVector ConvertedInsts; LLVMContext *Ctx; }; -} // namespace +} char Float2Int::ID = 0; INITIALIZE_PASS(Float2Int, "float2int", "Float to int", false, false) diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index d9308c4e3710..60903c8b4aaf 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -138,7 +138,7 @@ namespace { uint32_t getNextUnusedValueNumber() { return nextValueNumber; } void verifyRemoved(const Value *) const; }; -} // namespace +} namespace llvm { template <> struct DenseMapInfo { @@ -159,7 +159,7 @@ template <> struct DenseMapInfo { } }; -} // namespace llvm +} //===----------------------------------------------------------------------===// // ValueTable Internal Functions @@ -723,7 +723,7 @@ namespace { }; char GVN::ID = 0; -} // namespace +} // The public interface to this file... FunctionPass *llvm::createGVNPass(bool NoLoads) { @@ -1783,13 +1783,9 @@ static void patchReplacementInstruction(Instruction *I, Value *Repl) { // being replaced. BinaryOperator *Op = dyn_cast(I); BinaryOperator *ReplOp = dyn_cast(Repl); - if (Op && ReplOp && isa(Op) && - isa(ReplOp)) { - if (ReplOp->hasNoSignedWrap() && !Op->hasNoSignedWrap()) - ReplOp->setHasNoSignedWrap(false); - if (ReplOp->hasNoUnsignedWrap() && !Op->hasNoUnsignedWrap()) - ReplOp->setHasNoUnsignedWrap(false); - } + if (Op && ReplOp) + ReplOp->andIRFlags(Op); + if (Instruction *ReplInst = dyn_cast(Repl)) { // FIXME: If both the original and replacement value are part of the // same control-flow region (meaning that the execution of one @@ -2808,6 +2804,10 @@ bool GVN::processFoldableCondBr(BranchInst *BI) { if (!BI || BI->isUnconditional()) return false; + // If a branch has two identical successors, we cannot declare either dead. + if (BI->getSuccessor(0) == BI->getSuccessor(1)) + return false; + ConstantInt *Cond = dyn_cast(BI->getCondition()); if (!Cond) return false; diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index e931382ea98f..6f0375487af6 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -136,7 +136,7 @@ namespace { void SinkUnusedInvariants(Loop *L); }; -} // namespace +} char IndVarSimplify::ID = 0; INITIALIZE_PASS_BEGIN(IndVarSimplify, "indvars", @@ -494,7 +494,7 @@ struct RewritePhi { RewritePhi(PHINode *P, unsigned I, Value *V, bool H, bool S) : PN(P), Ith(I), Val(V), HighCost(H), SafePhi(S) {} }; -} // namespace +} //===----------------------------------------------------------------------===// // RewriteLoopExitValues - Optimize IV users outside the loop. 
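A note on the GVN change a little above: patchReplacementInstruction previously cleared nsw/nuw by hand and only for overflowing binary operators; Instruction::andIRFlags subsumes that by intersecting the IR flags of the two instructions. A minimal sketch of the idea, assuming two interchangeable binary operators (the helper name is illustrative, not part of the patch):

    #include "llvm/IR/Instructions.h"

    // ReplOp is about to stand in for Op at every use. A flag such as nsw is
    // only safe on the survivor if it held on both instructions, so keep the
    // intersection; andIRFlags() also covers nuw and fast-math flags.
    static void mergeFlagsForReplacement(llvm::BinaryOperator *Op,
                                         llvm::BinaryOperator *ReplOp) {
      // e.g. Op:     %a = add nsw i32 %x, %y   (being replaced)
      //      ReplOp: %b = add nuw i32 %x, %y   (survives)
      // afterwards:  %b = add i32 %x, %y       (no flag common to both)
      ReplOp->andIRFlags(Op);
    }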
@@ -758,7 +758,7 @@ namespace { WideIVInfo() : NarrowIV(nullptr), WidestNativeType(nullptr), IsSigned(false) {} }; -} // namespace +} /// visitCast - Update information about the induction variable that is /// extended by this sign or zero extend operation. This is used to determine @@ -1321,7 +1321,7 @@ namespace { // Implement the interface used by simplifyUsersOfIV. void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); } }; -} // namespace +} /// SimplifyAndExtend - Iteratively perform simplification on a worklist of IV /// users. Each successive simplification may push more users which may @@ -2013,11 +2013,10 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // Now that we're done iterating through lists, clean up any instructions // which are now dead. - while (!DeadInsts.empty()) { - Value *V = static_cast(DeadInsts.pop_back_val()); - if (Instruction *Inst = dyn_cast_or_null(V)) + while (!DeadInsts.empty()) + if (Instruction *Inst = + dyn_cast_or_null(DeadInsts.pop_back_val())) RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI); - } // The Rewriter may not be used from this point on. diff --git a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp index ce1a0ca8c7d9..cbdacad8f28b 100644 --- a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -222,7 +222,7 @@ public: }; char InductiveRangeCheckElimination::ID = 0; -} // namespace +} INITIALIZE_PASS(InductiveRangeCheckElimination, "irce", "Inductive range check elimination", false, false) @@ -618,7 +618,7 @@ public: bool run(); }; -} // namespace +} void LoopConstrainer::replacePHIBlock(PHINode *PN, BasicBlock *Block, BasicBlock *ReplaceBy) { diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 7316db6ca02c..1130d228acb8 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -138,7 +138,7 @@ namespace { bool SimplifyPartiallyRedundantLoad(LoadInst *LI); bool TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB); }; -} // namespace +} char JumpThreading::ID = 0; INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading", diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index e5019463bb5f..f0e6d641b180 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -156,7 +156,7 @@ namespace { /// Simple Analysis hook. Delete loop L from alias set map. void deleteAnalysisLoop(Loop *L) override; }; -} // namespace +} char LICM::ID = 0; INITIALIZE_PASS_BEGIN(LICM, "licm", "Loop Invariant Code Motion", false, false) @@ -777,7 +777,7 @@ namespace { AST.deleteValue(I); } }; -} // namespace +} // end anon namespace /// Try to promote memory values to scalars by sinking stores out of the /// loop and moving loads to before the loop. 
We do this by looping over diff --git a/lib/Transforms/Scalar/LoadCombine.cpp b/lib/Transforms/Scalar/LoadCombine.cpp index 3dbf6ac6ed08..c19cd19059b2 100644 --- a/lib/Transforms/Scalar/LoadCombine.cpp +++ b/lib/Transforms/Scalar/LoadCombine.cpp @@ -77,7 +77,7 @@ private: bool aggregateLoads(SmallVectorImpl &); bool combineLoads(SmallVectorImpl &); }; -} // namespace +} bool LoadCombine::doInitialization(Function &F) { DEBUG(dbgs() << "LoadCombine function: " << F.getName() << "\n"); diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp index 02760ffe2c68..98b068edf582 100644 --- a/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/lib/Transforms/Scalar/LoopDeletion.cpp @@ -57,7 +57,7 @@ namespace { bool &Changed, BasicBlock *Preheader); }; -} // namespace +} char LoopDeletion::ID = 0; INITIALIZE_PASS_BEGIN(LoopDeletion, "loop-deletion", diff --git a/lib/Transforms/Scalar/LoopDistribute.cpp b/lib/Transforms/Scalar/LoopDistribute.cpp index d21a7db48c51..0325d268c325 100644 --- a/lib/Transforms/Scalar/LoopDistribute.cpp +++ b/lib/Transforms/Scalar/LoopDistribute.cpp @@ -635,10 +635,11 @@ public: LoopVersioning(const LoopAccessInfo &LAI, Loop *L, LoopInfo *LI, DominatorTree *DT, const SmallVector *PtrToPartition = nullptr) - : OrigLoop(L), NonDistributedLoop(nullptr), + : VersionedLoop(L), NonVersionedLoop(nullptr), PtrToPartition(PtrToPartition), LAI(LAI), LI(LI), DT(DT) {} - /// \brief Returns true if we need memchecks to distribute the loop. + /// \brief Returns true if we need memchecks to disambiguate may-aliasing + /// accesses. bool needsRuntimeChecks() const { return LAI.getRuntimePointerCheck()->needsAnyChecking(PtrToPartition); } @@ -649,49 +650,51 @@ public: Instruction *FirstCheckInst; Instruction *MemRuntimeCheck; // Add the memcheck in the original preheader (this is empty initially). - BasicBlock *MemCheckBB = OrigLoop->getLoopPreheader(); + BasicBlock *MemCheckBB = VersionedLoop->getLoopPreheader(); std::tie(FirstCheckInst, MemRuntimeCheck) = LAI.addRuntimeCheck(MemCheckBB->getTerminator(), PtrToPartition); assert(MemRuntimeCheck && "called even though needsAnyChecking = false"); // Rename the block to make the IR more readable. - MemCheckBB->setName(OrigLoop->getHeader()->getName() + ".ldist.memcheck"); + MemCheckBB->setName(VersionedLoop->getHeader()->getName() + + ".lver.memcheck"); // Create empty preheader for the loop (and after cloning for the - // original/nondist loop). + // non-versioned loop). BasicBlock *PH = SplitBlock(MemCheckBB, MemCheckBB->getTerminator(), DT, LI); - PH->setName(OrigLoop->getHeader()->getName() + ".ph"); + PH->setName(VersionedLoop->getHeader()->getName() + ".ph"); // Clone the loop including the preheader. // // FIXME: This does not currently preserve SimplifyLoop because the exit // block is a join between the two loops. - SmallVector NonDistributedLoopBlocks; - NonDistributedLoop = - cloneLoopWithPreheader(PH, MemCheckBB, OrigLoop, VMap, ".ldist.nondist", - LI, DT, NonDistributedLoopBlocks); - remapInstructionsInLoop(NonDistributedLoopBlocks, VMap); + SmallVector NonVersionedLoopBlocks; + NonVersionedLoop = + cloneLoopWithPreheader(PH, MemCheckBB, VersionedLoop, VMap, + ".lver.orig", LI, DT, NonVersionedLoopBlocks); + remapInstructionsInLoop(NonVersionedLoopBlocks, VMap); // Insert the conditional branch based on the result of the memchecks. 
Instruction *OrigTerm = MemCheckBB->getTerminator(); - BranchInst::Create(NonDistributedLoop->getLoopPreheader(), - OrigLoop->getLoopPreheader(), MemRuntimeCheck, OrigTerm); + BranchInst::Create(NonVersionedLoop->getLoopPreheader(), + VersionedLoop->getLoopPreheader(), MemRuntimeCheck, + OrigTerm); OrigTerm->eraseFromParent(); // The loops merge in the original exit block. This is now dominated by the // memchecking block. - DT->changeImmediateDominator(OrigLoop->getExitBlock(), MemCheckBB); + DT->changeImmediateDominator(VersionedLoop->getExitBlock(), MemCheckBB); } /// \brief Adds the necessary PHI nodes for the versioned loops based on the /// loop-defined values used outside of the loop. void addPHINodes(const SmallVectorImpl &DefsUsedOutside) { - BasicBlock *PHIBlock = OrigLoop->getExitBlock(); + BasicBlock *PHIBlock = VersionedLoop->getExitBlock(); assert(PHIBlock && "No single successor to loop exit block"); for (auto *Inst : DefsUsedOutside) { - auto *NonDistInst = cast(VMap[Inst]); + auto *NonVersionedLoopInst = cast(VMap[Inst]); PHINode *PN; // First see if we have a single-operand PHI with the value defined by the @@ -704,24 +707,25 @@ public: } // If not create it. if (!PN) { - PN = PHINode::Create(Inst->getType(), 2, Inst->getName() + ".ldist", + PN = PHINode::Create(Inst->getType(), 2, Inst->getName() + ".lver", PHIBlock->begin()); for (auto *User : Inst->users()) - if (!OrigLoop->contains(cast(User)->getParent())) + if (!VersionedLoop->contains(cast(User)->getParent())) User->replaceUsesOfWith(Inst, PN); - PN->addIncoming(Inst, OrigLoop->getExitingBlock()); + PN->addIncoming(Inst, VersionedLoop->getExitingBlock()); } - // Add the new incoming value from the non-distributed loop. - PN->addIncoming(NonDistInst, NonDistributedLoop->getExitingBlock()); + // Add the new incoming value from the non-versioned loop. + PN->addIncoming(NonVersionedLoopInst, + NonVersionedLoop->getExitingBlock()); } } private: /// \brief The original loop. This becomes the "versioned" one, i.e. control /// goes if the memchecks all pass. - Loop *OrigLoop; + Loop *VersionedLoop; /// \brief The fall-back loop, i.e. if any of the memchecks fail. - Loop *NonDistributedLoop; + Loop *NonVersionedLoop; /// \brief For each memory pointer it contains the partitionId it is used in. /// If nullptr, no partitioning is used. @@ -730,8 +734,8 @@ private: /// If the pointer is used in multiple partitions the entry is set to -1. const SmallVector *PtrToPartition; - /// \brief This maps the instructions from OrigLoop to their counterpart in - /// NonDistributedLoop. + /// \brief This maps the instructions from VersionedLoop to their counterpart + /// in NonVersionedLoop. ValueToValueMapTy VMap; /// \brief Analyses used. 
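The LoopDistribute hunks above are largely a rename -- OrigLoop/NonDistributedLoop become VersionedLoop/NonVersionedLoop and the .ldist block suffixes become .lver -- since the versioning helper is not distribution-specific. The branch it emits can be summarized in a standalone sketch; MemRuntimeCheck is assumed to be the i1 conflict flag produced by LAI.addRuntimeCheck(), and both preheaders are assumed to exist already:

    #include "llvm/IR/Instructions.h"

    // Replace the memcheck block's terminator with a conditional branch: a
    // detected conflict falls back to the unmodified (non-versioned) loop,
    // otherwise the versioned loop may assume the pointers do not alias.
    static void emitVersioningBranch(llvm::BasicBlock *MemCheckBB,
                                     llvm::Value *MemRuntimeCheck,
                                     llvm::BasicBlock *NonVersionedPH,
                                     llvm::BasicBlock *VersionedPH) {
      llvm::Instruction *OrigTerm = MemCheckBB->getTerminator();
      llvm::BranchInst::Create(NonVersionedPH, VersionedPH, MemRuntimeCheck,
                               OrigTerm);
      OrigTerm->eraseFromParent();
    }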
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 3de1333a7c98..714ce914a8b3 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -209,7 +209,7 @@ namespace { bool runOnNoncountableLoop(); bool runOnCountableLoop(); }; -} // namespace +} char LoopIdiomRecognize::ID = 0; INITIALIZE_PASS_BEGIN(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms", diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp index 4c40f249ce1d..e12502654751 100644 --- a/lib/Transforms/Scalar/LoopInstSimplify.cpp +++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp @@ -52,7 +52,7 @@ namespace { AU.addRequired(); } }; -} // namespace +} char LoopInstSimplify::ID = 0; INITIALIZE_PASS_BEGIN(LoopInstSimplify, "loop-instsimplify", diff --git a/lib/Transforms/Scalar/LoopRerollPass.cpp b/lib/Transforms/Scalar/LoopRerollPass.cpp index f6db9b114e3f..ed103e6b8ed6 100644 --- a/lib/Transforms/Scalar/LoopRerollPass.cpp +++ b/lib/Transforms/Scalar/LoopRerollPass.cpp @@ -438,7 +438,7 @@ namespace { bool reroll(Instruction *IV, Loop *L, BasicBlock *Header, const SCEV *IterCount, ReductionTracker &Reductions); }; -} // namespace +} char LoopReroll::ID = 0; INITIALIZE_PASS_BEGIN(LoopReroll, "loop-reroll", "Reroll loops", false, false) diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index 2ba70ad1f1a7..a675e1289baf 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -79,7 +79,7 @@ namespace { AssumptionCache *AC; DominatorTree *DT; }; -} // namespace +} char LoopRotate::ID = 0; INITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false) diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index ee7248691992..4b59f3d2f6cc 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -116,7 +116,7 @@ public: void dump() const; }; -} // namespace +} void RegSortData::print(raw_ostream &OS) const { OS << "[NumUses=" << UsedByIndices.count() << ']'; @@ -157,7 +157,7 @@ public: const_iterator end() const { return RegSequence.end(); } }; -} // namespace +} void RegUseTracker::CountRegister(const SCEV *Reg, size_t LUIdx) { @@ -281,7 +281,7 @@ struct Formula { void dump() const; }; -} // namespace +} /// DoInitialMatch - Recursion helper for InitialMatch. static void DoInitialMatch(const SCEV *S, Loop *L, @@ -903,7 +903,7 @@ private: SmallPtrSetImpl *LoserRegs); }; -} // namespace +} /// RateRegister - Tally up interesting quantities from the given register. void Cost::RateRegister(const SCEV *Reg, @@ -1102,7 +1102,7 @@ struct LSRFixup { void dump() const; }; -} // namespace +} LSRFixup::LSRFixup() : UserInst(nullptr), OperandValToReplace(nullptr), LUIdx(~size_t(0)), @@ -1252,7 +1252,7 @@ public: void dump() const; }; -} // namespace +} /// HasFormula - Test whether this use as a formula which has the same /// registers as the given formula. @@ -1791,7 +1791,7 @@ public: void dump() const; }; -} // namespace +} /// OptimizeShadowIV - If IV is used in a int-to-float cast /// inside the loop then try to eliminate the cast operation. 
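For context on the OptimizeShadowIV comment that closes the hunk above: the transformation replaces a per-iteration int-to-float conversion of the induction variable with a parallel floating-point IV. A source-level illustration, where use() is a placeholder and the legality checks LSR performs are omitted:

    void use(double);

    // Before: the integer IV is converted on every iteration (a sitofp in
    // the loop body).
    void before(int n) {
      for (int i = 0; i < n; ++i)
        use(static_cast<double>(i));
    }

    // After: a "shadow" double IV advances in lockstep and the cast is gone.
    void after(int n) {
      double d = 0.0;
      for (int i = 0; i < n; ++i, d += 1.0)
        use(d);
    }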
@@ -3644,7 +3644,7 @@ struct WorkItem { void dump() const; }; -} // namespace +} void WorkItem::print(raw_ostream &OS) const { OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx @@ -4949,7 +4949,7 @@ private: void getAnalysisUsage(AnalysisUsage &AU) const override; }; -} // namespace +} char LoopStrengthReduce::ID = 0; INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce", diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index d702dc0b4ee9..9e7558d9c45f 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -229,7 +229,7 @@ namespace { unsigned DynamicCostSavingsDiscount, uint64_t UnrolledCost, uint64_t RolledDynamicCost); }; -} // namespace +} char LoopUnroll::ID = 0; INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false) diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index 5bdc2ec88d4a..cbc563bd8998 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -43,6 +43,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -80,6 +81,7 @@ namespace { struct LoopProperties { unsigned CanBeUnswitchedCount; + unsigned WasUnswitchedCount; unsigned SizeEstimation; UnswitchedValsMap UnswitchedVals; }; @@ -93,37 +95,52 @@ namespace { UnswitchedValsMap *CurLoopInstructions; LoopProperties *CurrentLoopProperties; - // Max size of code we can produce on remained iterations. + // A loop unswitching with an estimated cost above this threshold + // is not performed. MaxSize is turned into unswitching quota for + // the current loop, and reduced correspondingly, though note that + // the quota is returned by releaseMemory() when the loop has been + // processed, so that MaxSize will return to its previous + // value. So in most cases MaxSize will equal the Threshold flag + // when a new loop is processed. An exception to that is that + // MaxSize will have a smaller value while processing nested loops + // that were introduced due to loop unswitching of an outer loop. + // + // FIXME: The way that MaxSize works is subtle and depends on the + // pass manager processing loops and calling releaseMemory() in a + // specific order. It would be good to find a more straightforward + // way of doing what MaxSize does. unsigned MaxSize; - public: - - LUAnalysisCache() : - CurLoopInstructions(nullptr), CurrentLoopProperties(nullptr), - MaxSize(Threshold) - {} - - // Analyze loop. Check its size, calculate is it possible to unswitch - // it. Returns true if we can unswitch this loop. - bool countLoop(const Loop *L, const TargetTransformInfo &TTI, - AssumptionCache *AC); - - // Clean all data related to given loop. - void forgetLoop(const Loop *L); - - // Mark case value as unswitched. - // Since SI instruction can be partly unswitched, in order to avoid - // extra unswitching in cloned loops keep track all unswitched values. - void setUnswitched(const SwitchInst *SI, const Value *V); - - // Check was this case value unswitched before or not. - bool isUnswitched(const SwitchInst *SI, const Value *V); - - // Clone all loop-unswitch related loop properties. - // Redistribute unswitching quotas. - // Note, that new loop data is stored inside the VMap. 
- void cloneData(const Loop *NewLoop, const Loop *OldLoop, - const ValueToValueMapTy &VMap); + public: + LUAnalysisCache() + : CurLoopInstructions(nullptr), CurrentLoopProperties(nullptr), + MaxSize(Threshold) {} + + // Analyze loop. Check its size, calculate is it possible to unswitch + // it. Returns true if we can unswitch this loop. + bool countLoop(const Loop *L, const TargetTransformInfo &TTI, + AssumptionCache *AC); + + // Clean all data related to given loop. + void forgetLoop(const Loop *L); + + // Mark case value as unswitched. + // Since SI instruction can be partly unswitched, in order to avoid + // extra unswitching in cloned loops keep track all unswitched values. + void setUnswitched(const SwitchInst *SI, const Value *V); + + // Check was this case value unswitched before or not. + bool isUnswitched(const SwitchInst *SI, const Value *V); + + // Returns true if another unswitching could be done within the cost + // threshold. + bool CostAllowsUnswitching(); + + // Clone all loop-unswitch related loop properties. + // Redistribute unswitching quotas. + // Note, that new loop data is stored inside the VMap. + void cloneData(const Loop *NewLoop, const Loop *OldLoop, + const ValueToValueMapTy &VMap); }; class LoopUnswitch : public LoopPass { @@ -195,10 +212,12 @@ namespace { /// Update the appropriate Phi nodes as we do so. void SplitExitEdges(Loop *L, const SmallVectorImpl &ExitBlocks); - bool UnswitchIfProfitable(Value *LoopCond, Constant *Val); + bool UnswitchIfProfitable(Value *LoopCond, Constant *Val, + TerminatorInst *TI = nullptr); void UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val, - BasicBlock *ExitBlock); - void UnswitchNontrivialCondition(Value *LIC, Constant *OnVal, Loop *L); + BasicBlock *ExitBlock, TerminatorInst *TI); + void UnswitchNontrivialCondition(Value *LIC, Constant *OnVal, Loop *L, + TerminatorInst *TI); void RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, Constant *Val, bool isEqual); @@ -206,14 +225,15 @@ namespace { void EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val, BasicBlock *TrueDest, BasicBlock *FalseDest, - Instruction *InsertPt); + Instruction *InsertPt, + TerminatorInst *TI); void SimplifyCode(std::vector &Worklist, Loop *L); bool IsTrivialUnswitchCondition(Value *Cond, Constant **Val = nullptr, BasicBlock **LoopExit = nullptr); }; -} // namespace +} // Analyze loop. Check its size, calculate is it possible to unswitch // it. Returns true if we can unswitch this loop. @@ -242,12 +262,13 @@ bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI, // consideration code simplification opportunities and code that can // be shared by the resultant unswitched loops. 
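The size estimate described above feeds a per-loop unswitching quota. A minimal self-contained sketch of the arithmetic in countLoop (field and variable names mirror the pass; the struct itself is illustrative):

    // Each unswitch roughly duplicates the loop body, so a loop is granted
    // floor(budget / size) unswitches, and that much budget stays reserved
    // until forgetLoop()/releaseMemory() returns it.
    struct UnswitchQuotaSketch {
      unsigned MaxSize; // remaining global budget, initially the Threshold
      unsigned reserveFor(unsigned LoopSizeInInsts) {
        unsigned Count = MaxSize / LoopSizeInInsts; // CanBeUnswitchedCount
        MaxSize -= LoopSizeInInsts * Count;
        return Count;
      }
    };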
CodeMetrics Metrics; - for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); - I != E; ++I) + for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; + ++I) Metrics.analyzeBasicBlock(*I, TTI, EphValues); - Props.SizeEstimation = std::min(Metrics.NumInsts, Metrics.NumBlocks * 5); + Props.SizeEstimation = Metrics.NumInsts; Props.CanBeUnswitchedCount = MaxSize / (Props.SizeEstimation); + Props.WasUnswitchedCount = 0; MaxSize -= Props.SizeEstimation * Props.CanBeUnswitchedCount; if (Metrics.notDuplicatable) { @@ -258,13 +279,6 @@ bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI, } } - if (!Props.CanBeUnswitchedCount) { - DEBUG(dbgs() << "NOT unswitching loop %" - << L->getHeader()->getName() << ", cost too high: " - << L->getBlocks().size() << "\n"); - return false; - } - // Be careful. This links are good only before new loop addition. CurrentLoopProperties = &Props; CurLoopInstructions = &Props.UnswitchedVals; @@ -279,7 +293,8 @@ void LUAnalysisCache::forgetLoop(const Loop *L) { if (LIt != LoopsProperties.end()) { LoopProperties &Props = LIt->second; - MaxSize += Props.CanBeUnswitchedCount * Props.SizeEstimation; + MaxSize += (Props.CanBeUnswitchedCount + Props.WasUnswitchedCount) * + Props.SizeEstimation; LoopsProperties.erase(LIt); } @@ -299,6 +314,10 @@ bool LUAnalysisCache::isUnswitched(const SwitchInst *SI, const Value *V) { return (*CurLoopInstructions)[SI].count(V); } +bool LUAnalysisCache::CostAllowsUnswitching() { + return CurrentLoopProperties->CanBeUnswitchedCount > 0; +} + // Clone all loop-unswitch related loop properties. // Redistribute unswitching quotas. // Note, that new loop data is stored inside the VMap. @@ -312,6 +331,8 @@ void LUAnalysisCache::cloneData(const Loop *NewLoop, const Loop *OldLoop, // Reallocate "can-be-unswitched quota" --OldLoopProps.CanBeUnswitchedCount; + ++OldLoopProps.WasUnswitchedCount; + NewLoopProps.WasUnswitchedCount = 0; unsigned Quota = OldLoopProps.CanBeUnswitchedCount; NewLoopProps.CanBeUnswitchedCount = Quota / 2; OldLoopProps.CanBeUnswitchedCount = Quota - Quota / 2; @@ -453,8 +474,8 @@ bool LoopUnswitch::processCurrentLoop() { // unswitch on it if we desire. Value *LoopCond = FindLIVLoopCondition(BI->getCondition(), currentLoop, Changed); - if (LoopCond && UnswitchIfProfitable(LoopCond, - ConstantInt::getTrue(Context))) { + if (LoopCond && + UnswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context), TI)) { ++NumBranches; return true; } @@ -643,7 +664,8 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val, /// UnswitchIfProfitable - We have found that we can unswitch currentLoop when /// LoopCond == Val to simplify the loop. If we decide that this is profitable, /// unswitch the loop, reprocess the pieces, then return true. -bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) { +bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val, + TerminatorInst *TI) { Function *F = loopHeader->getParent(); Constant *CondVal = nullptr; BasicBlock *ExitBlock = nullptr; @@ -651,17 +673,25 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) { if (IsTrivialUnswitchCondition(LoopCond, &CondVal, &ExitBlock)) { // If the condition is trivial, always unswitch. There is no code growth // for this case. 
- UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, ExitBlock); + UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, ExitBlock, TI); return true; } // Check to see if it would be profitable to unswitch current loop. + if (!BranchesInfo.CostAllowsUnswitching()) { + DEBUG(dbgs() << "NOT unswitching loop %" + << currentLoop->getHeader()->getName() + << " at non-trivial condition '" << *Val + << "' == " << *LoopCond << "\n" + << ". Cost too high.\n"); + return false; + } // Do not do non-trivial unswitch while optimizing for size. if (OptimizeForSize || F->hasFnAttribute(Attribute::OptimizeForSize)) return false; - UnswitchNontrivialCondition(LoopCond, Val, currentLoop); + UnswitchNontrivialCondition(LoopCond, Val, currentLoop, TI); return true; } @@ -685,25 +715,65 @@ static Loop *CloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, return New; } +static void copyMetadata(Instruction *DstInst, const Instruction *SrcInst, + bool Swapped) { + if (!SrcInst || !SrcInst->hasMetadata()) + return; + + SmallVector, 4> MDs; + SrcInst->getAllMetadata(MDs); + for (auto &MD : MDs) { + switch (MD.first) { + default: + break; + case LLVMContext::MD_prof: + if (Swapped && MD.second->getNumOperands() == 3 && + isa(MD.second->getOperand(0))) { + MDString *MDName = cast(MD.second->getOperand(0)); + if (MDName->getString() == "branch_weights") { + auto *ValT = cast_or_null( + MD.second->getOperand(1))->getValue(); + auto *ValF = cast_or_null( + MD.second->getOperand(2))->getValue(); + assert(ValT && ValF && "Invalid Operands of branch_weights"); + auto NewMD = + MDBuilder(DstInst->getParent()->getContext()) + .createBranchWeights(cast(ValF)->getZExtValue(), + cast(ValT)->getZExtValue()); + MD.second = NewMD; + } + } + // fallthrough. + case LLVMContext::MD_dbg: + DstInst->setMetadata(MD.first, MD.second); + } + } +} + /// EmitPreheaderBranchOnCondition - Emit a conditional branch on two values /// if LIC == Val, branch to TrueDst, otherwise branch to FalseDest. Insert the /// code immediately before InsertPt. void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val, BasicBlock *TrueDest, BasicBlock *FalseDest, - Instruction *InsertPt) { + Instruction *InsertPt, + TerminatorInst *TI) { // Insert a conditional branch on LIC to the two preheaders. The original // code is the true version and the new code is the false version. Value *BranchVal = LIC; + bool Swapped = false; if (!isa(Val) || Val->getType() != Type::getInt1Ty(LIC->getContext())) BranchVal = new ICmpInst(InsertPt, ICmpInst::ICMP_EQ, LIC, Val); - else if (Val != ConstantInt::getTrue(Val->getContext())) + else if (Val != ConstantInt::getTrue(Val->getContext())) { // We want to enter the new loop when the condition is true. std::swap(TrueDest, FalseDest); + Swapped = true; + } // Insert the new branch. BranchInst *BI = BranchInst::Create(TrueDest, FalseDest, BranchVal, InsertPt); + copyMetadata(BI, TI, Swapped); // If either edge is critical, split it. This helps preserve LoopSimplify // form for enclosing loops. @@ -717,13 +787,14 @@ void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val, /// where the path through the loop that doesn't execute its body has no /// side-effects), unswitch it. This doesn't involve any code duplication, just /// moving the conditional branch outside of the loop and updating loop info. 
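The copyMetadata helper above exists because EmitPreheaderBranchOnCondition may swap the branch's successors; when it does, the !prof branch_weights operands must be swapped as well, or the profile would describe the wrong arm. The core of that swap as a standalone sketch (the weights are illustrative):

    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/MDBuilder.h"

    // Rebuild !prof metadata with the taken/not-taken counts exchanged, for
    // a branch whose true and false destinations were just swapped.
    static void swapProfMetadata(llvm::BranchInst *BI, uint32_t OldTrueWeight,
                                 uint32_t OldFalseWeight) {
      llvm::MDBuilder MDB(BI->getContext());
      BI->setMetadata(llvm::LLVMContext::MD_prof,
                      MDB.createBranchWeights(OldFalseWeight, OldTrueWeight));
    }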
-void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, - Constant *Val, - BasicBlock *ExitBlock) { +void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val, + BasicBlock *ExitBlock, + TerminatorInst *TI) { DEBUG(dbgs() << "loop-unswitch: Trivial-Unswitch loop %" - << loopHeader->getName() << " [" << L->getBlocks().size() - << " blocks] in Function " << L->getHeader()->getParent()->getName() - << " on cond: " << *Val << " == " << *Cond << "\n"); + << loopHeader->getName() << " [" << L->getBlocks().size() + << " blocks] in Function " + << L->getHeader()->getParent()->getName() << " on cond: " << *Val + << " == " << *Cond << "\n"); // First step, split the preheader, so that we know that there is a safe place // to insert the conditional branch. We will change loopPreheader to have a @@ -744,7 +815,7 @@ void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, // Okay, now we have a position to branch from and a position to branch to, // insert the new conditional branch. EmitPreheaderBranchOnCondition(Cond, Val, NewExit, NewPH, - loopPreheader->getTerminator()); + loopPreheader->getTerminator(), TI); LPM->deleteSimpleAnalysisValue(loopPreheader->getTerminator(), L); loopPreheader->getTerminator()->eraseFromParent(); @@ -780,7 +851,7 @@ void LoopUnswitch::SplitExitEdges(Loop *L, /// to unswitch when LIC equal Val. Split it into loop versions and test the /// condition outside of either loop. Return the loops created as Out1/Out2. void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, - Loop *L) { + Loop *L, TerminatorInst *TI) { Function *F = loopHeader->getParent(); DEBUG(dbgs() << "loop-unswitch: Unswitching loop %" << loopHeader->getName() << " [" << L->getBlocks().size() @@ -897,7 +968,8 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, "Preheader splitting did not work correctly!"); // Emit the new branch that selects between the two versions of this loop. 
- EmitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR); + EmitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR, + TI); LPM->deleteSimpleAnalysisValue(OldBR, L); OldBR->eraseFromParent(); diff --git a/lib/Transforms/Scalar/LowerAtomic.cpp b/lib/Transforms/Scalar/LowerAtomic.cpp index b8b35d4249f0..3314e1ed41ab 100644 --- a/lib/Transforms/Scalar/LowerAtomic.cpp +++ b/lib/Transforms/Scalar/LowerAtomic.cpp @@ -138,7 +138,7 @@ namespace { return Changed; } }; -} // namespace +} char LowerAtomic::ID = 0; INITIALIZE_PASS(LowerAtomic, "loweratomic", diff --git a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp index b845c038e67e..0c47cbd5bfda 100644 --- a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp +++ b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp @@ -181,7 +181,7 @@ public: bool runOnFunction(Function &F) override { return lowerExpectIntrinsic(F); } }; -} // namespace +} char LowerExpectIntrinsic::ID = 0; INITIALIZE_PASS(LowerExpectIntrinsic, "lower-expect", diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 2c9f93513ae2..85012afc80ac 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -153,7 +153,7 @@ struct MemsetRange { bool isProfitableToUseMemset(const DataLayout &DL) const; }; -} // namespace +} // end anon namespace bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const { // If we found more than 4 stores to merge or 16 bytes, use memset. @@ -237,7 +237,7 @@ public: }; -} // namespace +} // end anon namespace /// addRange - Add a new store to the MemsetRanges data structure. This adds a @@ -355,7 +355,7 @@ namespace { }; char MemCpyOpt::ID = 0; -} // namespace +} // createMemCpyOptPass - The public interface to this file... FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOpt(); } diff --git a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp index 886b6f5b0a2c..243db8d70ca2 100644 --- a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp +++ b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp @@ -156,7 +156,7 @@ private: }; char MergedLoadStoreMotion::ID = 0; -} // namespace +} /// /// \brief createMergedLoadStoreMotionPass - The public interface to this file. diff --git a/lib/Transforms/Scalar/NaryReassociate.cpp b/lib/Transforms/Scalar/NaryReassociate.cpp index 4cf68b00da0a..f42f8306fccc 100644 --- a/lib/Transforms/Scalar/NaryReassociate.cpp +++ b/lib/Transforms/Scalar/NaryReassociate.cpp @@ -74,21 +74,18 @@ // 1) We only considers n-ary adds for now. This should be extended and // generalized. // -// 2) Besides arithmetic operations, similar reassociation can be applied to -// GEPs. For example, if -// X = &arr[a] -// dominates -// Y = &arr[a + b] -// we may rewrite Y into X + b. 
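The TODO being deleted above is exactly what the NaryReassociate changes in this patch implement: a GEP whose index is a sum can reuse a dominating GEP that computes part of that sum. A source-level illustration (consume() is a placeholder):

    void consume(int *);

    void sketch(int *arr, long a, long b) {
      int *x = &arr[a]; // X = &arr[a] dominates the computation below
      consume(x);
      // Y was &arr[a + b]; after reassociation it becomes &X[b], reusing the
      // dominating address instead of recomputing arr + a + b from scratch.
      int *y = &x[b];
      consume(y);
    }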
-// //===----------------------------------------------------------------------===// +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Module.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -115,6 +112,7 @@ public: AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); + AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); @@ -163,12 +161,18 @@ private: // GEP's pointer size, i.e., whether Index needs to be sign-extended in order // to be an index of GEP. bool requiresSignExtension(Value *Index, GetElementPtrInst *GEP); + // Returns whether V is known to be non-negative at context \c Ctxt. + bool isKnownNonNegative(Value *V, Instruction *Ctxt); + // Returns whether AO may sign overflow at context \c Ctxt. It computes a + // conservative result -- it answers true when not sure. + bool maySignOverflow(AddOperator *AO, Instruction *Ctxt); + AssumptionCache *AC; + const DataLayout *DL; DominatorTree *DT; ScalarEvolution *SE; TargetLibraryInfo *TLI; TargetTransformInfo *TTI; - const DataLayout *DL; // A lookup table quickly telling which instructions compute the given SCEV. // Note that there can be multiple instructions at different locations // computing to the same SCEV, so we map a SCEV to an instruction list. For @@ -185,6 +189,7 @@ private: char NaryReassociate::ID = 0; INITIALIZE_PASS_BEGIN(NaryReassociate, "nary-reassociate", "Nary reassociation", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) @@ -200,6 +205,7 @@ bool NaryReassociate::runOnFunction(Function &F) { if (skipOptnoneFunction(F)) return false; + AC = &getAnalysis().getAssumptionCache(F); DT = &getAnalysis().getDomTree(); SE = &getAnalysis(); TLI = &getAnalysis().getTLI(); @@ -346,18 +352,44 @@ bool NaryReassociate::requiresSignExtension(Value *Index, return cast(Index->getType())->getBitWidth() < PointerSizeInBits; } +bool NaryReassociate::isKnownNonNegative(Value *V, Instruction *Ctxt) { + bool NonNegative, Negative; + // TODO: ComputeSignBits is expensive. Consider caching the results. + ComputeSignBit(V, NonNegative, Negative, *DL, 0, AC, Ctxt, DT); + return NonNegative; +} + +bool NaryReassociate::maySignOverflow(AddOperator *AO, Instruction *Ctxt) { + if (AO->hasNoSignedWrap()) + return false; + + Value *LHS = AO->getOperand(0), *RHS = AO->getOperand(1); + // If LHS or RHS has the same sign as the sum, AO doesn't sign overflow. + // TODO: handle the negative case as well. + if (isKnownNonNegative(AO, Ctxt) && + (isKnownNonNegative(LHS, Ctxt) || isKnownNonNegative(RHS, Ctxt))) + return false; + + return true; +} + GetElementPtrInst * NaryReassociate::tryReassociateGEPAtIndex(GetElementPtrInst *GEP, unsigned I, Type *IndexedType) { Value *IndexToSplit = GEP->getOperand(I + 1); - if (SExtInst *SExt = dyn_cast(IndexToSplit)) + if (SExtInst *SExt = dyn_cast(IndexToSplit)) { IndexToSplit = SExt->getOperand(0); + } else if (ZExtInst *ZExt = dyn_cast(IndexToSplit)) { + // zext can be treated as sext if the source is non-negative. 
+ if (isKnownNonNegative(ZExt->getOperand(0), GEP)) + IndexToSplit = ZExt->getOperand(0); + } if (AddOperator *AO = dyn_cast(IndexToSplit)) { // If the I-th index needs sext and the underlying add is not equipped with // nsw, we cannot split the add because // sext(LHS + RHS) != sext(LHS) + sext(RHS). - if (requiresSignExtension(IndexToSplit, GEP) && !AO->hasNoSignedWrap()) + if (requiresSignExtension(IndexToSplit, GEP) && maySignOverflow(AO, GEP)) return nullptr; Value *LHS = AO->getOperand(0), *RHS = AO->getOperand(1); // IndexToSplit = LHS + RHS. @@ -373,10 +405,9 @@ NaryReassociate::tryReassociateGEPAtIndex(GetElementPtrInst *GEP, unsigned I, return nullptr; } -GetElementPtrInst * -NaryReassociate::tryReassociateGEPAtIndex(GetElementPtrInst *GEP, unsigned I, - Value *LHS, Value *RHS, - Type *IndexedType) { +GetElementPtrInst *NaryReassociate::tryReassociateGEPAtIndex( + GetElementPtrInst *GEP, unsigned I, Value *LHS, Value *RHS, + Type *IndexedType) { // Look for GEP's closest dominator that has the same SCEV as GEP except that // the I-th index is replaced with LHS. SmallVector IndexExprs; @@ -384,6 +415,16 @@ NaryReassociate::tryReassociateGEPAtIndex(GetElementPtrInst *GEP, unsigned I, IndexExprs.push_back(SE->getSCEV(*Index)); // Replace the I-th index with LHS. IndexExprs[I] = SE->getSCEV(LHS); + if (isKnownNonNegative(LHS, GEP) && + DL->getTypeSizeInBits(LHS->getType()) < + DL->getTypeSizeInBits(GEP->getOperand(I)->getType())) { + // Zero-extend LHS if it is non-negative. InstCombine canonicalizes sext to + // zext if the source operand is proved non-negative. We should do that + // consistently so that CandidateExpr more likely appears before. See + // @reassociate_gep_assume for an example of this canonicalization. + IndexExprs[I] = + SE->getZeroExtendExpr(IndexExprs[I], GEP->getOperand(I)->getType()); + } const SCEV *CandidateExpr = SE->getGEPExpr( GEP->getSourceElementType(), SE->getSCEV(GEP->getPointerOperand()), IndexExprs, GEP->isInBounds()); diff --git a/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp index 5423499723f7..31d7df39c781 100644 --- a/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp +++ b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp @@ -46,7 +46,7 @@ namespace { }; char PartiallyInlineLibCalls::ID = 0; -} // namespace +} INITIALIZE_PASS(PartiallyInlineLibCalls, "partially-inline-libcalls", "Partially inline calls to library functions", false, false) diff --git a/lib/Transforms/Scalar/PlaceSafepoints.cpp b/lib/Transforms/Scalar/PlaceSafepoints.cpp index 670dcd24f75c..9ecaf102574a 100644 --- a/lib/Transforms/Scalar/PlaceSafepoints.cpp +++ b/lib/Transforms/Scalar/PlaceSafepoints.cpp @@ -160,7 +160,7 @@ struct PlaceBackedgeSafepointsImpl : public FunctionPass { AU.setPreservesAll(); } }; -} // namespace +} static cl::opt NoEntry("spp-no-entry", cl::Hidden, cl::init(false)); static cl::opt NoCall("spp-no-call", cl::Hidden, cl::init(false)); @@ -181,7 +181,7 @@ struct PlaceSafepoints : public FunctionPass { // if that was worth doing } }; -} // namespace +} // Insert a safepoint poll immediately before the given instruction. 
Does // not handle the parsability of state at the runtime call, that's the diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index 9842fd7bb6c7..d1acf785d07e 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -154,7 +154,7 @@ namespace { unsigned SymbolicRank; bool isOr; }; -} // namespace +} namespace { class Reassociate : public FunctionPass { @@ -197,7 +197,7 @@ namespace { void OptimizeInst(Instruction *I); Instruction *canonicalizeNegConstExpr(Instruction *I); }; -} // namespace +} XorOpnd::XorOpnd(Value *V) { assert(!isa(V) && "No ConstantInt"); @@ -936,6 +936,10 @@ static Value *NegateValue(Value *V, Instruction *BI) { // Push the negates through the add. I->setOperand(0, NegateValue(I->getOperand(0), BI)); I->setOperand(1, NegateValue(I->getOperand(1), BI)); + if (I->getOpcode() == Instruction::Add) { + I->setHasNoUnsignedWrap(false); + I->setHasNoSignedWrap(false); + } // We must move the add instruction here, because the neg instructions do // not dominate the old add instruction in general. By moving it, we are @@ -976,6 +980,12 @@ static Value *NegateValue(Value *V, Instruction *BI) { InsertPt = TheNeg->getParent()->getParent()->getEntryBlock().begin(); } TheNeg->moveBefore(InsertPt); + if (TheNeg->getOpcode() == Instruction::Sub) { + TheNeg->setHasNoUnsignedWrap(false); + TheNeg->setHasNoSignedWrap(false); + } else { + TheNeg->andIRFlags(BI); + } return TheNeg; } diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp index 2ff56e67c9c6..1b46727c17bb 100644 --- a/lib/Transforms/Scalar/Reg2Mem.cpp +++ b/lib/Transforms/Scalar/Reg2Mem.cpp @@ -58,7 +58,7 @@ namespace { bool runOnFunction(Function &F) override; }; -} // namespace +} char RegToMem::ID = 0; INITIALIZE_PASS_BEGIN(RegToMem, "reg2mem", "Demote all values to stack slots", diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index c15bc1bd7eca..ae2ae3af0c7a 100644 --- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -183,7 +183,7 @@ struct PartiallyConstructedSafepointRecord { /// Maps rematerialized copy to it's original value. RematerializedValueMapTy RematerializedValues; }; -} // namespace +} /// Compute the live-in set for every basic block in the function static void computeLiveInValues(DominatorTree &DT, Function &F, @@ -294,12 +294,17 @@ static void analyzeParsePointLiveness( static Value *findBaseDefiningValue(Value *I); -/// If we can trivially determine that the index specified in the given vector -/// is a base pointer, return it. In cases where the entire vector is known to -/// consist of base pointers, the entire vector will be returned. This -/// indicates that the relevant extractelement is a valid base pointer and -/// should be used directly. -static Value *findBaseOfVector(Value *I, Value *Index) { +/// Return a base defining value for the 'Index' element of the given vector +/// instruction 'I'. If Index is null, returns a BDV for the entire vector +/// 'I'. As an optimization, this method will try to determine when the +/// element is known to already be a base pointer. If this can be established, +/// the second value in the returned pair will be true. Note that either a +/// vector or a pointer typed value can be returned. For the former, the +/// vector returned is a BDV (and possibly a base) of the entire vector 'I'. 
+/// If the latter, the return pointer is a BDV (or possibly a base) for the
+/// particular element in 'I'.
+static std::pair<Value *, bool>
+findBaseDefiningValueOfVector(Value *I, Value *Index = nullptr) {
   assert(I->getType()->isVectorTy() &&
          cast<VectorType>(I->getType())->getElementType()->isPointerTy() &&
          "Illegal to ask for the base pointer of a non-pointer type");
@@ -309,7 +314,7 @@ static Value *findBaseOfVector(Value *I, Value *Index) {
   if (isa<Argument>(I))
     // An incoming argument to the function is a base pointer
-    return I;
+    return std::make_pair(I, true);
 
   // We shouldn't see the address of a global as a vector value?
   assert(!isa<GlobalVariable>(I) &&
@@ -320,7 +325,7 @@ static Value *findBaseOfVector(Value *I, Value *Index) {
   if (isa<UndefValue>(I))
     // utterly meaningless, but useful for dealing with partially optimized
     // code.
-    return I;
+    return std::make_pair(I, true);
 
   // Due to inheritance, this must be _after_ the global variable and undef
   // checks
@@ -328,38 +333,56 @@ static Value *findBaseOfVector(Value *I, Value *Index) {
     assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) &&
            "order of checks wrong!");
     assert(Con->isNullValue() && "null is the only case which makes sense");
-    return Con;
+    return std::make_pair(Con, true);
   }
-
+
   if (isa<LoadInst>(I))
-    return I;
-
+    return std::make_pair(I, true);
+
   // For an insert element, we might be able to look through it if we know
-  // something about the indexes, but if the indices are arbitrary values, we
-  // can't without much more extensive scalarization.
+  // something about the indexes.
   if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(I)) {
-    Value *InsertIndex = IEI->getOperand(2);
-    // This index is inserting the value, look for it's base
-    if (InsertIndex == Index)
-      return findBaseDefiningValue(IEI->getOperand(1));
-    // Both constant, and can't be equal per above. This insert is definitely
-    // not relevant, look back at the rest of the vector and keep trying.
-    if (isa<ConstantInt>(Index) && isa<ConstantInt>(InsertIndex))
-      return findBaseOfVector(IEI->getOperand(0), Index);
-  }
-
-  // Note: This code is currently rather incomplete. We are essentially only
-  // handling cases where the vector element is trivially a base pointer. We
-  // need to update the entire base pointer construction algorithm to know how
-  // to track vector elements and potentially scalarize, but the case which
-  // would motivate the work hasn't shown up in real workloads yet.
-  llvm_unreachable("no base found for vector element");
+    if (Index) {
+      Value *InsertIndex = IEI->getOperand(2);
+      // This index is inserting the value, look for its BDV
+      if (InsertIndex == Index)
+        return std::make_pair(findBaseDefiningValue(IEI->getOperand(1)),
+                              false);
+      // Both constant, and can't be equal per above. This insert is
+      // definitely not relevant, look back at the rest of the vector and
+      // keep trying.
+      if (isa<ConstantInt>(Index) && isa<ConstantInt>(InsertIndex))
+        return findBaseDefiningValueOfVector(IEI->getOperand(0), Index);
+    }
+
+    // We don't know whether this vector contains entirely base pointers or
+    // not. To be conservatively correct, we treat it as a BDV and will
+    // duplicate code as needed to construct a parallel vector of bases.
+    return std::make_pair(IEI, false);
+  }
+
+  if (isa<ShuffleVectorInst>(I))
+    // We don't know whether this vector contains entirely base pointers or
+    // not. To be conservatively correct, we treat it as a BDV and will
+    // duplicate code as needed to construct a parallel vector of bases.
+    // TODO: There are a number of local optimizations which could be applied
+    // here for particular shufflevector patterns.
+    return std::make_pair(I, false);
+
+  // A PHI or Select is a base defining value. The outer findBasePointer
+  // algorithm is responsible for constructing a base value for this BDV.
+  assert((isa<PHINode>(I) || isa<SelectInst>(I)) &&
+         "unknown vector instruction - no base found for vector element");
+  return std::make_pair(I, false);
 }
 
+static bool isKnownBaseResult(Value *V);
+
 /// Helper function for findBasePointer - Will return a value which either a)
 /// defines the base pointer for the input or b) blocks the simple search
 /// (i.e. a PHI or Select of two derived pointers)
 static Value *findBaseDefiningValue(Value *I) {
+  if (I->getType()->isVectorTy())
+    return findBaseDefiningValueOfVector(I).first;
+
   assert(I->getType()->isPointerTy() &&
          "Illegal to ask for the base pointer of a non-pointer type");
@@ -370,16 +393,39 @@ static Value *findBaseDefiningValue(Value *I) {
   if (auto *EEI = dyn_cast<ExtractElementInst>(I)) {
     Value *VectorOperand = EEI->getVectorOperand();
     Value *Index = EEI->getIndexOperand();
-    Value *VectorBase = findBaseOfVector(VectorOperand, Index);
-    // If the result returned is a vector, we know the entire vector must
-    // contain base pointers. In that case, the extractelement is a valid
-    // base for this value.
-    if (VectorBase->getType()->isVectorTy())
-      return EEI;
-    // Otherwise, we needed to look through the vector to find the base for
-    // this particular element.
-    assert(VectorBase->getType()->isPointerTy());
-    return VectorBase;
+    std::pair<Value *, bool> pair =
+        findBaseDefiningValueOfVector(VectorOperand, Index);
+    Value *VectorBase = pair.first;
+    if (VectorBase->getType()->isPointerTy())
+      // We found a BDV for this specific element within the vector. This is
+      // an optimization, but in practice it covers most of the useful cases
+      // created via scalarization.
+      return VectorBase;
+    else {
+      assert(VectorBase->getType()->isVectorTy());
+      if (pair.second)
+        // If the entire vector returned is known to consist of base
+        // pointers, then the extractelement is a valid base for this value.
+        return EEI;
+      else {
+        // Otherwise, we have an instruction which potentially produces a
+        // derived pointer and we need findBasePointers to clone code for us
+        // such that we can create an instruction which produces the
+        // accompanying base pointer.
+        // Note: This code is currently rather incomplete. We don't currently
+        // support the general form of shufflevector of insertelement.
+        // Conceptually, these are just 'base defining values' of the same
+        // variety as phi or select instructions. We need to update the
+        // findBasePointers algorithm to insert new 'base-only' versions of
+        // the original instructions. This is relatively straightforward to
+        // do, but the case which would motivate the work hasn't shown up in
+        // real workloads yet.
+        assert((isa<PHINode>(VectorBase) || isa<SelectInst>(VectorBase)) &&
+               "need to extend findBasePointers for generic vector "
+               "instruction cases");
+        return VectorBase;
+      }
+    }
   }
 
   if (isa<Argument>(I))
@@ -646,7 +692,7 @@ private:
     llvm_unreachable("only three states!");
   }
 };
-} // namespace
+}
 
 /// For a given value or instruction, figure out what base ptr it's derived
 /// from. For gc objects, this is simply itself. On success, returns a value
 /// which is the base pointer. (This is reliable and can be used for
@@ -1712,7 +1758,9 @@ static void findLiveReferences(
 /// slightly non-trivial since it requires a format change. Given how rare
 /// such cases are (for the moment?) scalarizing is an acceptable compromise.
 static void splitVectorValues(Instruction *StatepointInst,
-                              StatepointLiveSetTy &LiveSet, DominatorTree &DT) {
+                              StatepointLiveSetTy &LiveSet,
+                              DenseMap<Value *, Value *>& PointerToBase,
+                              DominatorTree &DT) {
   SmallVector<Value *, 16> ToSplit;
   for (Value *V : LiveSet)
     if (isa<VectorType>(V->getType()))
       ToSplit.push_back(V);
@@ -1721,14 +1769,14 @@ static void splitVectorValues(Instruction *StatepointInst,
   if (ToSplit.empty())
     return;
 
+  DenseMap<Value *, SmallVector<Value *, 16>> ElementMapping;
+
   Function &F = *(StatepointInst->getParent()->getParent());
 
   DenseMap<Value *, AllocaInst *> AllocaMap;
   // First is normal return, second is exceptional return (invoke only)
   DenseMap<Value *, std::pair<Value *, Value *>> Replacements;
   for (Value *V : ToSplit) {
-    LiveSet.erase(V);
-
     AllocaInst *Alloca =
         new AllocaInst(V->getType(), "", F.getEntryBlock().getFirstNonPHI());
     AllocaMap[V] = Alloca;
@@ -1738,7 +1786,7 @@ static void splitVectorValues(Instruction *StatepointInst,
     SmallVector<Value *, 16> Elements;
     for (unsigned i = 0; i < VT->getNumElements(); i++)
       Elements.push_back(Builder.CreateExtractElement(V, Builder.getInt32(i)));
-    LiveSet.insert(Elements.begin(), Elements.end());
+    ElementMapping[V] = Elements;
 
     auto InsertVectorReform = [&](Instruction *IP) {
       Builder.SetInsertPoint(IP);
@@ -1771,6 +1819,7 @@ static void splitVectorValues(Instruction *StatepointInst,
       Replacements[V].second = InsertVectorReform(IP);
     }
   }
+
 
   for (Value *V : ToSplit) {
     AllocaInst *Alloca = AllocaMap[V];
@@ -1814,6 +1863,25 @@ static void splitVectorValues(Instruction *StatepointInst,
   for (Value *V : ToSplit)
     Allocas.push_back(AllocaMap[V]);
   PromoteMemToReg(Allocas, DT);
+
+  // Update our tracking of live pointers and base mappings to account for the
+  // changes we just made.
+  for (Value *V : ToSplit) {
+    auto &Elements = ElementMapping[V];
+
+    LiveSet.erase(V);
+    LiveSet.insert(Elements.begin(), Elements.end());
+    // We need to update the base mapping as well.
+    assert(PointerToBase.count(V));
+    Value *OldBase = PointerToBase[V];
+    auto &BaseElements = ElementMapping[OldBase];
+    PointerToBase.erase(V);
+    assert(Elements.size() == BaseElements.size());
+    for (unsigned i = 0; i < Elements.size(); i++) {
+      Value *Elem = Elements[i];
+      PointerToBase[Elem] = BaseElements[i];
+    }
+  }
 }
 
 // Helper function for the "rematerializeLiveValues". It walks use chain
@@ -2075,17 +2143,6 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
   // site.
   findLiveReferences(F, DT, P, toUpdate, records);
 
-  // Do a limited scalarization of any live at safepoint vector values which
-  // contain pointers. This enables this pass to run after vectorization at
-  // the cost of some possible performance loss. TODO: it would be nice to
-  // natively support vectors all the way through the backend so we don't need
-  // to scalarize here.
-  for (size_t i = 0; i < records.size(); i++) {
-    struct PartiallyConstructedSafepointRecord &info = records[i];
-    Instruction *statepoint = toUpdate[i].getInstruction();
-    splitVectorValues(cast<Instruction>(statepoint), info.liveset, DT);
-  }
-
   // B) Find the base pointers for each live pointer
   /* scope for caching */ {
     // Cache the 'defining value' relation used in the computation and
@@ -2146,6 +2203,18 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
   }
   holders.clear();
 
+  // Do a limited scalarization of any live at safepoint vector values which
+  // contain pointers. This enables this pass to run after vectorization at
+  // the cost of some possible performance loss. TODO: it would be nice to
+  // natively support vectors all the way through the backend so we don't need
+  // to scalarize here.
+  for (size_t i = 0; i < records.size(); i++) {
+    struct PartiallyConstructedSafepointRecord &info = records[i];
+    Instruction *statepoint = toUpdate[i].getInstruction();
+    splitVectorValues(cast<Instruction>(statepoint), info.liveset,
+                      info.PointerToBase, DT);
+  }
+
   // In order to reduce the live set of a statepoint we might choose to
   // rematerialize some values instead of relocating them. This is purely an
   // optimization and does not influence correctness.
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index bc068f78c576..305175ff8f73 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -1055,7 +1055,7 @@ void SCCPSolver::visitLoadInst(LoadInst &I) {
 
-  // load null -> null
+  // load null -> undef
   if (isa<ConstantPointerNull>(Ptr) && I.getPointerAddressSpace() == 0)
-    return markConstant(IV, &I, Constant::getNullValue(I.getType()));
+    return markConstant(IV, &I, UndefValue::get(I.getType()));
 
   // Transform load (constant global) into the value loaded.
   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index f38b2b1dbf96..056dd11b5ab3 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -127,7 +127,7 @@ typedef llvm::IRBuilder<true, ConstantFolder, IRBuilderPrefixedInserter<true>>
 typedef llvm::IRBuilder<false, ConstantFolder, IRBuilderPrefixedInserter<false>>
     IRBuilderTy;
 #endif
-} // namespace
+}
 
 namespace {
 /// \brief A used slice of an alloca.
@@ -595,7 +595,7 @@ private:
   /// the alloca.
   SmallVector<Use *, 8> DeadOperands;
 };
-} // namespace
+}
 
 static Value *foldSelectInst(SelectInst &SI) {
   // If the condition being selected on is a constant or the same value is
@@ -1173,7 +1173,7 @@ public:
     }
   }
 };
-} // namespace
+} // end anon namespace
 
 namespace {
 /// \brief An optimization pass providing Scalar Replacement of Aggregates.
@@ -1268,7 +1268,7 @@ private:
   void deleteDeadInstructions(SmallPtrSetImpl<AllocaInst *> &DeletedAllocas);
   bool promoteAllocas(Function &F);
 };
-} // namespace
+}
 
 char SROA::ID = 0;
 
@@ -3119,7 +3119,7 @@ private:
     return true;
   }
 };
-} // namespace
+}
 
 namespace {
 /// \brief Visitor to rewrite aggregate loads and stores as scalar.
@@ -3327,7 +3327,7 @@ private:
     return false;
   }
 };
-} // namespace
+}
 
 /// \brief Strip aggregate type wrapping.
 ///
diff --git a/lib/Transforms/Scalar/SampleProfile.cpp b/lib/Transforms/Scalar/SampleProfile.cpp
index 69e3a67aa8c1..c8dfa54a4aa0 100644
--- a/lib/Transforms/Scalar/SampleProfile.cpp
+++ b/lib/Transforms/Scalar/SampleProfile.cpp
@@ -174,7 +174,7 @@ protected:
   /// \brief Flag indicating whether the profile input loaded successfully.
   bool ProfileIsValid;
 };
-} // namespace
+}
 
 /// \brief Print the weight of edge \p E on stream \p OS.
 ///
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index e42c3daab8d7..d955da7ce75d 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -221,7 +221,7 @@ namespace {
     }
   };
 
-} // namespace
+}
 
 char SROA_DT::ID = 0;
 char SROA_SSAUp::ID = 0;
@@ -1123,7 +1123,7 @@ public:
     }
   }
 };
-} // namespace
+} // end anon namespace
 
 /// isSafeSelectToSpeculate - Select instructions that use an alloca and are
 /// subsequently loaded can be rewritten to load both input pointers and then
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 0733daf40f39..231411a16c05 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -48,8 +48,8 @@ UserBonusInstThreshold("bonus-inst-threshold", cl::Hidden, cl::init(1),
 
 STATISTIC(NumSimpl, "Number of blocks simplified");
 
-/// mergeEmptyReturnBlocks - If we have more than one empty (other than phi
-/// node) return blocks, merge them together to promote recursive block merging.
+/// If we have more than one empty (other than phi node) return block,
+/// merge them together to promote recursive block merging.
 static bool mergeEmptyReturnBlocks(Function &F) {
   bool Changed = false;
@@ -124,7 +124,7 @@ static bool mergeEmptyReturnBlocks(Function &F) {
 
   return Changed;
 }
 
-/// iterativelySimplifyCFG - Call SimplifyCFG on all the blocks in the function,
+/// Call SimplifyCFG on all the blocks in the function,
 /// iterating until no more changes are made.
 static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
                                    AssumptionCache *AC,
@@ -134,8 +134,7 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
   while (LocalChange) {
     LocalChange = false;
 
-    // Loop over all of the basic blocks and remove them if they are unneeded...
-    //
+    // Loop over all of the basic blocks and remove them if they are unneeded.
     for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) {
       if (SimplifyCFG(BBIt++, TTI, BonusInstThreshold, AC)) {
         LocalChange = true;
@@ -159,7 +158,7 @@ static bool simplifyFunctionCFG(Function &F, const TargetTransformInfo &TTI,
   // iterativelySimplifyCFG can (rarely) make some loops dead. If this happens,
   // removeUnreachableBlocks is needed to nuke them, which means we should
   // iterate between the two optimizations. We structure the code like this to
-  // avoid reruning iterativelySimplifyCFG if the second pass of
+  // avoid rerunning iterativelySimplifyCFG if the second pass of
   // removeUnreachableBlocks doesn't do anything.
   if (!removeUnreachableBlocks(F))
     return true;
@@ -220,7 +219,7 @@ struct CFGSimplifyPass : public FunctionPass {
     AU.addRequired<TargetTransformInfoWrapperPass>();
   }
 };
-} // namespace
+}
 
 char CFGSimplifyPass::ID = 0;
 INITIALIZE_PASS_BEGIN(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false,
diff --git a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
index f32769c24110..6d9d417ef943 100644
--- a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -224,11 +224,13 @@ FunctionPass *llvm::createStraightLineStrengthReducePass() {
 bool StraightLineStrengthReduce::isBasisFor(const Candidate &Basis,
                                             const Candidate &C) {
   return (Basis.Ins != C.Ins && // skip the same instruction
+          // They must have the same type too. Basis.Base == C.Base doesn't
+          // guarantee their types are the same (PR23975).
+          Basis.Ins->getType() == C.Ins->getType() &&
           // Basis must dominate C in order to rewrite C with respect to Basis.
           DT->dominates(Basis.Ins->getParent(), C.Ins->getParent()) &&
           // They share the same base, stride, and candidate kind.
-          Basis.Base == C.Base &&
-          Basis.Stride == C.Stride &&
+          Basis.Base == C.Base && Basis.Stride == C.Stride &&
           Basis.CandidateKind == C.CandidateKind);
 }
 
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index d23f5153c188..c7de2e2965c7 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -120,7 +120,7 @@ namespace {
     bool CanMoveAboveCall(Instruction *I, CallInst *CI);
     Value *CanTransformAccumulatorRecursion(Instruction *I, CallInst *CI);
   };
-} // namespace
+}
 
 char TailCallElim::ID = 0;
 INITIALIZE_PASS_BEGIN(TailCallElim, "tailcallelim",
@@ -246,7 +246,7 @@ struct AllocaDerivedValueTracker {
   SmallPtrSet<Instruction *, 32> AllocaUsers;
   SmallPtrSet<Instruction *, 32> EscapePoints;
 };
-} // namespace
+}
 
 bool TailCallElim::markTails(Function &F, bool &AllCallsAreTailCalls) {
   if (F.callsFunctionThatReturnsTwice())
-- 
cgit v1.2.3
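
A note for readers tracing the RewriteStatepointsForGC hunks above: findBaseDefiningValueOfVector now returns a (value, is-known-base) pair rather than a bare value, and the bool is what lets findBaseDefiningValue decide whether an extractelement can serve as its own base. The sketch below is a minimal standalone model of that recursion, not the LLVM API; the Node type and all names are illustrative stand-ins, and it assumes constant lane indices behave as in the patch.

    #include <cassert>
    #include <cstdio>
    #include <utility>

    // Toy stand-in for a vector-of-pointers SSA value; not an LLVM class.
    struct Node {
      enum Kind { Argument, InsertElement, Phi } kind;
      Node *vectorOp = nullptr; // InsertElement: the vector being updated
      Node *scalarOp = nullptr; // InsertElement: the element being inserted
      int insertIndex = -1;     // InsertElement: constant lane, -1 if unknown
    };

    // Mirrors the shape of findBaseDefiningValueOfVector: returns a base
    // defining value plus a flag saying whether it is already known to be a
    // base. Arguments are known bases; an insertelement can be looked through
    // when both lane indices are known constants; everything else is
    // conservatively a BDV needing a parallel vector of bases.
    static std::pair<Node *, bool> findBDVOfVector(Node *V, int Lane = -1) {
      switch (V->kind) {
      case Node::Argument:
        return {V, true}; // an incoming argument is trivially a base
      case Node::InsertElement:
        if (Lane >= 0 && V->insertIndex >= 0) {
          if (V->insertIndex == Lane)
            return {V->scalarOp, false}; // the queried lane was just written
          return findBDVOfVector(V->vectorOp, Lane); // definitely another lane
        }
        return {V, false}; // unknown lanes: the insert itself is the BDV
      case Node::Phi:
        return {V, false}; // the outer algorithm must build a base for this
      }
      assert(false && "all kinds handled above");
      return {nullptr, false};
    }

    int main() {
      Node Base{Node::Argument}, Scalar{Node::Argument};
      Node Insert{Node::InsertElement};
      Insert.vectorOp = &Base;
      Insert.scalarOp = &Scalar;
      Insert.insertIndex = 0;

      auto Lane0 = findBDVOfVector(&Insert, 0); // hits the inserted element
      auto Lane1 = findBDVOfVector(&Insert, 1); // skips to the base vector
      std::printf("lane0 known base: %d, lane1 known base: %d\n",
                  Lane0.second, Lane1.second);
    }

As in the patch, true means the returned value is already a base, while false marks a BDV for which the outer findBasePointer algorithm must construct a parallel base.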
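
Likewise, splitVectorValues now receives PointerToBase because scalarization was moved after base-pointer computation: when a live vector is split into lanes, each lane must be remapped to the matching lane of the vector's base, which was split as well since it is also live at the statepoint. Below is a minimal sketch of just that bookkeeping, using standard containers and a string stand-in for Value; all names are hypothetical, not the pass's actual types.

    #include <cassert>
    #include <map>
    #include <set>
    #include <string>
    #include <vector>

    using Value = std::string; // stand-in for llvm::Value*; names are toy IR

    // The bookkeeping the patch appends to splitVectorValues: once a live
    // vector V has been split into scalar lanes, retire V from the live set,
    // add its lanes, and point each lane at the matching lane of V's base.
    static void remapAfterSplit(std::set<Value> &LiveSet,
                                std::map<Value, Value> &PointerToBase,
                                std::map<Value, std::vector<Value>> &Elements,
                                const std::vector<Value> &ToSplit) {
      for (const Value &V : ToSplit) {
        const std::vector<Value> &Lanes = Elements.at(V);
        LiveSet.erase(V);
        LiveSet.insert(Lanes.begin(), Lanes.end());

        assert(PointerToBase.count(V) && "split value must have a known base");
        const std::vector<Value> &BaseLanes = Elements.at(PointerToBase.at(V));
        assert(Lanes.size() == BaseLanes.size() && "lane counts must agree");
        PointerToBase.erase(V);
        for (size_t i = 0; i < Lanes.size(); ++i)
          PointerToBase[Lanes[i]] = BaseLanes[i];
      }
    }

    int main() {
      std::set<Value> Live{"derived.vec"};
      std::map<Value, Value> PointerToBase{{"derived.vec", "base.vec"}};
      std::map<Value, std::vector<Value>> Elements{
          {"derived.vec", {"derived.0", "derived.1"}},
          {"base.vec", {"base.0", "base.1"}}};

      remapAfterSplit(Live, PointerToBase, Elements, {"derived.vec"});
      assert(PointerToBase.at("derived.0") == "base.0");
      assert(PointerToBase.at("derived.1") == "base.1");
      assert(Live.count("derived.0") && !Live.count("derived.vec"));
    }

The lane-count assert mirrors the patch's invariant that a derived vector and its base vector are split into the same number of elements.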