From 1a82d4c088707c791c792f6822f611b47a12bdfe Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Sun, 5 Jul 2015 14:21:36 +0000 Subject: Vendor import of llvm trunk r241361: https://llvm.org/svn/llvm-project/llvm/trunk@241361 --- lib/Transforms/Scalar/ADCE.cpp | 2 +- lib/Transforms/Scalar/AlignmentFromAssumptions.cpp | 2 +- lib/Transforms/Scalar/BDCE.cpp | 2 +- lib/Transforms/Scalar/ConstantHoisting.cpp | 2 +- lib/Transforms/Scalar/ConstantProp.cpp | 2 +- .../Scalar/CorrelatedValuePropagation.cpp | 2 +- lib/Transforms/Scalar/DCE.cpp | 4 +- lib/Transforms/Scalar/DeadStoreElimination.cpp | 2 +- lib/Transforms/Scalar/EarlyCSE.cpp | 12 +- lib/Transforms/Scalar/FlattenCFGPass.cpp | 2 +- lib/Transforms/Scalar/Float2Int.cpp | 2 +- lib/Transforms/Scalar/GVN.cpp | 20 +-- lib/Transforms/Scalar/IndVarSimplify.cpp | 15 +- .../Scalar/InductiveRangeCheckElimination.cpp | 4 +- lib/Transforms/Scalar/JumpThreading.cpp | 2 +- lib/Transforms/Scalar/LICM.cpp | 4 +- lib/Transforms/Scalar/LoadCombine.cpp | 2 +- lib/Transforms/Scalar/LoopDeletion.cpp | 2 +- lib/Transforms/Scalar/LoopDistribute.cpp | 54 +++--- lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 2 +- lib/Transforms/Scalar/LoopInstSimplify.cpp | 2 +- lib/Transforms/Scalar/LoopRerollPass.cpp | 2 +- lib/Transforms/Scalar/LoopRotation.cpp | 2 +- lib/Transforms/Scalar/LoopStrengthReduce.cpp | 18 +- lib/Transforms/Scalar/LoopUnrollPass.cpp | 2 +- lib/Transforms/Scalar/LoopUnswitch.cpp | 194 ++++++++++++++------- lib/Transforms/Scalar/LowerAtomic.cpp | 2 +- lib/Transforms/Scalar/LowerExpectIntrinsic.cpp | 2 +- lib/Transforms/Scalar/MemCpyOptimizer.cpp | 6 +- lib/Transforms/Scalar/MergedLoadStoreMotion.cpp | 2 +- lib/Transforms/Scalar/NaryReassociate.cpp | 69 ++++++-- lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp | 2 +- lib/Transforms/Scalar/PlaceSafepoints.cpp | 4 +- lib/Transforms/Scalar/Reassociate.cpp | 14 +- lib/Transforms/Scalar/Reg2Mem.cpp | 2 +- lib/Transforms/Scalar/RewriteStatepointsForGC.cpp | 183 +++++++++++++------ lib/Transforms/Scalar/SCCP.cpp | 2 +- lib/Transforms/Scalar/SROA.cpp | 12 +- lib/Transforms/Scalar/SampleProfile.cpp | 2 +- lib/Transforms/Scalar/ScalarReplAggregates.cpp | 4 +- lib/Transforms/Scalar/SimplifyCFGPass.cpp | 13 +- .../Scalar/StraightLineStrengthReduce.cpp | 6 +- lib/Transforms/Scalar/TailRecursionElimination.cpp | 4 +- 43 files changed, 442 insertions(+), 246 deletions(-) (limited to 'lib/Transforms/Scalar') diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp index fe0224bb56c7..d6fc91641588 100644 --- a/lib/Transforms/Scalar/ADCE.cpp +++ b/lib/Transforms/Scalar/ADCE.cpp @@ -44,7 +44,7 @@ struct ADCE : public FunctionPass { AU.setPreservesCFG(); } }; -} // namespace +} char ADCE::ID = 0; INITIALIZE_PASS(ADCE, "adce", "Aggressive Dead Code Elimination", false, false) diff --git a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp index a4e5446a2b12..8918909f484a 100644 --- a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp +++ b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp @@ -76,7 +76,7 @@ struct AlignmentFromAssumptions : public FunctionPass { const SCEV *&OffSCEV); bool processAssumption(CallInst *I); }; -} // namespace +} char AlignmentFromAssumptions::ID = 0; static const char aip_name[] = "Alignment from assumptions"; diff --git a/lib/Transforms/Scalar/BDCE.cpp b/lib/Transforms/Scalar/BDCE.cpp index 8ffbacddda68..09c605e76737 100644 --- a/lib/Transforms/Scalar/BDCE.cpp +++ b/lib/Transforms/Scalar/BDCE.cpp @@ -66,7 +66,7 @@ 
struct BDCE : public FunctionPass { AssumptionCache *AC; DominatorTree *DT; }; -} // namespace +} char BDCE::ID = 0; INITIALIZE_PASS_BEGIN(BDCE, "bdce", "Bit-Tracking Dead Code Elimination", diff --git a/lib/Transforms/Scalar/ConstantHoisting.cpp b/lib/Transforms/Scalar/ConstantHoisting.cpp index cc1dc9435a05..4288742dd3eb 100644 --- a/lib/Transforms/Scalar/ConstantHoisting.cpp +++ b/lib/Transforms/Scalar/ConstantHoisting.cpp @@ -171,7 +171,7 @@ private: void deleteDeadCastInst() const; bool optimizeConstants(Function &Fn); }; -} // namespace +} char ConstantHoisting::ID = 0; INITIALIZE_PASS_BEGIN(ConstantHoisting, "consthoist", "Constant Hoisting", diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp index e3df86ecf169..c974ebb9456f 100644 --- a/lib/Transforms/Scalar/ConstantProp.cpp +++ b/lib/Transforms/Scalar/ConstantProp.cpp @@ -47,7 +47,7 @@ namespace { AU.addRequired(); } }; -} // namespace +} char ConstantPropagation::ID = 0; INITIALIZE_PASS_BEGIN(ConstantPropagation, "constprop", diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index b1809b7fae08..79624b2e4c47 100644 --- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -56,7 +56,7 @@ namespace { AU.addRequired(); } }; -} // namespace +} char CorrelatedValuePropagation::ID = 0; INITIALIZE_PASS_BEGIN(CorrelatedValuePropagation, "correlated-propagation", diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp index aa628e5aca81..3b262a23091f 100644 --- a/lib/Transforms/Scalar/DCE.cpp +++ b/lib/Transforms/Scalar/DCE.cpp @@ -60,7 +60,7 @@ namespace { AU.setPreservesCFG(); } }; -} // namespace +} char DeadInstElimination::ID = 0; INITIALIZE_PASS(DeadInstElimination, "die", @@ -87,7 +87,7 @@ namespace { AU.setPreservesCFG(); } }; -} // namespace +} char DCE::ID = 0; INITIALIZE_PASS(DCE, "dce", "Dead Code Elimination", false, false) diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index c99dc5fc8445..c50558434da2 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -92,7 +92,7 @@ namespace { AU.addPreserved(); } }; -} // namespace +} char DSE::ID = 0; INITIALIZE_PASS_BEGIN(DSE, "dse", "Dead Store Elimination", false, false) diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index 8b629eaca9d4..d536a937dce1 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -72,7 +72,7 @@ struct SimpleValue { isa(Inst) || isa(Inst); } }; -} // namespace +} namespace llvm { template <> struct DenseMapInfo { @@ -85,7 +85,7 @@ template <> struct DenseMapInfo { static unsigned getHashValue(SimpleValue Val); static bool isEqual(SimpleValue LHS, SimpleValue RHS); }; -} // namespace llvm +} unsigned DenseMapInfo::getHashValue(SimpleValue Val) { Instruction *Inst = Val.Inst; @@ -219,7 +219,7 @@ struct CallValue { return true; } }; -} // namespace +} namespace llvm { template <> struct DenseMapInfo { @@ -232,7 +232,7 @@ template <> struct DenseMapInfo { static unsigned getHashValue(CallValue Val); static bool isEqual(CallValue LHS, CallValue RHS); }; -} // namespace llvm +} unsigned DenseMapInfo::getHashValue(CallValue Val) { Instruction *Inst = Val.Inst; @@ -447,7 +447,7 @@ private: ExpectedType); } }; -} // namespace +} bool EarlyCSE::processNode(DomTreeNode *Node) { 
BasicBlock *BB = Node->getBlock(); @@ -764,7 +764,7 @@ public: AU.setPreservesCFG(); } }; -} // namespace +} char EarlyCSELegacyPass::ID = 0; diff --git a/lib/Transforms/Scalar/FlattenCFGPass.cpp b/lib/Transforms/Scalar/FlattenCFGPass.cpp index dd6ea8d455c5..0430c1898c8d 100644 --- a/lib/Transforms/Scalar/FlattenCFGPass.cpp +++ b/lib/Transforms/Scalar/FlattenCFGPass.cpp @@ -36,7 +36,7 @@ public: private: AliasAnalysis *AA; }; -} // namespace +} char FlattenCFGPass::ID = 0; INITIALIZE_PASS_BEGIN(FlattenCFGPass, "flattencfg", "Flatten the CFG", false, diff --git a/lib/Transforms/Scalar/Float2Int.cpp b/lib/Transforms/Scalar/Float2Int.cpp index bb90c5f73239..c9314229c38b 100644 --- a/lib/Transforms/Scalar/Float2Int.cpp +++ b/lib/Transforms/Scalar/Float2Int.cpp @@ -79,7 +79,7 @@ namespace { MapVector ConvertedInsts; LLVMContext *Ctx; }; -} // namespace +} char Float2Int::ID = 0; INITIALIZE_PASS(Float2Int, "float2int", "Float to int", false, false) diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index d9308c4e3710..60903c8b4aaf 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -138,7 +138,7 @@ namespace { uint32_t getNextUnusedValueNumber() { return nextValueNumber; } void verifyRemoved(const Value *) const; }; -} // namespace +} namespace llvm { template <> struct DenseMapInfo { @@ -159,7 +159,7 @@ template <> struct DenseMapInfo { } }; -} // namespace llvm +} //===----------------------------------------------------------------------===// // ValueTable Internal Functions @@ -723,7 +723,7 @@ namespace { }; char GVN::ID = 0; -} // namespace +} // The public interface to this file... FunctionPass *llvm::createGVNPass(bool NoLoads) { @@ -1783,13 +1783,9 @@ static void patchReplacementInstruction(Instruction *I, Value *Repl) { // being replaced. BinaryOperator *Op = dyn_cast(I); BinaryOperator *ReplOp = dyn_cast(Repl); - if (Op && ReplOp && isa(Op) && - isa(ReplOp)) { - if (ReplOp->hasNoSignedWrap() && !Op->hasNoSignedWrap()) - ReplOp->setHasNoSignedWrap(false); - if (ReplOp->hasNoUnsignedWrap() && !Op->hasNoUnsignedWrap()) - ReplOp->setHasNoUnsignedWrap(false); - } + if (Op && ReplOp) + ReplOp->andIRFlags(Op); + if (Instruction *ReplInst = dyn_cast(Repl)) { // FIXME: If both the original and replacement value are part of the // same control-flow region (meaning that the execution of one @@ -2808,6 +2804,10 @@ bool GVN::processFoldableCondBr(BranchInst *BI) { if (!BI || BI->isUnconditional()) return false; + // If a branch has two identical successors, we cannot declare either dead. + if (BI->getSuccessor(0) == BI->getSuccessor(1)) + return false; + ConstantInt *Cond = dyn_cast(BI->getCondition()); if (!Cond) return false; diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index e931382ea98f..6f0375487af6 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -136,7 +136,7 @@ namespace { void SinkUnusedInvariants(Loop *L); }; -} // namespace +} char IndVarSimplify::ID = 0; INITIALIZE_PASS_BEGIN(IndVarSimplify, "indvars", @@ -494,7 +494,7 @@ struct RewritePhi { RewritePhi(PHINode *P, unsigned I, Value *V, bool H, bool S) : PN(P), Ith(I), Val(V), HighCost(H), SafePhi(S) {} }; -} // namespace +} //===----------------------------------------------------------------------===// // RewriteLoopExitValues - Optimize IV users outside the loop. 
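A note on the GVN change a little above: patchReplacementInstruction previously cleared nsw/nuw by hand and only for overflowing binary operators; Instruction::andIRFlags subsumes that by intersecting the IR flags of the two instructions. A minimal sketch of the idea, assuming two interchangeable binary operators (the helper name is illustrative, not part of the patch):

    #include "llvm/IR/Instructions.h"

    // ReplOp is about to stand in for Op at every use. A flag such as nsw is
    // only safe on the survivor if it held on both instructions, so keep the
    // intersection; andIRFlags() also covers nuw and fast-math flags.
    static void mergeFlagsForReplacement(llvm::BinaryOperator *Op,
                                         llvm::BinaryOperator *ReplOp) {
      // e.g. Op:     %a = add nsw i32 %x, %y   (being replaced)
      //      ReplOp: %b = add nuw i32 %x, %y   (survives)
      // afterwards:  %b = add i32 %x, %y       (no flag common to both)
      ReplOp->andIRFlags(Op);
    }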
@@ -758,7 +758,7 @@ namespace { WideIVInfo() : NarrowIV(nullptr), WidestNativeType(nullptr), IsSigned(false) {} }; -} // namespace +} /// visitCast - Update information about the induction variable that is /// extended by this sign or zero extend operation. This is used to determine @@ -1321,7 +1321,7 @@ namespace { // Implement the interface used by simplifyUsersOfIV. void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); } }; -} // namespace +} /// SimplifyAndExtend - Iteratively perform simplification on a worklist of IV /// users. Each successive simplification may push more users which may @@ -2013,11 +2013,10 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // Now that we're done iterating through lists, clean up any instructions // which are now dead. - while (!DeadInsts.empty()) { - Value *V = static_cast(DeadInsts.pop_back_val()); - if (Instruction *Inst = dyn_cast_or_null(V)) + while (!DeadInsts.empty()) + if (Instruction *Inst = + dyn_cast_or_null(DeadInsts.pop_back_val())) RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI); - } // The Rewriter may not be used from this point on. diff --git a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp index ce1a0ca8c7d9..cbdacad8f28b 100644 --- a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -222,7 +222,7 @@ public: }; char InductiveRangeCheckElimination::ID = 0; -} // namespace +} INITIALIZE_PASS(InductiveRangeCheckElimination, "irce", "Inductive range check elimination", false, false) @@ -618,7 +618,7 @@ public: bool run(); }; -} // namespace +} void LoopConstrainer::replacePHIBlock(PHINode *PN, BasicBlock *Block, BasicBlock *ReplaceBy) { diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 7316db6ca02c..1130d228acb8 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -138,7 +138,7 @@ namespace { bool SimplifyPartiallyRedundantLoad(LoadInst *LI); bool TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB); }; -} // namespace +} char JumpThreading::ID = 0; INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading", diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index e5019463bb5f..f0e6d641b180 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -156,7 +156,7 @@ namespace { /// Simple Analysis hook. Delete loop L from alias set map. void deleteAnalysisLoop(Loop *L) override; }; -} // namespace +} char LICM::ID = 0; INITIALIZE_PASS_BEGIN(LICM, "licm", "Loop Invariant Code Motion", false, false) @@ -777,7 +777,7 @@ namespace { AST.deleteValue(I); } }; -} // namespace +} // end anon namespace /// Try to promote memory values to scalars by sinking stores out of the /// loop and moving loads to before the loop. 
We do this by looping over diff --git a/lib/Transforms/Scalar/LoadCombine.cpp b/lib/Transforms/Scalar/LoadCombine.cpp index 3dbf6ac6ed08..c19cd19059b2 100644 --- a/lib/Transforms/Scalar/LoadCombine.cpp +++ b/lib/Transforms/Scalar/LoadCombine.cpp @@ -77,7 +77,7 @@ private: bool aggregateLoads(SmallVectorImpl &); bool combineLoads(SmallVectorImpl &); }; -} // namespace +} bool LoadCombine::doInitialization(Function &F) { DEBUG(dbgs() << "LoadCombine function: " << F.getName() << "\n"); diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp index 02760ffe2c68..98b068edf582 100644 --- a/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/lib/Transforms/Scalar/LoopDeletion.cpp @@ -57,7 +57,7 @@ namespace { bool &Changed, BasicBlock *Preheader); }; -} // namespace +} char LoopDeletion::ID = 0; INITIALIZE_PASS_BEGIN(LoopDeletion, "loop-deletion", diff --git a/lib/Transforms/Scalar/LoopDistribute.cpp b/lib/Transforms/Scalar/LoopDistribute.cpp index d21a7db48c51..0325d268c325 100644 --- a/lib/Transforms/Scalar/LoopDistribute.cpp +++ b/lib/Transforms/Scalar/LoopDistribute.cpp @@ -635,10 +635,11 @@ public: LoopVersioning(const LoopAccessInfo &LAI, Loop *L, LoopInfo *LI, DominatorTree *DT, const SmallVector *PtrToPartition = nullptr) - : OrigLoop(L), NonDistributedLoop(nullptr), + : VersionedLoop(L), NonVersionedLoop(nullptr), PtrToPartition(PtrToPartition), LAI(LAI), LI(LI), DT(DT) {} - /// \brief Returns true if we need memchecks to distribute the loop. + /// \brief Returns true if we need memchecks to disambiguate may-aliasing + /// accesses. bool needsRuntimeChecks() const { return LAI.getRuntimePointerCheck()->needsAnyChecking(PtrToPartition); } @@ -649,49 +650,51 @@ public: Instruction *FirstCheckInst; Instruction *MemRuntimeCheck; // Add the memcheck in the original preheader (this is empty initially). - BasicBlock *MemCheckBB = OrigLoop->getLoopPreheader(); + BasicBlock *MemCheckBB = VersionedLoop->getLoopPreheader(); std::tie(FirstCheckInst, MemRuntimeCheck) = LAI.addRuntimeCheck(MemCheckBB->getTerminator(), PtrToPartition); assert(MemRuntimeCheck && "called even though needsAnyChecking = false"); // Rename the block to make the IR more readable. - MemCheckBB->setName(OrigLoop->getHeader()->getName() + ".ldist.memcheck"); + MemCheckBB->setName(VersionedLoop->getHeader()->getName() + + ".lver.memcheck"); // Create empty preheader for the loop (and after cloning for the - // original/nondist loop). + // non-versioned loop). BasicBlock *PH = SplitBlock(MemCheckBB, MemCheckBB->getTerminator(), DT, LI); - PH->setName(OrigLoop->getHeader()->getName() + ".ph"); + PH->setName(VersionedLoop->getHeader()->getName() + ".ph"); // Clone the loop including the preheader. // // FIXME: This does not currently preserve SimplifyLoop because the exit // block is a join between the two loops. - SmallVector NonDistributedLoopBlocks; - NonDistributedLoop = - cloneLoopWithPreheader(PH, MemCheckBB, OrigLoop, VMap, ".ldist.nondist", - LI, DT, NonDistributedLoopBlocks); - remapInstructionsInLoop(NonDistributedLoopBlocks, VMap); + SmallVector NonVersionedLoopBlocks; + NonVersionedLoop = + cloneLoopWithPreheader(PH, MemCheckBB, VersionedLoop, VMap, + ".lver.orig", LI, DT, NonVersionedLoopBlocks); + remapInstructionsInLoop(NonVersionedLoopBlocks, VMap); // Insert the conditional branch based on the result of the memchecks. 
Instruction *OrigTerm = MemCheckBB->getTerminator(); - BranchInst::Create(NonDistributedLoop->getLoopPreheader(), - OrigLoop->getLoopPreheader(), MemRuntimeCheck, OrigTerm); + BranchInst::Create(NonVersionedLoop->getLoopPreheader(), + VersionedLoop->getLoopPreheader(), MemRuntimeCheck, + OrigTerm); OrigTerm->eraseFromParent(); // The loops merge in the original exit block. This is now dominated by the // memchecking block. - DT->changeImmediateDominator(OrigLoop->getExitBlock(), MemCheckBB); + DT->changeImmediateDominator(VersionedLoop->getExitBlock(), MemCheckBB); } /// \brief Adds the necessary PHI nodes for the versioned loops based on the /// loop-defined values used outside of the loop. void addPHINodes(const SmallVectorImpl &DefsUsedOutside) { - BasicBlock *PHIBlock = OrigLoop->getExitBlock(); + BasicBlock *PHIBlock = VersionedLoop->getExitBlock(); assert(PHIBlock && "No single successor to loop exit block"); for (auto *Inst : DefsUsedOutside) { - auto *NonDistInst = cast(VMap[Inst]); + auto *NonVersionedLoopInst = cast(VMap[Inst]); PHINode *PN; // First see if we have a single-operand PHI with the value defined by the @@ -704,24 +707,25 @@ public: } // If not create it. if (!PN) { - PN = PHINode::Create(Inst->getType(), 2, Inst->getName() + ".ldist", + PN = PHINode::Create(Inst->getType(), 2, Inst->getName() + ".lver", PHIBlock->begin()); for (auto *User : Inst->users()) - if (!OrigLoop->contains(cast(User)->getParent())) + if (!VersionedLoop->contains(cast(User)->getParent())) User->replaceUsesOfWith(Inst, PN); - PN->addIncoming(Inst, OrigLoop->getExitingBlock()); + PN->addIncoming(Inst, VersionedLoop->getExitingBlock()); } - // Add the new incoming value from the non-distributed loop. - PN->addIncoming(NonDistInst, NonDistributedLoop->getExitingBlock()); + // Add the new incoming value from the non-versioned loop. + PN->addIncoming(NonVersionedLoopInst, + NonVersionedLoop->getExitingBlock()); } } private: /// \brief The original loop. This becomes the "versioned" one, i.e. control /// goes if the memchecks all pass. - Loop *OrigLoop; + Loop *VersionedLoop; /// \brief The fall-back loop, i.e. if any of the memchecks fail. - Loop *NonDistributedLoop; + Loop *NonVersionedLoop; /// \brief For each memory pointer it contains the partitionId it is used in. /// If nullptr, no partitioning is used. @@ -730,8 +734,8 @@ private: /// If the pointer is used in multiple partitions the entry is set to -1. const SmallVector *PtrToPartition; - /// \brief This maps the instructions from OrigLoop to their counterpart in - /// NonDistributedLoop. + /// \brief This maps the instructions from VersionedLoop to their counterpart + /// in NonVersionedLoop. ValueToValueMapTy VMap; /// \brief Analyses used. 
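The LoopDistribute hunks above are largely a rename -- OrigLoop/NonDistributedLoop become VersionedLoop/NonVersionedLoop and the .ldist block suffixes become .lver -- since the versioning helper is not distribution-specific. The branch it emits can be summarized in a standalone sketch; MemRuntimeCheck is assumed to be the i1 conflict flag produced by LAI.addRuntimeCheck(), and both preheaders are assumed to exist already:

    #include "llvm/IR/Instructions.h"

    // Replace the memcheck block's terminator with a conditional branch: a
    // detected conflict falls back to the unmodified (non-versioned) loop,
    // otherwise the versioned loop may assume the pointers do not alias.
    static void emitVersioningBranch(llvm::BasicBlock *MemCheckBB,
                                     llvm::Value *MemRuntimeCheck,
                                     llvm::BasicBlock *NonVersionedPH,
                                     llvm::BasicBlock *VersionedPH) {
      llvm::Instruction *OrigTerm = MemCheckBB->getTerminator();
      llvm::BranchInst::Create(NonVersionedPH, VersionedPH, MemRuntimeCheck,
                               OrigTerm);
      OrigTerm->eraseFromParent();
    }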
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 3de1333a7c98..714ce914a8b3 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -209,7 +209,7 @@ namespace { bool runOnNoncountableLoop(); bool runOnCountableLoop(); }; -} // namespace +} char LoopIdiomRecognize::ID = 0; INITIALIZE_PASS_BEGIN(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms", diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp index 4c40f249ce1d..e12502654751 100644 --- a/lib/Transforms/Scalar/LoopInstSimplify.cpp +++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp @@ -52,7 +52,7 @@ namespace { AU.addRequired(); } }; -} // namespace +} char LoopInstSimplify::ID = 0; INITIALIZE_PASS_BEGIN(LoopInstSimplify, "loop-instsimplify", diff --git a/lib/Transforms/Scalar/LoopRerollPass.cpp b/lib/Transforms/Scalar/LoopRerollPass.cpp index f6db9b114e3f..ed103e6b8ed6 100644 --- a/lib/Transforms/Scalar/LoopRerollPass.cpp +++ b/lib/Transforms/Scalar/LoopRerollPass.cpp @@ -438,7 +438,7 @@ namespace { bool reroll(Instruction *IV, Loop *L, BasicBlock *Header, const SCEV *IterCount, ReductionTracker &Reductions); }; -} // namespace +} char LoopReroll::ID = 0; INITIALIZE_PASS_BEGIN(LoopReroll, "loop-reroll", "Reroll loops", false, false) diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index 2ba70ad1f1a7..a675e1289baf 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -79,7 +79,7 @@ namespace { AssumptionCache *AC; DominatorTree *DT; }; -} // namespace +} char LoopRotate::ID = 0; INITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false) diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index ee7248691992..4b59f3d2f6cc 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -116,7 +116,7 @@ public: void dump() const; }; -} // namespace +} void RegSortData::print(raw_ostream &OS) const { OS << "[NumUses=" << UsedByIndices.count() << ']'; @@ -157,7 +157,7 @@ public: const_iterator end() const { return RegSequence.end(); } }; -} // namespace +} void RegUseTracker::CountRegister(const SCEV *Reg, size_t LUIdx) { @@ -281,7 +281,7 @@ struct Formula { void dump() const; }; -} // namespace +} /// DoInitialMatch - Recursion helper for InitialMatch. static void DoInitialMatch(const SCEV *S, Loop *L, @@ -903,7 +903,7 @@ private: SmallPtrSetImpl *LoserRegs); }; -} // namespace +} /// RateRegister - Tally up interesting quantities from the given register. void Cost::RateRegister(const SCEV *Reg, @@ -1102,7 +1102,7 @@ struct LSRFixup { void dump() const; }; -} // namespace +} LSRFixup::LSRFixup() : UserInst(nullptr), OperandValToReplace(nullptr), LUIdx(~size_t(0)), @@ -1252,7 +1252,7 @@ public: void dump() const; }; -} // namespace +} /// HasFormula - Test whether this use as a formula which has the same /// registers as the given formula. @@ -1791,7 +1791,7 @@ public: void dump() const; }; -} // namespace +} /// OptimizeShadowIV - If IV is used in a int-to-float cast /// inside the loop then try to eliminate the cast operation. 
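For context on the OptimizeShadowIV comment that closes the hunk above: the transformation replaces a per-iteration int-to-float conversion of the induction variable with a parallel floating-point IV. A source-level illustration, where use() is a placeholder and the legality checks LSR performs are omitted:

    void use(double);

    // Before: the integer IV is converted on every iteration (a sitofp in
    // the loop body).
    void before(int n) {
      for (int i = 0; i < n; ++i)
        use(static_cast<double>(i));
    }

    // After: a "shadow" double IV advances in lockstep and the cast is gone.
    void after(int n) {
      double d = 0.0;
      for (int i = 0; i < n; ++i, d += 1.0)
        use(d);
    }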
@@ -3644,7 +3644,7 @@ struct WorkItem { void dump() const; }; -} // namespace +} void WorkItem::print(raw_ostream &OS) const { OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx @@ -4949,7 +4949,7 @@ private: void getAnalysisUsage(AnalysisUsage &AU) const override; }; -} // namespace +} char LoopStrengthReduce::ID = 0; INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce", diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index d702dc0b4ee9..9e7558d9c45f 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -229,7 +229,7 @@ namespace { unsigned DynamicCostSavingsDiscount, uint64_t UnrolledCost, uint64_t RolledDynamicCost); }; -} // namespace +} char LoopUnroll::ID = 0; INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false) diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index 5bdc2ec88d4a..cbc563bd8998 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -43,6 +43,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -80,6 +81,7 @@ namespace { struct LoopProperties { unsigned CanBeUnswitchedCount; + unsigned WasUnswitchedCount; unsigned SizeEstimation; UnswitchedValsMap UnswitchedVals; }; @@ -93,37 +95,52 @@ namespace { UnswitchedValsMap *CurLoopInstructions; LoopProperties *CurrentLoopProperties; - // Max size of code we can produce on remained iterations. + // A loop unswitching with an estimated cost above this threshold + // is not performed. MaxSize is turned into unswitching quota for + // the current loop, and reduced correspondingly, though note that + // the quota is returned by releaseMemory() when the loop has been + // processed, so that MaxSize will return to its previous + // value. So in most cases MaxSize will equal the Threshold flag + // when a new loop is processed. An exception to that is that + // MaxSize will have a smaller value while processing nested loops + // that were introduced due to loop unswitching of an outer loop. + // + // FIXME: The way that MaxSize works is subtle and depends on the + // pass manager processing loops and calling releaseMemory() in a + // specific order. It would be good to find a more straightforward + // way of doing what MaxSize does. unsigned MaxSize; - public: - - LUAnalysisCache() : - CurLoopInstructions(nullptr), CurrentLoopProperties(nullptr), - MaxSize(Threshold) - {} - - // Analyze loop. Check its size, calculate is it possible to unswitch - // it. Returns true if we can unswitch this loop. - bool countLoop(const Loop *L, const TargetTransformInfo &TTI, - AssumptionCache *AC); - - // Clean all data related to given loop. - void forgetLoop(const Loop *L); - - // Mark case value as unswitched. - // Since SI instruction can be partly unswitched, in order to avoid - // extra unswitching in cloned loops keep track all unswitched values. - void setUnswitched(const SwitchInst *SI, const Value *V); - - // Check was this case value unswitched before or not. - bool isUnswitched(const SwitchInst *SI, const Value *V); - - // Clone all loop-unswitch related loop properties. - // Redistribute unswitching quotas. - // Note, that new loop data is stored inside the VMap. 
- void cloneData(const Loop *NewLoop, const Loop *OldLoop, - const ValueToValueMapTy &VMap); + public: + LUAnalysisCache() + : CurLoopInstructions(nullptr), CurrentLoopProperties(nullptr), + MaxSize(Threshold) {} + + // Analyze loop. Check its size, calculate is it possible to unswitch + // it. Returns true if we can unswitch this loop. + bool countLoop(const Loop *L, const TargetTransformInfo &TTI, + AssumptionCache *AC); + + // Clean all data related to given loop. + void forgetLoop(const Loop *L); + + // Mark case value as unswitched. + // Since SI instruction can be partly unswitched, in order to avoid + // extra unswitching in cloned loops keep track all unswitched values. + void setUnswitched(const SwitchInst *SI, const Value *V); + + // Check was this case value unswitched before or not. + bool isUnswitched(const SwitchInst *SI, const Value *V); + + // Returns true if another unswitching could be done within the cost + // threshold. + bool CostAllowsUnswitching(); + + // Clone all loop-unswitch related loop properties. + // Redistribute unswitching quotas. + // Note, that new loop data is stored inside the VMap. + void cloneData(const Loop *NewLoop, const Loop *OldLoop, + const ValueToValueMapTy &VMap); }; class LoopUnswitch : public LoopPass { @@ -195,10 +212,12 @@ namespace { /// Update the appropriate Phi nodes as we do so. void SplitExitEdges(Loop *L, const SmallVectorImpl &ExitBlocks); - bool UnswitchIfProfitable(Value *LoopCond, Constant *Val); + bool UnswitchIfProfitable(Value *LoopCond, Constant *Val, + TerminatorInst *TI = nullptr); void UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val, - BasicBlock *ExitBlock); - void UnswitchNontrivialCondition(Value *LIC, Constant *OnVal, Loop *L); + BasicBlock *ExitBlock, TerminatorInst *TI); + void UnswitchNontrivialCondition(Value *LIC, Constant *OnVal, Loop *L, + TerminatorInst *TI); void RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, Constant *Val, bool isEqual); @@ -206,14 +225,15 @@ namespace { void EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val, BasicBlock *TrueDest, BasicBlock *FalseDest, - Instruction *InsertPt); + Instruction *InsertPt, + TerminatorInst *TI); void SimplifyCode(std::vector &Worklist, Loop *L); bool IsTrivialUnswitchCondition(Value *Cond, Constant **Val = nullptr, BasicBlock **LoopExit = nullptr); }; -} // namespace +} // Analyze loop. Check its size, calculate is it possible to unswitch // it. Returns true if we can unswitch this loop. @@ -242,12 +262,13 @@ bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI, // consideration code simplification opportunities and code that can // be shared by the resultant unswitched loops. 
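The size estimate described above feeds a per-loop unswitching quota. A minimal self-contained sketch of the arithmetic in countLoop (field and variable names mirror the pass; the struct itself is illustrative):

    // Each unswitch roughly duplicates the loop body, so a loop is granted
    // floor(budget / size) unswitches, and that much budget stays reserved
    // until forgetLoop()/releaseMemory() returns it.
    struct UnswitchQuotaSketch {
      unsigned MaxSize; // remaining global budget, initially the Threshold
      unsigned reserveFor(unsigned LoopSizeInInsts) {
        unsigned Count = MaxSize / LoopSizeInInsts; // CanBeUnswitchedCount
        MaxSize -= LoopSizeInInsts * Count;
        return Count;
      }
    };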
CodeMetrics Metrics; - for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); - I != E; ++I) + for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; + ++I) Metrics.analyzeBasicBlock(*I, TTI, EphValues); - Props.SizeEstimation = std::min(Metrics.NumInsts, Metrics.NumBlocks * 5); + Props.SizeEstimation = Metrics.NumInsts; Props.CanBeUnswitchedCount = MaxSize / (Props.SizeEstimation); + Props.WasUnswitchedCount = 0; MaxSize -= Props.SizeEstimation * Props.CanBeUnswitchedCount; if (Metrics.notDuplicatable) { @@ -258,13 +279,6 @@ bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI, } } - if (!Props.CanBeUnswitchedCount) { - DEBUG(dbgs() << "NOT unswitching loop %" - << L->getHeader()->getName() << ", cost too high: " - << L->getBlocks().size() << "\n"); - return false; - } - // Be careful. This links are good only before new loop addition. CurrentLoopProperties = &Props; CurLoopInstructions = &Props.UnswitchedVals; @@ -279,7 +293,8 @@ void LUAnalysisCache::forgetLoop(const Loop *L) { if (LIt != LoopsProperties.end()) { LoopProperties &Props = LIt->second; - MaxSize += Props.CanBeUnswitchedCount * Props.SizeEstimation; + MaxSize += (Props.CanBeUnswitchedCount + Props.WasUnswitchedCount) * + Props.SizeEstimation; LoopsProperties.erase(LIt); } @@ -299,6 +314,10 @@ bool LUAnalysisCache::isUnswitched(const SwitchInst *SI, const Value *V) { return (*CurLoopInstructions)[SI].count(V); } +bool LUAnalysisCache::CostAllowsUnswitching() { + return CurrentLoopProperties->CanBeUnswitchedCount > 0; +} + // Clone all loop-unswitch related loop properties. // Redistribute unswitching quotas. // Note, that new loop data is stored inside the VMap. @@ -312,6 +331,8 @@ void LUAnalysisCache::cloneData(const Loop *NewLoop, const Loop *OldLoop, // Reallocate "can-be-unswitched quota" --OldLoopProps.CanBeUnswitchedCount; + ++OldLoopProps.WasUnswitchedCount; + NewLoopProps.WasUnswitchedCount = 0; unsigned Quota = OldLoopProps.CanBeUnswitchedCount; NewLoopProps.CanBeUnswitchedCount = Quota / 2; OldLoopProps.CanBeUnswitchedCount = Quota - Quota / 2; @@ -453,8 +474,8 @@ bool LoopUnswitch::processCurrentLoop() { // unswitch on it if we desire. Value *LoopCond = FindLIVLoopCondition(BI->getCondition(), currentLoop, Changed); - if (LoopCond && UnswitchIfProfitable(LoopCond, - ConstantInt::getTrue(Context))) { + if (LoopCond && + UnswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context), TI)) { ++NumBranches; return true; } @@ -643,7 +664,8 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val, /// UnswitchIfProfitable - We have found that we can unswitch currentLoop when /// LoopCond == Val to simplify the loop. If we decide that this is profitable, /// unswitch the loop, reprocess the pieces, then return true. -bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) { +bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val, + TerminatorInst *TI) { Function *F = loopHeader->getParent(); Constant *CondVal = nullptr; BasicBlock *ExitBlock = nullptr; @@ -651,17 +673,25 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) { if (IsTrivialUnswitchCondition(LoopCond, &CondVal, &ExitBlock)) { // If the condition is trivial, always unswitch. There is no code growth // for this case. 
- UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, ExitBlock); + UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, ExitBlock, TI); return true; } // Check to see if it would be profitable to unswitch current loop. + if (!BranchesInfo.CostAllowsUnswitching()) { + DEBUG(dbgs() << "NOT unswitching loop %" + << currentLoop->getHeader()->getName() + << " at non-trivial condition '" << *Val + << "' == " << *LoopCond << "\n" + << ". Cost too high.\n"); + return false; + } // Do not do non-trivial unswitch while optimizing for size. if (OptimizeForSize || F->hasFnAttribute(Attribute::OptimizeForSize)) return false; - UnswitchNontrivialCondition(LoopCond, Val, currentLoop); + UnswitchNontrivialCondition(LoopCond, Val, currentLoop, TI); return true; } @@ -685,25 +715,65 @@ static Loop *CloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, return New; } +static void copyMetadata(Instruction *DstInst, const Instruction *SrcInst, + bool Swapped) { + if (!SrcInst || !SrcInst->hasMetadata()) + return; + + SmallVector, 4> MDs; + SrcInst->getAllMetadata(MDs); + for (auto &MD : MDs) { + switch (MD.first) { + default: + break; + case LLVMContext::MD_prof: + if (Swapped && MD.second->getNumOperands() == 3 && + isa(MD.second->getOperand(0))) { + MDString *MDName = cast(MD.second->getOperand(0)); + if (MDName->getString() == "branch_weights") { + auto *ValT = cast_or_null( + MD.second->getOperand(1))->getValue(); + auto *ValF = cast_or_null( + MD.second->getOperand(2))->getValue(); + assert(ValT && ValF && "Invalid Operands of branch_weights"); + auto NewMD = + MDBuilder(DstInst->getParent()->getContext()) + .createBranchWeights(cast(ValF)->getZExtValue(), + cast(ValT)->getZExtValue()); + MD.second = NewMD; + } + } + // fallthrough. + case LLVMContext::MD_dbg: + DstInst->setMetadata(MD.first, MD.second); + } + } +} + /// EmitPreheaderBranchOnCondition - Emit a conditional branch on two values /// if LIC == Val, branch to TrueDst, otherwise branch to FalseDest. Insert the /// code immediately before InsertPt. void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val, BasicBlock *TrueDest, BasicBlock *FalseDest, - Instruction *InsertPt) { + Instruction *InsertPt, + TerminatorInst *TI) { // Insert a conditional branch on LIC to the two preheaders. The original // code is the true version and the new code is the false version. Value *BranchVal = LIC; + bool Swapped = false; if (!isa(Val) || Val->getType() != Type::getInt1Ty(LIC->getContext())) BranchVal = new ICmpInst(InsertPt, ICmpInst::ICMP_EQ, LIC, Val); - else if (Val != ConstantInt::getTrue(Val->getContext())) + else if (Val != ConstantInt::getTrue(Val->getContext())) { // We want to enter the new loop when the condition is true. std::swap(TrueDest, FalseDest); + Swapped = true; + } // Insert the new branch. BranchInst *BI = BranchInst::Create(TrueDest, FalseDest, BranchVal, InsertPt); + copyMetadata(BI, TI, Swapped); // If either edge is critical, split it. This helps preserve LoopSimplify // form for enclosing loops. @@ -717,13 +787,14 @@ void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val, /// where the path through the loop that doesn't execute its body has no /// side-effects), unswitch it. This doesn't involve any code duplication, just /// moving the conditional branch outside of the loop and updating loop info. 
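The copyMetadata helper above exists because EmitPreheaderBranchOnCondition may swap the branch's successors; when it does, the !prof branch_weights operands must be swapped as well, or the profile would describe the wrong arm. The core of that swap as a standalone sketch (the weights are illustrative):

    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/MDBuilder.h"

    // Rebuild !prof metadata with the taken/not-taken counts exchanged, for
    // a branch whose true and false destinations were just swapped.
    static void swapProfMetadata(llvm::BranchInst *BI, uint32_t OldTrueWeight,
                                 uint32_t OldFalseWeight) {
      llvm::MDBuilder MDB(BI->getContext());
      BI->setMetadata(llvm::LLVMContext::MD_prof,
                      MDB.createBranchWeights(OldFalseWeight, OldTrueWeight));
    }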
-void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, - Constant *Val, - BasicBlock *ExitBlock) { +void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val, + BasicBlock *ExitBlock, + TerminatorInst *TI) { DEBUG(dbgs() << "loop-unswitch: Trivial-Unswitch loop %" - << loopHeader->getName() << " [" << L->getBlocks().size() - << " blocks] in Function " << L->getHeader()->getParent()->getName() - << " on cond: " << *Val << " == " << *Cond << "\n"); + << loopHeader->getName() << " [" << L->getBlocks().size() + << " blocks] in Function " + << L->getHeader()->getParent()->getName() << " on cond: " << *Val + << " == " << *Cond << "\n"); // First step, split the preheader, so that we know that there is a safe place // to insert the conditional branch. We will change loopPreheader to have a @@ -744,7 +815,7 @@ void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, // Okay, now we have a position to branch from and a position to branch to, // insert the new conditional branch. EmitPreheaderBranchOnCondition(Cond, Val, NewExit, NewPH, - loopPreheader->getTerminator()); + loopPreheader->getTerminator(), TI); LPM->deleteSimpleAnalysisValue(loopPreheader->getTerminator(), L); loopPreheader->getTerminator()->eraseFromParent(); @@ -780,7 +851,7 @@ void LoopUnswitch::SplitExitEdges(Loop *L, /// to unswitch when LIC equal Val. Split it into loop versions and test the /// condition outside of either loop. Return the loops created as Out1/Out2. void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, - Loop *L) { + Loop *L, TerminatorInst *TI) { Function *F = loopHeader->getParent(); DEBUG(dbgs() << "loop-unswitch: Unswitching loop %" << loopHeader->getName() << " [" << L->getBlocks().size() @@ -897,7 +968,8 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, "Preheader splitting did not work correctly!"); // Emit the new branch that selects between the two versions of this loop. 
- EmitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR); + EmitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR, + TI); LPM->deleteSimpleAnalysisValue(OldBR, L); OldBR->eraseFromParent(); diff --git a/lib/Transforms/Scalar/LowerAtomic.cpp b/lib/Transforms/Scalar/LowerAtomic.cpp index b8b35d4249f0..3314e1ed41ab 100644 --- a/lib/Transforms/Scalar/LowerAtomic.cpp +++ b/lib/Transforms/Scalar/LowerAtomic.cpp @@ -138,7 +138,7 @@ namespace { return Changed; } }; -} // namespace +} char LowerAtomic::ID = 0; INITIALIZE_PASS(LowerAtomic, "loweratomic", diff --git a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp index b845c038e67e..0c47cbd5bfda 100644 --- a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp +++ b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp @@ -181,7 +181,7 @@ public: bool runOnFunction(Function &F) override { return lowerExpectIntrinsic(F); } }; -} // namespace +} char LowerExpectIntrinsic::ID = 0; INITIALIZE_PASS(LowerExpectIntrinsic, "lower-expect", diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 2c9f93513ae2..85012afc80ac 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -153,7 +153,7 @@ struct MemsetRange { bool isProfitableToUseMemset(const DataLayout &DL) const; }; -} // namespace +} // end anon namespace bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const { // If we found more than 4 stores to merge or 16 bytes, use memset. @@ -237,7 +237,7 @@ public: }; -} // namespace +} // end anon namespace /// addRange - Add a new store to the MemsetRanges data structure. This adds a @@ -355,7 +355,7 @@ namespace { }; char MemCpyOpt::ID = 0; -} // namespace +} // createMemCpyOptPass - The public interface to this file... FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOpt(); } diff --git a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp index 886b6f5b0a2c..243db8d70ca2 100644 --- a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp +++ b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp @@ -156,7 +156,7 @@ private: }; char MergedLoadStoreMotion::ID = 0; -} // namespace +} /// /// \brief createMergedLoadStoreMotionPass - The public interface to this file. diff --git a/lib/Transforms/Scalar/NaryReassociate.cpp b/lib/Transforms/Scalar/NaryReassociate.cpp index 4cf68b00da0a..f42f8306fccc 100644 --- a/lib/Transforms/Scalar/NaryReassociate.cpp +++ b/lib/Transforms/Scalar/NaryReassociate.cpp @@ -74,21 +74,18 @@ // 1) We only considers n-ary adds for now. This should be extended and // generalized. // -// 2) Besides arithmetic operations, similar reassociation can be applied to -// GEPs. For example, if -// X = &arr[a] -// dominates -// Y = &arr[a + b] -// we may rewrite Y into X + b. 
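The TODO being deleted above is exactly what the NaryReassociate changes in this patch implement: a GEP whose index is a sum can reuse a dominating GEP that computes part of that sum. A source-level illustration (consume() is a placeholder):

    void consume(int *);

    void sketch(int *arr, long a, long b) {
      int *x = &arr[a]; // X = &arr[a] dominates the computation below
      consume(x);
      // Y was &arr[a + b]; after reassociation it becomes &X[b], reusing the
      // dominating address instead of recomputing arr + a + b from scratch.
      int *y = &x[b];
      consume(y);
    }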
-// //===----------------------------------------------------------------------===// +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Module.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -115,6 +112,7 @@ public: AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); + AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); @@ -163,12 +161,18 @@ private: // GEP's pointer size, i.e., whether Index needs to be sign-extended in order // to be an index of GEP. bool requiresSignExtension(Value *Index, GetElementPtrInst *GEP); + // Returns whether V is known to be non-negative at context \c Ctxt. + bool isKnownNonNegative(Value *V, Instruction *Ctxt); + // Returns whether AO may sign overflow at context \c Ctxt. It computes a + // conservative result -- it answers true when not sure. + bool maySignOverflow(AddOperator *AO, Instruction *Ctxt); + AssumptionCache *AC; + const DataLayout *DL; DominatorTree *DT; ScalarEvolution *SE; TargetLibraryInfo *TLI; TargetTransformInfo *TTI; - const DataLayout *DL; // A lookup table quickly telling which instructions compute the given SCEV. // Note that there can be multiple instructions at different locations // computing to the same SCEV, so we map a SCEV to an instruction list. For @@ -185,6 +189,7 @@ private: char NaryReassociate::ID = 0; INITIALIZE_PASS_BEGIN(NaryReassociate, "nary-reassociate", "Nary reassociation", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) @@ -200,6 +205,7 @@ bool NaryReassociate::runOnFunction(Function &F) { if (skipOptnoneFunction(F)) return false; + AC = &getAnalysis().getAssumptionCache(F); DT = &getAnalysis().getDomTree(); SE = &getAnalysis(); TLI = &getAnalysis().getTLI(); @@ -346,18 +352,44 @@ bool NaryReassociate::requiresSignExtension(Value *Index, return cast(Index->getType())->getBitWidth() < PointerSizeInBits; } +bool NaryReassociate::isKnownNonNegative(Value *V, Instruction *Ctxt) { + bool NonNegative, Negative; + // TODO: ComputeSignBits is expensive. Consider caching the results. + ComputeSignBit(V, NonNegative, Negative, *DL, 0, AC, Ctxt, DT); + return NonNegative; +} + +bool NaryReassociate::maySignOverflow(AddOperator *AO, Instruction *Ctxt) { + if (AO->hasNoSignedWrap()) + return false; + + Value *LHS = AO->getOperand(0), *RHS = AO->getOperand(1); + // If LHS or RHS has the same sign as the sum, AO doesn't sign overflow. + // TODO: handle the negative case as well. + if (isKnownNonNegative(AO, Ctxt) && + (isKnownNonNegative(LHS, Ctxt) || isKnownNonNegative(RHS, Ctxt))) + return false; + + return true; +} + GetElementPtrInst * NaryReassociate::tryReassociateGEPAtIndex(GetElementPtrInst *GEP, unsigned I, Type *IndexedType) { Value *IndexToSplit = GEP->getOperand(I + 1); - if (SExtInst *SExt = dyn_cast(IndexToSplit)) + if (SExtInst *SExt = dyn_cast(IndexToSplit)) { IndexToSplit = SExt->getOperand(0); + } else if (ZExtInst *ZExt = dyn_cast(IndexToSplit)) { + // zext can be treated as sext if the source is non-negative. 
+ if (isKnownNonNegative(ZExt->getOperand(0), GEP)) + IndexToSplit = ZExt->getOperand(0); + } if (AddOperator *AO = dyn_cast(IndexToSplit)) { // If the I-th index needs sext and the underlying add is not equipped with // nsw, we cannot split the add because // sext(LHS + RHS) != sext(LHS) + sext(RHS). - if (requiresSignExtension(IndexToSplit, GEP) && !AO->hasNoSignedWrap()) + if (requiresSignExtension(IndexToSplit, GEP) && maySignOverflow(AO, GEP)) return nullptr; Value *LHS = AO->getOperand(0), *RHS = AO->getOperand(1); // IndexToSplit = LHS + RHS. @@ -373,10 +405,9 @@ NaryReassociate::tryReassociateGEPAtIndex(GetElementPtrInst *GEP, unsigned I, return nullptr; } -GetElementPtrInst * -NaryReassociate::tryReassociateGEPAtIndex(GetElementPtrInst *GEP, unsigned I, - Value *LHS, Value *RHS, - Type *IndexedType) { +GetElementPtrInst *NaryReassociate::tryReassociateGEPAtIndex( + GetElementPtrInst *GEP, unsigned I, Value *LHS, Value *RHS, + Type *IndexedType) { // Look for GEP's closest dominator that has the same SCEV as GEP except that // the I-th index is replaced with LHS. SmallVector IndexExprs; @@ -384,6 +415,16 @@ NaryReassociate::tryReassociateGEPAtIndex(GetElementPtrInst *GEP, unsigned I, IndexExprs.push_back(SE->getSCEV(*Index)); // Replace the I-th index with LHS. IndexExprs[I] = SE->getSCEV(LHS); + if (isKnownNonNegative(LHS, GEP) && + DL->getTypeSizeInBits(LHS->getType()) < + DL->getTypeSizeInBits(GEP->getOperand(I)->getType())) { + // Zero-extend LHS if it is non-negative. InstCombine canonicalizes sext to + // zext if the source operand is proved non-negative. We should do that + // consistently so that CandidateExpr more likely appears before. See + // @reassociate_gep_assume for an example of this canonicalization. + IndexExprs[I] = + SE->getZeroExtendExpr(IndexExprs[I], GEP->getOperand(I)->getType()); + } const SCEV *CandidateExpr = SE->getGEPExpr( GEP->getSourceElementType(), SE->getSCEV(GEP->getPointerOperand()), IndexExprs, GEP->isInBounds()); diff --git a/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp index 5423499723f7..31d7df39c781 100644 --- a/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp +++ b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp @@ -46,7 +46,7 @@ namespace { }; char PartiallyInlineLibCalls::ID = 0; -} // namespace +} INITIALIZE_PASS(PartiallyInlineLibCalls, "partially-inline-libcalls", "Partially inline calls to library functions", false, false) diff --git a/lib/Transforms/Scalar/PlaceSafepoints.cpp b/lib/Transforms/Scalar/PlaceSafepoints.cpp index 670dcd24f75c..9ecaf102574a 100644 --- a/lib/Transforms/Scalar/PlaceSafepoints.cpp +++ b/lib/Transforms/Scalar/PlaceSafepoints.cpp @@ -160,7 +160,7 @@ struct PlaceBackedgeSafepointsImpl : public FunctionPass { AU.setPreservesAll(); } }; -} // namespace +} static cl::opt NoEntry("spp-no-entry", cl::Hidden, cl::init(false)); static cl::opt NoCall("spp-no-call", cl::Hidden, cl::init(false)); @@ -181,7 +181,7 @@ struct PlaceSafepoints : public FunctionPass { // if that was worth doing } }; -} // namespace +} // Insert a safepoint poll immediately before the given instruction. 
Does // not handle the parsability of state at the runtime call, that's the diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index 9842fd7bb6c7..d1acf785d07e 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -154,7 +154,7 @@ namespace { unsigned SymbolicRank; bool isOr; }; -} // namespace +} namespace { class Reassociate : public FunctionPass { @@ -197,7 +197,7 @@ namespace { void OptimizeInst(Instruction *I); Instruction *canonicalizeNegConstExpr(Instruction *I); }; -} // namespace +} XorOpnd::XorOpnd(Value *V) { assert(!isa(V) && "No ConstantInt"); @@ -936,6 +936,10 @@ static Value *NegateValue(Value *V, Instruction *BI) { // Push the negates through the add. I->setOperand(0, NegateValue(I->getOperand(0), BI)); I->setOperand(1, NegateValue(I->getOperand(1), BI)); + if (I->getOpcode() == Instruction::Add) { + I->setHasNoUnsignedWrap(false); + I->setHasNoSignedWrap(false); + } // We must move the add instruction here, because the neg instructions do // not dominate the old add instruction in general. By moving it, we are @@ -976,6 +980,12 @@ static Value *NegateValue(Value *V, Instruction *BI) { InsertPt = TheNeg->getParent()->getParent()->getEntryBlock().begin(); } TheNeg->moveBefore(InsertPt); + if (TheNeg->getOpcode() == Instruction::Sub) { + TheNeg->setHasNoUnsignedWrap(false); + TheNeg->setHasNoSignedWrap(false); + } else { + TheNeg->andIRFlags(BI); + } return TheNeg; } diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp index 2ff56e67c9c6..1b46727c17bb 100644 --- a/lib/Transforms/Scalar/Reg2Mem.cpp +++ b/lib/Transforms/Scalar/Reg2Mem.cpp @@ -58,7 +58,7 @@ namespace { bool runOnFunction(Function &F) override; }; -} // namespace +} char RegToMem::ID = 0; INITIALIZE_PASS_BEGIN(RegToMem, "reg2mem", "Demote all values to stack slots", diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index c15bc1bd7eca..ae2ae3af0c7a 100644 --- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -183,7 +183,7 @@ struct PartiallyConstructedSafepointRecord { /// Maps rematerialized copy to it's original value. RematerializedValueMapTy RematerializedValues; }; -} // namespace +} /// Compute the live-in set for every basic block in the function static void computeLiveInValues(DominatorTree &DT, Function &F, @@ -294,12 +294,17 @@ static void analyzeParsePointLiveness( static Value *findBaseDefiningValue(Value *I); -/// If we can trivially determine that the index specified in the given vector -/// is a base pointer, return it. In cases where the entire vector is known to -/// consist of base pointers, the entire vector will be returned. This -/// indicates that the relevant extractelement is a valid base pointer and -/// should be used directly. -static Value *findBaseOfVector(Value *I, Value *Index) { +/// Return a base defining value for the 'Index' element of the given vector +/// instruction 'I'. If Index is null, returns a BDV for the entire vector +/// 'I'. As an optimization, this method will try to determine when the +/// element is known to already be a base pointer. If this can be established, +/// the second value in the returned pair will be true. Note that either a +/// vector or a pointer typed value can be returned. For the former, the +/// vector returned is a BDV (and possibly a base) of the entire vector 'I'. 
+/// If the latter, the return pointer is a BDV (or possibly a base) for the
+/// particular element in 'I'.
+static std::pair<Value *, bool>
+findBaseDefiningValueOfVector(Value *I, Value *Index = nullptr) {
   assert(I->getType()->isVectorTy() &&
          cast<VectorType>(I->getType())->getElementType()->isPointerTy() &&
          "Illegal to ask for the base pointer of a non-pointer type");
@@ -309,7 +314,7 @@ static Value *findBaseOfVector(Value *I, Value *Index) {
   if (isa<Argument>(I))
     // An incoming argument to the function is a base pointer
-    return I;
+    return std::make_pair(I, true);
 
   // We shouldn't see the address of a global as a vector value?
   assert(!isa<GlobalVariable>(I) &&
@@ -320,7 +325,7 @@ static Value *findBaseOfVector(Value *I, Value *Index) {
   if (isa<UndefValue>(I))
     // utterly meaningless, but useful for dealing with partially optimized
     // code.
-    return I;
+    return std::make_pair(I, true);
 
   // Due to inheritance, this must be _after_ the global variable and undef
   // checks
@@ -328,38 +333,56 @@ static Value *findBaseOfVector(Value *I, Value *Index) {
     assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) &&
            "order of checks wrong!");
     assert(Con->isNullValue() && "null is the only case which makes sense");
-    return Con;
+    return std::make_pair(Con, true);
   }
-
+
   if (isa<LoadInst>(I))
-    return I;
-
+    return std::make_pair(I, true);
+
   // For an insert element, we might be able to look through it if we know
-  // something about the indexes, but if the indices are arbitrary values, we
-  // can't without much more extensive scalarization.
+  // something about the indexes.
   if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(I)) {
-    Value *InsertIndex = IEI->getOperand(2);
-    // This index is inserting the value, look for it's base
-    if (InsertIndex == Index)
-      return findBaseDefiningValue(IEI->getOperand(1));
-    // Both constant, and can't be equal per above. This insert is definitely
-    // not relevant, look back at the rest of the vector and keep trying.
-    if (isa<ConstantInt>(Index) && isa<ConstantInt>(InsertIndex))
-      return findBaseOfVector(IEI->getOperand(0), Index);
-  }
-
-  // Note: This code is currently rather incomplete. We are essentially only
-  // handling cases where the vector element is trivially a base pointer. We
-  // need to update the entire base pointer construction algorithm to know how
-  // to track vector elements and potentially scalarize, but the case which
-  // would motivate the work hasn't shown up in real workloads yet.
-  llvm_unreachable("no base found for vector element");
+    if (Index) {
+      Value *InsertIndex = IEI->getOperand(2);
+      // This index is inserting the value, look for its BDV
+      if (InsertIndex == Index)
+        return std::make_pair(findBaseDefiningValue(IEI->getOperand(1)),
+                              false);
+      // Both constant, and can't be equal per above. This insert is
+      // definitely not relevant, look back at the rest of the vector and
+      // keep trying.
+      if (isa<ConstantInt>(Index) && isa<ConstantInt>(InsertIndex))
+        return findBaseDefiningValueOfVector(IEI->getOperand(0), Index);
+    }
+
+    // We don't know whether this vector contains entirely base pointers or
+    // not. To be conservatively correct, we treat it as a BDV and will
+    // duplicate code as needed to construct a parallel vector of bases.
+    return std::make_pair(IEI, false);
+  }
+
+  if (isa<ShuffleVectorInst>(I))
+    // We don't know whether this vector contains entirely base pointers or
+    // not. To be conservatively correct, we treat it as a BDV and will
+    // duplicate code as needed to construct a parallel vector of bases.
+    // TODO: There are a number of local optimizations which could be applied
+    // here for particular shufflevector patterns.
+    return std::make_pair(I, false);
+
+  // A PHI or Select is a base defining value. The outer findBasePointer
+  // algorithm is responsible for constructing a base value for this BDV.
+  assert((isa<PHINode>(I) || isa<SelectInst>(I)) &&
+         "unknown vector instruction - no base found for vector element");
+  return std::make_pair(I, false);
 }
 
+static bool isKnownBaseResult(Value *V);
+
 /// Helper function for findBasePointer - Will return a value which either a)
 /// defines the base pointer for the input or b) blocks the simple search
 /// (i.e. a PHI or Select of two derived pointers)
 static Value *findBaseDefiningValue(Value *I) {
+  if (I->getType()->isVectorTy())
+    return findBaseDefiningValueOfVector(I).first;
+
   assert(I->getType()->isPointerTy() &&
          "Illegal to ask for the base pointer of a non-pointer type");
@@ -370,16 +393,39 @@ static Value *findBaseDefiningValue(Value *I) {
   if (auto *EEI = dyn_cast<ExtractElementInst>(I)) {
     Value *VectorOperand = EEI->getVectorOperand();
     Value *Index = EEI->getIndexOperand();
-    Value *VectorBase = findBaseOfVector(VectorOperand, Index);
-    // If the result returned is a vector, we know the entire vector must
-    // contain base pointers. In that case, the extractelement is a valid
-    // base for this value.
-    if (VectorBase->getType()->isVectorTy())
-      return EEI;
-    // Otherwise, we needed to look through the vector to find the base for
-    // this particular element.
-    assert(VectorBase->getType()->isPointerTy());
-    return VectorBase;
+    std::pair<Value *, bool> pair =
+        findBaseDefiningValueOfVector(VectorOperand, Index);
+    Value *VectorBase = pair.first;
+    if (VectorBase->getType()->isPointerTy())
+      // We found a BDV for this specific element within the vector. This is
+      // an optimization, but in practice it covers most of the useful cases
+      // created via scalarization.
+      return VectorBase;
+    else {
+      assert(VectorBase->getType()->isVectorTy());
+      if (pair.second)
+        // If the entire vector returned is known to consist of base
+        // pointers, then the extractelement is a valid base for this value.
+        return EEI;
+      else {
+        // Otherwise, we have an instruction which potentially produces a
+        // derived pointer and we need findBasePointers to clone code for us
+        // such that we can create an instruction which produces the
+        // accompanying base pointer.
+        // Note: This code is currently rather incomplete. We don't currently
+        // support the general form of shufflevector of insertelement.
+        // Conceptually, these are just 'base defining values' of the same
+        // variety as phi or select instructions. We need to update the
+        // findBasePointers algorithm to insert new 'base-only' versions of
+        // the original instructions. This is relatively straightforward to
+        // do, but the case which would motivate the work hasn't shown up in
+        // real workloads yet.
+        assert((isa<PHINode>(VectorBase) || isa<SelectInst>(VectorBase)) &&
+               "need to extend findBasePointers for generic vector "
+               "instruction cases");
+        return VectorBase;
+      }
+    }
   }
 
   if (isa<Argument>(I))
@@ -646,7 +692,7 @@ private:
     llvm_unreachable("only three states!");
   }
 };
-} // namespace
+}
 
 /// For a given value or instruction, figure out what base ptr it's derived
 /// from. For gc objects, this is simply itself. On success, returns a value
 /// which is the base pointer. (This is reliable and can be used for
@@ -1712,7 +1758,9 @@ static void findLiveReferences(
 /// slightly non-trivial since it requires a format change. Given how rare
 /// such cases are (for the moment?) scalarizing is an acceptable compromise.
 static void splitVectorValues(Instruction *StatepointInst,
-                              StatepointLiveSetTy &LiveSet, DominatorTree &DT) {
+                              StatepointLiveSetTy &LiveSet,
+                              DenseMap<Value *, Value *>& PointerToBase,
+                              DominatorTree &DT) {
   SmallVector<Value *, 16> ToSplit;
   for (Value *V : LiveSet)
     if (isa<VectorType>(V->getType()))
       ToSplit.push_back(V);
@@ -1721,14 +1769,14 @@ static void splitVectorValues(Instruction *StatepointInst,
   if (ToSplit.empty())
     return;
 
+  DenseMap<Value *, SmallVector<Value *, 16>> ElementMapping;
+
   Function &F = *(StatepointInst->getParent()->getParent());
 
   DenseMap<Value *, AllocaInst *> AllocaMap;
   // First is normal return, second is exceptional return (invoke only)
   DenseMap<Value *, std::pair<Value *, Value *>> Replacements;
   for (Value *V : ToSplit) {
-    LiveSet.erase(V);
-
     AllocaInst *Alloca =
         new AllocaInst(V->getType(), "", F.getEntryBlock().getFirstNonPHI());
     AllocaMap[V] = Alloca;
@@ -1738,7 +1786,7 @@ static void splitVectorValues(Instruction *StatepointInst,
     SmallVector<Value *, 16> Elements;
     for (unsigned i = 0; i < VT->getNumElements(); i++)
       Elements.push_back(Builder.CreateExtractElement(V, Builder.getInt32(i)));
-    LiveSet.insert(Elements.begin(), Elements.end());
+    ElementMapping[V] = Elements;
 
     auto InsertVectorReform = [&](Instruction *IP) {
       Builder.SetInsertPoint(IP);
@@ -1771,6 +1819,7 @@ static void splitVectorValues(Instruction *StatepointInst,
       Replacements[V].second = InsertVectorReform(IP);
     }
   }
+
 
   for (Value *V : ToSplit) {
     AllocaInst *Alloca = AllocaMap[V];
@@ -1814,6 +1863,25 @@ static void splitVectorValues(Instruction *StatepointInst,
   for (Value *V : ToSplit)
     Allocas.push_back(AllocaMap[V]);
   PromoteMemToReg(Allocas, DT);
+
+  // Update our tracking of live pointers and base mappings to account for the
+  // changes we just made.
+  for (Value *V : ToSplit) {
+    auto &Elements = ElementMapping[V];
+
+    LiveSet.erase(V);
+    LiveSet.insert(Elements.begin(), Elements.end());
+    // We need to update the base mapping as well.
+    assert(PointerToBase.count(V));
+    Value *OldBase = PointerToBase[V];
+    auto &BaseElements = ElementMapping[OldBase];
+    PointerToBase.erase(V);
+    assert(Elements.size() == BaseElements.size());
+    for (unsigned i = 0; i < Elements.size(); i++) {
+      Value *Elem = Elements[i];
+      PointerToBase[Elem] = BaseElements[i];
+    }
+  }
 }
 
 // Helper function for the "rematerializeLiveValues". It walks use chain
@@ -2075,17 +2143,6 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
   // site.
   findLiveReferences(F, DT, P, toUpdate, records);
 
-  // Do a limited scalarization of any live at safepoint vector values which
-  // contain pointers. This enables this pass to run after vectorization at
-  // the cost of some possible performance loss. TODO: it would be nice to
-  // natively support vectors all the way through the backend so we don't need
-  // to scalarize here.
-  for (size_t i = 0; i < records.size(); i++) {
-    struct PartiallyConstructedSafepointRecord &info = records[i];
-    Instruction *statepoint = toUpdate[i].getInstruction();
-    splitVectorValues(cast<Instruction>(statepoint), info.liveset, DT);
-  }
-
   // B) Find the base pointers for each live pointer
   /* scope for caching */ {
     // Cache the 'defining value' relation used in the computation and
@@ -2146,6 +2203,18 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
   }
   holders.clear();
 
+  // Do a limited scalarization of any live at safepoint vector values which
+  // contain pointers. This enables this pass to run after vectorization at
+  // the cost of some possible performance loss. TODO: it would be nice to
+  // natively support vectors all the way through the backend so we don't need
+  // to scalarize here.
+  for (size_t i = 0; i < records.size(); i++) {
+    struct PartiallyConstructedSafepointRecord &info = records[i];
+    Instruction *statepoint = toUpdate[i].getInstruction();
+    splitVectorValues(cast<Instruction>(statepoint), info.liveset,
+                      info.PointerToBase, DT);
+  }
+
   // In order to reduce the live set of a statepoint we might choose to
   // rematerialize some values instead of relocating them. This is purely an
   // optimization and does not influence correctness.
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index bc068f78c576..305175ff8f73 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -1055,7 +1055,7 @@ void SCCPSolver::visitLoadInst(LoadInst &I) {
 
-  // load null -> null
+  // load null -> undef
   if (isa<ConstantPointerNull>(Ptr) && I.getPointerAddressSpace() == 0)
-    return markConstant(IV, &I, Constant::getNullValue(I.getType()));
+    return markConstant(IV, &I, UndefValue::get(I.getType()));
 
   // Transform load (constant global) into the value loaded.
   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index f38b2b1dbf96..056dd11b5ab3 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -127,7 +127,7 @@ typedef llvm::IRBuilder<true, ConstantFolder, IRBuilderPrefixedInserter<true>>
 typedef llvm::IRBuilder<false, ConstantFolder, IRBuilderPrefixedInserter<false>>
     IRBuilderTy;
 #endif
-} // namespace
+}
 
 namespace {
 /// \brief A used slice of an alloca.
@@ -595,7 +595,7 @@ private:
   /// the alloca.
   SmallVector<Use *, 8> DeadOperands;
 };
-} // namespace
+}
 
 static Value *foldSelectInst(SelectInst &SI) {
   // If the condition being selected on is a constant or the same value is
@@ -1173,7 +1173,7 @@ public:
     }
   }
 };
-} // namespace
+} // end anon namespace
 
 namespace {
 /// \brief An optimization pass providing Scalar Replacement of Aggregates.
@@ -1268,7 +1268,7 @@ private:
   void deleteDeadInstructions(SmallPtrSetImpl<AllocaInst *> &DeletedAllocas);
   bool promoteAllocas(Function &F);
 };
-} // namespace
+}
 
 char SROA::ID = 0;
 
@@ -3119,7 +3119,7 @@ private:
     return true;
   }
 };
-} // namespace
+}
 
 namespace {
 /// \brief Visitor to rewrite aggregate loads and stores as scalar.
@@ -3327,7 +3327,7 @@ private:
     return false;
   }
 };
-} // namespace
+}
 
 /// \brief Strip aggregate type wrapping.
 ///
diff --git a/lib/Transforms/Scalar/SampleProfile.cpp b/lib/Transforms/Scalar/SampleProfile.cpp
index 69e3a67aa8c1..c8dfa54a4aa0 100644
--- a/lib/Transforms/Scalar/SampleProfile.cpp
+++ b/lib/Transforms/Scalar/SampleProfile.cpp
@@ -174,7 +174,7 @@ protected:
   /// \brief Flag indicating whether the profile input loaded successfully.
   bool ProfileIsValid;
 };
-} // namespace
+}
 
 /// \brief Print the weight of edge \p E on stream \p OS.
 ///
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index e42c3daab8d7..d955da7ce75d 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -221,7 +221,7 @@ namespace {
     }
   };
 
-} // namespace
+}
 
 char SROA_DT::ID = 0;
 char SROA_SSAUp::ID = 0;
@@ -1123,7 +1123,7 @@ public:
     }
   }
 };
-} // namespace
+} // end anon namespace
 
 /// isSafeSelectToSpeculate - Select instructions that use an alloca and are
 /// subsequently loaded can be rewritten to load both input pointers and then
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 0733daf40f39..231411a16c05 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -48,8 +48,8 @@ UserBonusInstThreshold("bonus-inst-threshold", cl::Hidden, cl::init(1),
 
 STATISTIC(NumSimpl, "Number of blocks simplified");
 
-/// mergeEmptyReturnBlocks - If we have more than one empty (other than phi
-/// node) return blocks, merge them together to promote recursive block merging.
+/// If we have more than one empty (other than phi node) return block,
+/// merge them together to promote recursive block merging.
 static bool mergeEmptyReturnBlocks(Function &F) {
   bool Changed = false;
@@ -124,7 +124,7 @@ static bool mergeEmptyReturnBlocks(Function &F) {
 
   return Changed;
 }
 
-/// iterativelySimplifyCFG - Call SimplifyCFG on all the blocks in the function,
+/// Call SimplifyCFG on all the blocks in the function,
 /// iterating until no more changes are made.
 static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
                                    AssumptionCache *AC,
@@ -134,8 +134,7 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
   while (LocalChange) {
     LocalChange = false;
 
-    // Loop over all of the basic blocks and remove them if they are unneeded...
-    //
+    // Loop over all of the basic blocks and remove them if they are unneeded.
     for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) {
       if (SimplifyCFG(BBIt++, TTI, BonusInstThreshold, AC)) {
         LocalChange = true;
@@ -159,7 +158,7 @@ static bool simplifyFunctionCFG(Function &F, const TargetTransformInfo &TTI,
   // iterativelySimplifyCFG can (rarely) make some loops dead. If this happens,
   // removeUnreachableBlocks is needed to nuke them, which means we should
   // iterate between the two optimizations. We structure the code like this to
-  // avoid reruning iterativelySimplifyCFG if the second pass of
+  // avoid rerunning iterativelySimplifyCFG if the second pass of
   // removeUnreachableBlocks doesn't do anything.
   if (!removeUnreachableBlocks(F))
     return true;
@@ -220,7 +219,7 @@ struct CFGSimplifyPass : public FunctionPass {
     AU.addRequired<TargetTransformInfoWrapperPass>();
   }
 };
-} // namespace
+}
 
 char CFGSimplifyPass::ID = 0;
 INITIALIZE_PASS_BEGIN(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false,
diff --git a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
index f32769c24110..6d9d417ef943 100644
--- a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -224,11 +224,13 @@ FunctionPass *llvm::createStraightLineStrengthReducePass() {
 bool StraightLineStrengthReduce::isBasisFor(const Candidate &Basis,
                                             const Candidate &C) {
   return (Basis.Ins != C.Ins && // skip the same instruction
+          // They must have the same type too. Basis.Base == C.Base doesn't
+          // guarantee their types are the same (PR23975).
+          Basis.Ins->getType() == C.Ins->getType() &&
           // Basis must dominate C in order to rewrite C with respect to Basis.
           DT->dominates(Basis.Ins->getParent(), C.Ins->getParent()) &&
           // They share the same base, stride, and candidate kind.
-          Basis.Base == C.Base &&
-          Basis.Stride == C.Stride &&
+          Basis.Base == C.Base && Basis.Stride == C.Stride &&
           Basis.CandidateKind == C.CandidateKind);
 }
 
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index d23f5153c188..c7de2e2965c7 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -120,7 +120,7 @@ namespace {
     bool CanMoveAboveCall(Instruction *I, CallInst *CI);
     Value *CanTransformAccumulatorRecursion(Instruction *I, CallInst *CI);
   };
-} // namespace
+}
 
 char TailCallElim::ID = 0;
 INITIALIZE_PASS_BEGIN(TailCallElim, "tailcallelim",
@@ -246,7 +246,7 @@ struct AllocaDerivedValueTracker {
   SmallPtrSet<Instruction *, 32> AllocaUsers;
   SmallPtrSet<Instruction *, 32> EscapePoints;
 };
-} // namespace
+}
 
 bool TailCallElim::markTails(Function &F, bool &AllCallsAreTailCalls) {
   if (F.callsFunctionThatReturnsTwice())
-- 
cgit v1.2.3
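
A note for readers tracing the RewriteStatepointsForGC hunks above: findBaseDefiningValueOfVector now returns a (value, is-known-base) pair rather than a bare value, and the bool is what lets findBaseDefiningValue decide whether an extractelement can serve as its own base. The sketch below is a minimal standalone model of that recursion, not the LLVM API; the Node type and all names are illustrative stand-ins, and it assumes constant lane indices behave as in the patch.

    #include <cassert>
    #include <cstdio>
    #include <utility>

    // Toy stand-in for a vector-of-pointers SSA value; not an LLVM class.
    struct Node {
      enum Kind { Argument, InsertElement, Phi } kind;
      Node *vectorOp = nullptr; // InsertElement: the vector being updated
      Node *scalarOp = nullptr; // InsertElement: the element being inserted
      int insertIndex = -1;     // InsertElement: constant lane, -1 if unknown
    };

    // Mirrors the shape of findBaseDefiningValueOfVector: returns a base
    // defining value plus a flag saying whether it is already known to be a
    // base. Arguments are known bases; an insertelement can be looked through
    // when both lane indices are known constants; everything else is
    // conservatively a BDV needing a parallel vector of bases.
    static std::pair<Node *, bool> findBDVOfVector(Node *V, int Lane = -1) {
      switch (V->kind) {
      case Node::Argument:
        return {V, true}; // an incoming argument is trivially a base
      case Node::InsertElement:
        if (Lane >= 0 && V->insertIndex >= 0) {
          if (V->insertIndex == Lane)
            return {V->scalarOp, false}; // the queried lane was just written
          return findBDVOfVector(V->vectorOp, Lane); // definitely another lane
        }
        return {V, false}; // unknown lanes: the insert itself is the BDV
      case Node::Phi:
        return {V, false}; // the outer algorithm must build a base for this
      }
      assert(false && "all kinds handled above");
      return {nullptr, false};
    }

    int main() {
      Node Base{Node::Argument}, Scalar{Node::Argument};
      Node Insert{Node::InsertElement};
      Insert.vectorOp = &Base;
      Insert.scalarOp = &Scalar;
      Insert.insertIndex = 0;

      auto Lane0 = findBDVOfVector(&Insert, 0); // hits the inserted element
      auto Lane1 = findBDVOfVector(&Insert, 1); // skips to the base vector
      std::printf("lane0 known base: %d, lane1 known base: %d\n",
                  Lane0.second, Lane1.second);
    }

As in the patch, true means the returned value is already a base, while false marks a BDV for which the outer findBasePointer algorithm must construct a parallel base.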
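
Likewise, splitVectorValues now receives PointerToBase because scalarization was moved after base-pointer computation: when a live vector is split into lanes, each lane must be remapped to the matching lane of the vector's base, which was split as well since it is also live at the statepoint. Below is a minimal sketch of just that bookkeeping, using standard containers and a string stand-in for Value; all names are hypothetical, not the pass's actual types.

    #include <cassert>
    #include <map>
    #include <set>
    #include <string>
    #include <vector>

    using Value = std::string; // stand-in for llvm::Value*; names are toy IR

    // The bookkeeping the patch appends to splitVectorValues: once a live
    // vector V has been split into scalar lanes, retire V from the live set,
    // add its lanes, and point each lane at the matching lane of V's base.
    static void remapAfterSplit(std::set<Value> &LiveSet,
                                std::map<Value, Value> &PointerToBase,
                                std::map<Value, std::vector<Value>> &Elements,
                                const std::vector<Value> &ToSplit) {
      for (const Value &V : ToSplit) {
        const std::vector<Value> &Lanes = Elements.at(V);
        LiveSet.erase(V);
        LiveSet.insert(Lanes.begin(), Lanes.end());

        assert(PointerToBase.count(V) && "split value must have a known base");
        const std::vector<Value> &BaseLanes = Elements.at(PointerToBase.at(V));
        assert(Lanes.size() == BaseLanes.size() && "lane counts must agree");
        PointerToBase.erase(V);
        for (size_t i = 0; i < Lanes.size(); ++i)
          PointerToBase[Lanes[i]] = BaseLanes[i];
      }
    }

    int main() {
      std::set<Value> Live{"derived.vec"};
      std::map<Value, Value> PointerToBase{{"derived.vec", "base.vec"}};
      std::map<Value, std::vector<Value>> Elements{
          {"derived.vec", {"derived.0", "derived.1"}},
          {"base.vec", {"base.0", "base.1"}}};

      remapAfterSplit(Live, PointerToBase, Elements, {"derived.vec"});
      assert(PointerToBase.at("derived.0") == "base.0");
      assert(PointerToBase.at("derived.1") == "base.1");
      assert(Live.count("derived.0") && !Live.count("derived.vec"));
    }

The lane-count assert mirrors the patch's invariant that a derived vector and its base vector are split into the same number of elements.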