38 files changed, 418 insertions, 458 deletions
diff --git a/lib/Analysis/DemandedBits.cpp b/lib/Analysis/DemandedBits.cpp
index 7276f2524fed..de7d21f9f133 100644
--- a/lib/Analysis/DemandedBits.cpp
+++ b/lib/Analysis/DemandedBits.cpp
@@ -385,8 +385,8 @@ bool DemandedBits::isInstructionDead(Instruction *I) {
 void DemandedBits::print(raw_ostream &OS) {
   performAnalysis();
   for (auto &KV : AliveBits) {
-    OS << "DemandedBits: 0x" << utohexstr(KV.second.getLimitedValue()) << " for "
-       << *KV.first << "\n";
+    OS << "DemandedBits: 0x" << Twine::utohexstr(KV.second.getLimitedValue())
+       << " for " << *KV.first << '\n';
   }
 }
 
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 3ce1281743c3..93fb1143e505 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -3897,8 +3897,9 @@ static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const SimplifyQ
   // If extracting a specified index from the vector, see if we can recursively
   // find a previously computed scalar that was inserted into the vector.
   if (auto *IdxC = dyn_cast<ConstantInt>(Idx))
-    if (Value *Elt = findScalarElement(Vec, IdxC->getZExtValue()))
-      return Elt;
+    if (IdxC->getValue().ule(Vec->getType()->getVectorNumElements()))
+      if (Value *Elt = findScalarElement(Vec, IdxC->getZExtValue()))
+        return Elt;
 
   // An undef extract index can be arbitrarily chosen to be an out-of-range
   // index value, which would result in the instruction being undef.
@@ -4494,6 +4495,22 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
         return *ArgBegin;
       return nullptr;
     }
+    case Intrinsic::bswap: {
+      Value *IIOperand = *ArgBegin;
+      Value *X = nullptr;
+      // bswap(bswap(x)) -> x
+      if (match(IIOperand, m_BSwap(m_Value(X))))
+        return X;
+      return nullptr;
+    }
+    case Intrinsic::bitreverse: {
+      Value *IIOperand = *ArgBegin;
+      Value *X = nullptr;
+      // bitreverse(bitreverse(x)) -> x
+      if (match(IIOperand, m_BitReverse(m_Value(X))))
+        return X;
+      return nullptr;
+    }
     default:
       return nullptr;
     }
@@ -4548,6 +4565,16 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
         return SimplifyRelativeLoad(C0, C1, Q.DL);
       return nullptr;
     }
+    case Intrinsic::powi:
+      if (ConstantInt *Power = dyn_cast<ConstantInt>(RHS)) {
+        // powi(x, 0) -> 1.0
+        if (Power->isZero())
+          return ConstantFP::get(LHS->getType(), 1.0);
+        // powi(x, 1) -> x
+        if (Power->isOne())
+          return LHS;
+      }
+      return nullptr;
     default:
       return nullptr;
     }
@@ -4616,6 +4643,12 @@ Value *llvm::SimplifyCall(ImmutableCallSite CS, Value *V,
   return ::SimplifyCall(CS, V, Args.begin(), Args.end(), Q, RecursionLimit);
 }
 
+Value *llvm::SimplifyCall(ImmutableCallSite ICS, const SimplifyQuery &Q) {
+  CallSite CS(const_cast<Instruction*>(ICS.getInstruction()));
+  return ::SimplifyCall(CS, CS.getCalledValue(), CS.arg_begin(), CS.arg_end(),
+                        Q, RecursionLimit);
+}
+
 /// See if we can compute a simplified version of this instruction.
 /// If not, this returns null.
 
@@ -4750,8 +4783,7 @@ Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ,
     break;
   case Instruction::Call: {
     CallSite CS(cast<CallInst>(I));
-    Result = SimplifyCall(CS, CS.getCalledValue(), CS.arg_begin(), CS.arg_end(),
-                          Q);
+    Result = SimplifyCall(CS, Q);
     break;
   }
 #define HANDLE_CAST_INST(num, opc, clas) case Instruction::opc:
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp
index 7b792ed0a2e2..0e3f498cb14c 100644
--- a/lib/Analysis/Lint.cpp
+++ b/lib/Analysis/Lint.cpp
@@ -265,13 +265,21 @@ void Lint::visitCallSite(CallSite CS) {
         // Check that noalias arguments don't alias other arguments. This is
         // not fully precise because we don't know the sizes of the dereferenced
         // memory regions.
-        if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy())
-          for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI)
+        if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy()) {
+          AttributeList PAL = CS.getAttributes();
+          unsigned ArgNo = 0;
+          for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI) {
+            // Skip ByVal arguments since they will be memcpy'd to the callee's
+            // stack so we're not really passing the pointer anyway.
+            if (PAL.hasParamAttribute(ArgNo++, Attribute::ByVal))
+              continue;
             if (AI != BI && (*BI)->getType()->isPointerTy()) {
               AliasResult Result = AA->alias(*AI, *BI);
               Assert(Result != MustAlias && Result != PartialAlias,
                      "Unusual: noalias argument aliases another argument", &I);
             }
+          }
+        }
 
         // Check that an sret argument points to valid memory.
         if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) {
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index bb7bf967994c..bf83f52ccf2e 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -920,14 +920,6 @@ void MemoryDependenceResults::getNonLocalPointerDependency(
     Instruction *QueryInst, SmallVectorImpl<NonLocalDepResult> &Result) {
   const MemoryLocation Loc = MemoryLocation::get(QueryInst);
   bool isLoad = isa<LoadInst>(QueryInst);
-  return getNonLocalPointerDependencyFrom(QueryInst, Loc, isLoad, Result);
-}
-
-void MemoryDependenceResults::getNonLocalPointerDependencyFrom(
-    Instruction *QueryInst,
-    const MemoryLocation &Loc,
-    bool isLoad,
-    SmallVectorImpl<NonLocalDepResult> &Result) {
   BasicBlock *FromBB = QueryInst->getParent();
   assert(FromBB);
 
@@ -1127,15 +1119,21 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
   // If we already have a cache entry for this CacheKey, we may need to do some
   // work to reconcile the cache entry and the current query.
   if (!Pair.second) {
-    if (CacheInfo->Size != Loc.Size) {
-      // The query's Size differs from the cached one. Throw out the
-      // cached data and proceed with the query at the new size.
+    if (CacheInfo->Size < Loc.Size) {
+      // The query's Size is greater than the cached one. Throw out the
+      // cached data and proceed with the query at the greater size.
       CacheInfo->Pair = BBSkipFirstBlockPair();
       CacheInfo->Size = Loc.Size;
       for (auto &Entry : CacheInfo->NonLocalDeps)
         if (Instruction *Inst = Entry.getResult().getInst())
           RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
       CacheInfo->NonLocalDeps.clear();
+    } else if (CacheInfo->Size > Loc.Size) {
+      // This query's Size is less than the cached one. Conservatively restart
+      // the query using the greater size.
+      return getNonLocalPointerDepFromBB(
+          QueryInst, Pointer, Loc.getWithNewSize(CacheInfo->Size), isLoad,
+          StartBB, Result, Visited, SkipFirstBlock);
     }
 
     // If the query's AATags are inconsistent with the cached one,
diff --git a/lib/Analysis/ModuleSummaryAnalysis.cpp b/lib/Analysis/ModuleSummaryAnalysis.cpp
index 10badd89a4a8..efa5bd564ad0 100644
--- a/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -306,7 +306,9 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
       NonRenamableLocal || HasInlineAsmMaybeReferencingInternal ||
       // Inliner doesn't handle variadic functions.
       // FIXME: refactor this to use the same code that inliner is using.
-      F.isVarArg();
+      F.isVarArg() ||
+      // Don't try to import functions with noinline attribute.
+      F.getAttributes().hasFnAttribute(Attribute::NoInline);
   GlobalValueSummary::GVFlags Flags(F.getLinkage(), NotEligibleForImport,
                                     /* Live = */ false, F.isDSOLocal());
   FunctionSummary::FFlags FunFlags{
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 2a8088dc4452..f34549ae52b4 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -1268,7 +1268,11 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
     }
     if (!hasTrunc)
       return getAddExpr(Operands);
-    UniqueSCEVs.FindNodeOrInsertPos(ID, IP);  // Mutates IP, returns NULL.
+    // In spite we checked in the beginning that ID is not in the cache,
+    // it is possible that during recursion and different modification
+    // ID came to cache, so if we found it, just return it.
+    if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP))
+      return S;
   }
 
   // trunc(x1*x2*...*xN) --> trunc(x1)*trunc(x2)*...*trunc(xN) if we can
@@ -1284,7 +1288,11 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
     }
     if (!hasTrunc)
       return getMulExpr(Operands);
-    UniqueSCEVs.FindNodeOrInsertPos(ID, IP);  // Mutates IP, returns NULL.
+    // In spite we checked in the beginning that ID is not in the cache,
+    // it is possible that during recursion and different modification
+    // ID came to cache, so if we found it, just return it.
+    if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP))
+      return S;
   }
 
   // If the input value is a chrec scev, truncate the chrec's operands.
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 86f714b930d0..3ceda677ba61 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -187,8 +187,21 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
       // generated code.
       if (isa<DbgInfoIntrinsic>(IP))
         ScanLimit++;
+
+      // Conservatively, do not use any instruction which has any of wrap/exact
+      // flags installed.
+      // TODO: Instead of simply disable poison instructions we can be clever
+      //       here and match SCEV to this instruction.
+      auto canGeneratePoison = [](Instruction *I) {
+        if (isa<OverflowingBinaryOperator>(I) &&
+            (I->hasNoSignedWrap() || I->hasNoUnsignedWrap()))
+          return true;
+        if (isa<PossiblyExactOperator>(I) && I->isExact())
+          return true;
+        return false;
+      };
       if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS &&
-          IP->getOperand(1) == RHS)
+          IP->getOperand(1) == RHS && !canGeneratePoison(&*IP))
         return &*IP;
       if (IP == BlockBegin) break;
     }
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index c9e9c6d1a419..b744cae51ed7 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -314,10 +314,6 @@ int TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
   return Cost;
 }
 
-bool TargetTransformInfo::isOutOfOrder() const {
-  return TTIImpl->isOutOfOrder();
-}
-
 unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const {
   return TTIImpl->getNumberOfRegisters(Vector);
 }
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 2730daefa625..cd4cee631568 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -4238,14 +4238,14 @@ static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred,
   LHS = CmpLHS;
   RHS = CmpRHS;
 
-  // If the predicate is an "or-equal"  (FP) predicate, then signed zeroes may
-  // return inconsistent results between implementations.
-  //   (0.0 <= -0.0) ? 0.0 : -0.0 // Returns 0.0
-  //   minNum(0.0, -0.0)          // May return -0.0 or 0.0 (IEEE 754-2008 5.3.1)
-  // Therefore we behave conservatively and only proceed if at least one of the
-  // operands is known to not be zero, or if we don't care about signed zeroes.
+  // Signed zero may return inconsistent results between implementations.
+  //  (0.0 <= -0.0) ? 0.0 : -0.0 // Returns 0.0
+  //  minNum(0.0, -0.0)          // May return -0.0 or 0.0 (IEEE 754-2008 5.3.1)
+  // Therefore, we behave conservatively and only proceed if at least one of the
+  // operands is known to not be zero or if we don't care about signed zero.
   switch (Pred) {
   default: break;
+  // FIXME: Include OGT/OLT/UGT/ULT.
   case CmpInst::FCMP_OGE: case CmpInst::FCMP_OLE:
   case CmpInst::FCMP_UGE: case CmpInst::FCMP_ULE:
     if (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) &&
@@ -4493,14 +4493,24 @@ SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS,
 
   // Deal with type mismatches.
   if (CastOp && CmpLHS->getType() != TrueVal->getType()) {
-    if (Value *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp))
+    if (Value *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp)) {
+      // If this is a potential fmin/fmax with a cast to integer, then ignore
+      // -0.0 because there is no corresponding integer value.
+      if (*CastOp == Instruction::FPToSI || *CastOp == Instruction::FPToUI)
+        FMF.setNoSignedZeros();
       return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
                                   cast<CastInst>(TrueVal)->getOperand(0), C,
                                   LHS, RHS);
-    if (Value *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp))
+    }
+    if (Value *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp)) {
+      // If this is a potential fmin/fmax with a cast to integer, then ignore
+      // -0.0 because there is no corresponding integer value.
+      if (*CastOp == Instruction::FPToSI || *CastOp == Instruction::FPToUI)
+        FMF.setNoSignedZeros();
       return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
                                   C, cast<CastInst>(FalseVal)->getOperand(0),
                                   LHS, RHS);
+    }
   }
   return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, TrueVal, FalseVal,
                               LHS, RHS);
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 388663eb1db7..3218dce8f575 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1922,14 +1922,16 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
   EVT VT = Sel.getValueType();
   SDLoc DL(Sel);
   SDValue NewCT = DAG.getNode(BinOpcode, DL, VT, CT, C1);
-  assert((NewCT.isUndef() || isConstantOrConstantVector(NewCT) ||
-          isConstantFPBuildVectorOrConstantFP(NewCT)) &&
-         "Failed to constant fold a binop with constant operands");
+  if (!NewCT.isUndef() &&
+      !isConstantOrConstantVector(NewCT, true) &&
+      !isConstantFPBuildVectorOrConstantFP(NewCT))
+    return SDValue();
 
   SDValue NewCF = DAG.getNode(BinOpcode, DL, VT, CF, C1);
-  assert((NewCF.isUndef() || isConstantOrConstantVector(NewCF) ||
-          isConstantFPBuildVectorOrConstantFP(NewCF)) &&
-         "Failed to constant fold a binop with constant operands");
+  if (!NewCF.isUndef() &&
+      !isConstantOrConstantVector(NewCF, true) &&
+      !isConstantFPBuildVectorOrConstantFP(NewCF))
+    return SDValue();
 
   return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
 }
@@ -3577,7 +3579,8 @@ SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
 
   // TODO: What is the 'or' equivalent of this fold?
   // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
-  if (IsAnd && LL == RL && CC0 == CC1 && IsInteger && CC0 == ISD::SETNE &&
+  if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
+      IsInteger && CC0 == ISD::SETNE &&
       ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
        (isAllOnesConstant(LR) && isNullConstant(RR)))) {
     SDValue One = DAG.getConstant(1, DL, OpVT);
@@ -3641,15 +3644,18 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
       VT.getSizeInBits() <= 64) {
     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
-      APInt ADDC = ADDI->getAPIntValue();
-      if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
+      if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
         // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
         // immediate for an add, but it is legal if its top c2 bits are set,
         // transform the ADD so the immediate doesn't need to be materialized
         // in a register.
-        if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
+        APInt ADDC = ADDI->getAPIntValue();
+        APInt SRLC = SRLI->getAPIntValue();
+        if (ADDC.getMinSignedBits() <= 64 &&
+            SRLC.ult(VT.getSizeInBits()) &&
+            !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
-                                             SRLI->getZExtValue());
+                                             SRLC.getZExtValue());
           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
             ADDC |= Mask;
             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
diff --git a/lib/IR/SafepointIRVerifier.cpp b/lib/IR/SafepointIRVerifier.cpp
index 68e0ce39a54e..04deb434cec2 100644
--- a/lib/IR/SafepointIRVerifier.cpp
+++ b/lib/IR/SafepointIRVerifier.cpp
@@ -237,6 +237,59 @@ class InstructionVerifier;
 /// Builds BasicBlockState for each BB of the function.
 /// It can traverse function for verification and provides all required
 /// information.
+///
+/// GC pointer may be in one of three states: relocated, unrelocated and
+/// poisoned.
+/// Relocated pointer may be used without any restrictions.
+/// Unrelocated pointer cannot be dereferenced, passed as argument to any call
+/// or returned. Unrelocated pointer may be safely compared against another
+/// unrelocated pointer or against a pointer exclusively derived from null.
+/// Poisoned pointers are produced when we somehow derive pointer from relocated
+/// and unrelocated pointers (e.g. phi, select). This pointers may be safely
+/// used in a very limited number of situations. Currently the only way to use
+/// it is comparison against constant exclusively derived from null. All
+/// limitations arise due to their undefined state: this pointers should be
+/// treated as relocated and unrelocated simultaneously.
+/// Rules of deriving:
+/// R + U = P - that's where the poisoned pointers come from
+/// P + X = P
+/// U + U = U
+/// R + R = R
+/// X + C = X
+/// Where "+" - any operation that somehow derive pointer, U - unrelocated,
+/// R - relocated and P - poisoned, C - constant, X - U or R or P or C or
+/// nothing (in case when "+" is unary operation).
+/// Deriving of pointers by itself is always safe.
+/// NOTE: when we are making decision on the status of instruction's result:
+/// a) for phi we need to check status of each input *at the end of
+///    corresponding predecessor BB*.
+/// b) for other instructions we need to check status of each input *at the
+///    current point*.
+///
+/// FIXME: This works fairly well except one case
+///     bb1:
+///     p = *some GC-ptr def*
+///     p1 = gep p, offset
+///         /     |
+///        /      |
+///    bb2:       |
+///    safepoint  |
+///        \      |
+///         \     |
+///      bb3:
+///      p2 = phi [p, bb2] [p1, bb1]
+///      p3 = phi [p, bb2] [p, bb1]
+///      here p and p1 is unrelocated
+///           p2 and p3 is poisoned (though they shouldn't be)
+///
+/// This leads to some weird results:
+///      cmp eq p, p2 - illegal instruction (false-positive)
+///      cmp eq p1, p2 - illegal instruction (false-positive)
+///      cmp eq p, p3 - illegal instruction (false-positive)
+///      cmp eq p, p1 - ok
+/// To fix this we need to introduce conception of generations and be able to
+/// check if two values belong to one generation or not. This way p2 will be
+/// considered to be unrelocated and no false alarm will happen.
 class GCPtrTracker {
   const Function &F;
   SpecificBumpPtrAllocator<BasicBlockState> BSAllocator;
@@ -244,6 +297,9 @@ class GCPtrTracker {
   // This set contains defs of unrelocated pointers that are proved to be legal
   // and don't need verification.
   DenseSet<const Instruction *> ValidUnrelocatedDefs;
+  // This set contains poisoned defs. They can be safely ignored during
+  // verification too.
+  DenseSet<const Value *> PoisonedDefs;
 
 public:
   GCPtrTracker(const Function &F, const DominatorTree &DT);
@@ -251,6 +307,8 @@ public:
   BasicBlockState *getBasicBlockState(const BasicBlock *BB);
   const BasicBlockState *getBasicBlockState(const BasicBlock *BB) const;
 
+  bool isValuePoisoned(const Value *V) const { return PoisonedDefs.count(V); }
+
   /// Traverse each BB of the function and call
   /// InstructionVerifier::verifyInstruction for each possibly invalid
   /// instruction.
@@ -349,7 +407,9 @@ const BasicBlockState *GCPtrTracker::getBasicBlockState(
 }
 
 bool GCPtrTracker::instructionMayBeSkipped(const Instruction *I) const {
-  return ValidUnrelocatedDefs.count(I);
+  // Poisoned defs are skipped since they are always safe by itself by
+  // definition (for details see comment to this class).
+  return ValidUnrelocatedDefs.count(I) || PoisonedDefs.count(I);
 }
 
 void GCPtrTracker::verifyFunction(GCPtrTracker &&Tracker,
@@ -418,31 +478,78 @@ bool GCPtrTracker::removeValidUnrelocatedDefs(const BasicBlock *BB,
          "Passed Contribution should be from the passed BasicBlockState!");
   AvailableValueSet AvailableSet = BBS->AvailableIn;
   bool ContributionChanged = false;
+  // For explanation why instructions are processed this way see
+  // "Rules of deriving" in the comment to this class.
   for (const Instruction &I : *BB) {
-    bool ProducesUnrelocatedPointer = false;
-    if ((isa<GetElementPtrInst>(I) || isa<BitCastInst>(I)) &&
-        containsGCPtrType(I.getType())) {
-      // GEP/bitcast of unrelocated pointer is legal by itself but this
-      // def shouldn't appear in any AvailableSet.
+    bool ValidUnrelocatedPointerDef = false;
+    bool PoisonedPointerDef = false;
+    // TODO: `select` instructions should be handled here too.
+    if (const PHINode *PN = dyn_cast<PHINode>(&I)) {
+      if (containsGCPtrType(PN->getType())) {
+        // If both is true, output is poisoned.
+        bool HasRelocatedInputs = false;
+        bool HasUnrelocatedInputs = false;
+        for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+          const BasicBlock *InBB = PN->getIncomingBlock(i);
+          const Value *InValue = PN->getIncomingValue(i);
+
+          if (isNotExclusivelyConstantDerived(InValue)) {
+            if (isValuePoisoned(InValue)) {
+              // If any of inputs is poisoned, output is always poisoned too.
+              HasRelocatedInputs = true;
+              HasUnrelocatedInputs = true;
+              break;
+            }
+            if (BlockMap[InBB]->AvailableOut.count(InValue))
+              HasRelocatedInputs = true;
+            else
+              HasUnrelocatedInputs = true;
+          }
+        }
+        if (HasUnrelocatedInputs) {
+          if (HasRelocatedInputs)
+            PoisonedPointerDef = true;
+          else
+            ValidUnrelocatedPointerDef = true;
+        }
+      }
+    } else if ((isa<GetElementPtrInst>(I) || isa<BitCastInst>(I)) &&
+               containsGCPtrType(I.getType())) {
+      // GEP/bitcast of unrelocated pointer is legal by itself but this def
+      // shouldn't appear in any AvailableSet.
       for (const Value *V : I.operands())
         if (containsGCPtrType(V->getType()) &&
             isNotExclusivelyConstantDerived(V) && !AvailableSet.count(V)) {
-          ProducesUnrelocatedPointer = true;
+          if (isValuePoisoned(V))
+            PoisonedPointerDef = true;
+          else
+            ValidUnrelocatedPointerDef = true;
           break;
         }
     }
-    if (!ProducesUnrelocatedPointer) {
-      bool Cleared = false;
-      transferInstruction(I, Cleared, AvailableSet);
-      (void)Cleared;
-    } else {
-      // Remove def of unrelocated pointer from Contribution of this BB
-      // and trigger update of all its successors.
+    assert(!(ValidUnrelocatedPointerDef && PoisonedPointerDef) &&
+           "Value cannot be both unrelocated and poisoned!");
+    if (ValidUnrelocatedPointerDef) {
+      // Remove def of unrelocated pointer from Contribution of this BB and
+      // trigger update of all its successors.
       Contribution.erase(&I);
+      PoisonedDefs.erase(&I);
       ValidUnrelocatedDefs.insert(&I);
-      DEBUG(dbgs() << "Removing " << I << " from Contribution of "
+      DEBUG(dbgs() << "Removing urelocated " << I << " from Contribution of "
                    << BB->getName() << "\n");
       ContributionChanged = true;
+    } else if (PoisonedPointerDef) {
+      // Mark pointer as poisoned, remove its def from Contribution and trigger
+      // update of all successors.
+      Contribution.erase(&I);
+      PoisonedDefs.insert(&I);
+      DEBUG(dbgs() << "Removing poisoned " << I << " from Contribution of "
+                   << BB->getName() << "\n");
+      ContributionChanged = true;
+    } else {
+      bool Cleared = false;
+      transferInstruction(I, Cleared, AvailableSet);
+      (void)Cleared;
     }
   }
   return ContributionChanged;
@@ -524,8 +631,8 @@ void InstructionVerifier::verifyInstruction(
 
     // Returns true if LHS and RHS are unrelocated pointers and they are
     // valid unrelocated uses.
-    auto hasValidUnrelocatedUse = [&AvailableSet, baseTyLHS, baseTyRHS, &LHS,
-                                   &RHS] () {
+    auto hasValidUnrelocatedUse = [&AvailableSet, Tracker, baseTyLHS, baseTyRHS,
+                                   &LHS, &RHS] () {
         // A cmp instruction has valid unrelocated pointer operands only if
         // both operands are unrelocated pointers.
         // In the comparison between two pointers, if one is an unrelocated
@@ -545,12 +652,23 @@ void InstructionVerifier::verifyInstruction(
             (baseTyLHS == BaseType::NonConstant &&
              baseTyRHS == BaseType::ExclusivelySomeConstant))
           return false;
+
+        // If one of pointers is poisoned and other is not exclusively derived
+        // from null it is an invalid expression: it produces poisoned result
+        // and unless we want to track all defs (not only gc pointers) the only
+        // option is to prohibit such instructions.
+        if ((Tracker->isValuePoisoned(LHS) && baseTyRHS != ExclusivelyNull) ||
+            (Tracker->isValuePoisoned(RHS) && baseTyLHS != ExclusivelyNull))
+            return false;
+
         // All other cases are valid cases enumerated below:
-        // 1. Comparison between an exlusively derived null pointer and a
+        // 1. Comparison between an exclusively derived null pointer and a
         // constant base pointer.
-        // 2. Comparison between an exlusively derived null pointer and a
+        // 2. Comparison between an exclusively derived null pointer and a
         // non-constant unrelocated base pointer.
         // 3. Comparison between 2 unrelocated pointers.
+        // 4. Comparison between a pointer exclusively derived from null and a
+        // non-constant poisoned pointer.
         return true;
     };
     if (!hasValidUnrelocatedUse()) {
diff --git a/lib/LTO/ThinLTOCodeGenerator.cpp b/lib/LTO/ThinLTOCodeGenerator.cpp
index c8b3892375f6..abcd8905ad35 100644
--- a/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -76,7 +76,7 @@ static void saveTempBitcode(const Module &TheModule, StringRef TempDir,
   if (TempDir.empty())
     return;
   // User asked to save temps, let dump the bitcode file after import.
-  std::string SaveTempPath = (TempDir + llvm::utostr(count) + Suffix).str();
+  std::string SaveTempPath = (TempDir + llvm::Twine(count) + Suffix).str();
   std::error_code EC;
   raw_fd_ostream OS(SaveTempPath, EC, sys::fs::F_None);
   if (EC)
diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp
index f94e9d3c4785..3e2150a451e0 100644
--- a/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/lib/MC/MCParser/ELFAsmParser.cpp
@@ -427,13 +427,13 @@ bool ELFAsmParser::parseGroup(StringRef &GroupName) {
     GroupName = getTok().getString();
     Lex();
   } else if (getParser().parseIdentifier(GroupName)) {
-    return true;
+    return TokError("invalid group name");
   }
   if (L.is(AsmToken::Comma)) {
     Lex();
     StringRef Linkage;
     if (getParser().parseIdentifier(Linkage))
-      return true;
+      return TokError("invalid linkage");
     if (Linkage != "comdat")
       return TokError("Linkage must be 'comdat'");
   }
diff --git a/lib/ProfileData/Coverage/CoverageMapping.cpp b/lib/ProfileData/Coverage/CoverageMapping.cpp
index ccc8cc56eb0a..8dbd58632f0e 100644
--- a/lib/ProfileData/Coverage/CoverageMapping.cpp
+++ b/lib/ProfileData/Coverage/CoverageMapping.cpp
@@ -628,7 +628,7 @@ CoverageMapping::getInstantiationGroups(StringRef Filename) const {
   }
 
   std::vector<InstantiationGroup> Result;
-  for (const auto &InstantiationSet : InstantiationSetCollector) {
+  for (auto &InstantiationSet : InstantiationSetCollector) {
     InstantiationGroup IG{InstantiationSet.first.first,
                           InstantiationSet.first.second,
                           std::move(InstantiationSet.second)};
diff --git a/lib/Support/ARMAttributeParser.cpp b/lib/Support/ARMAttributeParser.cpp
index 3d800eb7a96c..e39bddc4e8f2 100644
--- a/lib/Support/ARMAttributeParser.cpp
+++ b/lib/Support/ARMAttributeParser.cpp
@@ -666,7 +666,7 @@ void ARMAttributeParser::ParseSubsection(const uint8_t *Data, uint32_t Length) {
       ParseIndexList(Data, Offset, Indicies);
       break;
     default:
-      errs() << "unrecognised tag: 0x" << utohexstr(Tag) << '\n';
+      errs() << "unrecognised tag: 0x" << Twine::utohexstr(Tag) << '\n';
       return;
     }
 
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp
index b547a0932709..4caf4a4fdce0 100644
--- a/lib/Support/CommandLine.cpp
+++ b/lib/Support/CommandLine.cpp
@@ -688,7 +688,9 @@ static bool EatsUnboundedNumberOfValues(const Option *O) {
          O->getNumOccurrencesFlag() == cl::OneOrMore;
 }
 
-static bool isWhitespace(char C) { return strchr(" \t\n\r\f\v", C); }
+static bool isWhitespace(char C) {
+  return C == ' ' || C == '\t' || C == '\r' || C == '\n';
+}
 
 static bool isQuote(char C) { return C == '\"' || C == '\''; }
 
@@ -709,17 +711,19 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver,
         break;
     }
 
+    char C = Src[I];
+
     // Backslash escapes the next character.
-    if (I + 1 < E && Src[I] == '\\') {
+    if (I + 1 < E && C == '\\') {
       ++I; // Skip the escape.
       Token.push_back(Src[I]);
       continue;
     }
 
     // Consume a quoted string.
-    if (isQuote(Src[I])) {
-      char Quote = Src[I++];
-      while (I != E && Src[I] != Quote) {
+    if (isQuote(C)) {
+      ++I;
+      while (I != E && Src[I] != C) {
         // Backslash escapes the next character.
         if (Src[I] == '\\' && I + 1 != E)
           ++I;
@@ -732,7 +736,7 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver,
     }
 
     // End the token if this is whitespace.
-    if (isWhitespace(Src[I])) {
+    if (isWhitespace(C)) {
       if (!Token.empty())
         NewArgv.push_back(Saver.save(StringRef(Token)).data());
       Token.clear();
@@ -740,7 +744,7 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver,
     }
 
     // This is a normal character.  Append it.
-    Token.push_back(Src[I]);
+    Token.push_back(C);
   }
 
   // Append the last token after hitting EOF with no whitespace.
@@ -798,25 +802,27 @@ void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver,
   // end of the source string.
   enum { INIT, UNQUOTED, QUOTED } State = INIT;
   for (size_t I = 0, E = Src.size(); I != E; ++I) {
+    char C = Src[I];
+
     // INIT state indicates that the current input index is at the start of
     // the string or between tokens.
     if (State == INIT) {
-      if (isWhitespace(Src[I])) {
+      if (isWhitespace(C)) {
         // Mark the end of lines in response files
-        if (MarkEOLs && Src[I] == '\n')
+        if (MarkEOLs && C == '\n')
           NewArgv.push_back(nullptr);
         continue;
       }
-      if (Src[I] == '"') {
+      if (C == '"') {
         State = QUOTED;
         continue;
       }
-      if (Src[I] == '\\') {
+      if (C == '\\') {
         I = parseBackslash(Src, I, Token);
         State = UNQUOTED;
         continue;
       }
-      Token.push_back(Src[I]);
+      Token.push_back(C);
       State = UNQUOTED;
       continue;
     }
@@ -825,38 +831,38 @@ void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver,
     // quotes.
     if (State == UNQUOTED) {
       // Whitespace means the end of the token.
-      if (isWhitespace(Src[I])) {
+      if (isWhitespace(C)) {
         NewArgv.push_back(Saver.save(StringRef(Token)).data());
         Token.clear();
         State = INIT;
         // Mark the end of lines in response files
-        if (MarkEOLs && Src[I] == '\n')
+        if (MarkEOLs && C == '\n')
           NewArgv.push_back(nullptr);
         continue;
       }
-      if (Src[I] == '"') {
+      if (C == '"') {
         State = QUOTED;
         continue;
       }
-      if (Src[I] == '\\') {
+      if (C == '\\') {
         I = parseBackslash(Src, I, Token);
         continue;
       }
-      Token.push_back(Src[I]);
+      Token.push_back(C);
       continue;
     }
 
     // QUOTED state means that it's reading a token quoted by double quotes.
     if (State == QUOTED) {
-      if (Src[I] == '"') {
+      if (C == '"') {
         State = UNQUOTED;
         continue;
       }
-      if (Src[I] == '\\') {
+      if (C == '\\') {
         I = parseBackslash(Src, I, Token);
         continue;
       }
-      Token.push_back(Src[I]);
+      Token.push_back(C);
     }
   }
   // Append the last token after hitting EOF with no whitespace.
diff --git a/lib/TableGen/Main.cpp b/lib/TableGen/Main.cpp
index fc9d0cc08885..be35f894cccd 100644
--- a/lib/TableGen/Main.cpp
+++ b/lib/TableGen/Main.cpp
@@ -110,7 +110,7 @@ int llvm::TableGenMain(char *argv0, TableGenMainFn *MainFn) {
     return 1;
 
   if (ErrorsPrinted > 0)
-    return reportError(argv0, utostr(ErrorsPrinted) + " errors.\n");
+    return reportError(argv0, Twine(ErrorsPrinted) + " errors.\n");
 
   // Declare success.
   Out.keep();
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index abbba7d1d5a9..40836b00b9e6 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -3673,15 +3673,6 @@ static bool getFMAPatterns(MachineInstr &Root,
     }
     break;
   case AArch64::FSUBv2f32:
-    if (canCombineWithFMUL(MBB, Root.getOperand(1),
-                           AArch64::FMULv2i32_indexed)) {
-      Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP1);
-      Found = true;
-    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
-                                  AArch64::FMULv2f32)) {
-      Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP1);
-      Found = true;
-    }
     if (canCombineWithFMUL(MBB, Root.getOperand(2),
                            AArch64::FMULv2i32_indexed)) {
       Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2);
@@ -3691,17 +3682,17 @@ static bool getFMAPatterns(MachineInstr &Root,
       Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP2);
       Found = true;
     }
-    break;
-  case AArch64::FSUBv2f64:
     if (canCombineWithFMUL(MBB, Root.getOperand(1),
-                           AArch64::FMULv2i64_indexed)) {
-      Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP1);
+                           AArch64::FMULv2i32_indexed)) {
+      Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP1);
       Found = true;
     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
-                                  AArch64::FMULv2f64)) {
-      Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP1);
+                                  AArch64::FMULv2f32)) {
+      Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP1);
       Found = true;
     }
+    break;
+  case AArch64::FSUBv2f64:
     if (canCombineWithFMUL(MBB, Root.getOperand(2),
                            AArch64::FMULv2i64_indexed)) {
       Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2);
@@ -3711,17 +3702,17 @@ static bool getFMAPatterns(MachineInstr &Root,
       Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP2);
       Found = true;
     }
-    break;
-  case AArch64::FSUBv4f32:
     if (canCombineWithFMUL(MBB, Root.getOperand(1),
-                           AArch64::FMULv4i32_indexed)) {
-      Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP1);
+                           AArch64::FMULv2i64_indexed)) {
+      Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP1);
       Found = true;
     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
-                                  AArch64::FMULv4f32)) {
-      Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP1);
+                                  AArch64::FMULv2f64)) {
+      Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP1);
       Found = true;
     }
+    break;
+  case AArch64::FSUBv4f32:
     if (canCombineWithFMUL(MBB, Root.getOperand(2),
                            AArch64::FMULv4i32_indexed)) {
       Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2);
@@ -3731,6 +3722,15 @@ static bool getFMAPatterns(MachineInstr &Root,
       Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP2);
       Found = true;
     }
+    if (canCombineWithFMUL(MBB, Root.getOperand(1),
+                           AArch64::FMULv4i32_indexed)) {
+      Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP1);
+      Found = true;
+    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
+                                  AArch64::FMULv4f32)) {
+      Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP1);
+      Found = true;
+    }
     break;
   }
   return Found;
@@ -5062,4 +5062,4 @@ MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
   It = MBB.insert(It, LDRXpost);
 
   return It;
-}
-\ No newline at end of file
+}
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 2aa395642c40..753cfff4cdae 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -1797,11 +1797,7 @@ void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
     llvm_unreachable("unsupported fp type");
 
   APInt API = APF.bitcastToAPInt();
-  std::string hexstr(utohexstr(API.getZExtValue()));
-  O << lead;
-  if (hexstr.length() < numHex)
-    O << std::string(numHex - hexstr.length(), '0');
-  O << utohexstr(API.getZExtValue());
+  O << lead << format_hex_no_prefix(API.getZExtValue(), numHex, /*Upper=*/true);
 }
 
 void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
diff --git a/lib/Target/NVPTX/NVPTXMCExpr.cpp b/lib/Target/NVPTX/NVPTXMCExpr.cpp
index 86a28f7d0700..a754a6a36dab 100644
--- a/lib/Target/NVPTX/NVPTXMCExpr.cpp
+++ b/lib/Target/NVPTX/NVPTXMCExpr.cpp
@@ -11,6 +11,7 @@
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCContext.h"
+#include "llvm/Support/Format.h"
 using namespace llvm;
 
 #define DEBUG_TYPE "nvptx-mcexpr"
@@ -47,10 +48,7 @@ void NVPTXFloatMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
   }
 
   APInt API = APF.bitcastToAPInt();
-  std::string HexStr(utohexstr(API.getZExtValue()));
-  if (HexStr.length() < NumHex)
-    OS << std::string(NumHex - HexStr.length(), '0');
-  OS << utohexstr(API.getZExtValue());
+  OS << format_hex_no_prefix(API.getZExtValue(), NumHex, /*Upper=*/true);
 }
 
 const NVPTXGenericMCSymbolRefExpr*
diff --git a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
index d19463ccb51f..204d97cbdd44 100644
--- a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
@@ -190,7 +190,7 @@ void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) {
 
     if (isVerbose()) {
       OutStreamer->AddComment("fallthrough-return: $pop" +
-                              utostr(MFI->getWARegStackId(
+                              Twine(MFI->getWARegStackId(
                                   MFI->getWAReg(MI->getOperand(0).getReg()))));
       OutStreamer->AddBlankLine();
     }
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 87c65347e334..f1ce430f3323 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -2377,10 +2377,11 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
     Flags |= Prefix;
     Name = Parser.getTok().getString();
     Parser.Lex(); // eat the prefix
-    // Hack: we could have something like
+    // Hack: we could have something like "rep # some comment" or
     //    "lock; cmpxchg16b $1" or "lock\0A\09incl" or "lock/incl"
     while (Name.startswith(";") || Name.startswith("\n") ||
-           Name.startswith("\t") || Name.startswith("/")) {
+           Name.startswith("#") || Name.startswith("\t") ||
+           Name.startswith("/")) {
       Name = Parser.getTok().getString();
       Parser.Lex(); // go to next prefix or instr
     }
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 7e7c35569093..ba998467b799 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -739,7 +739,8 @@ def ICLFeatures : ProcessorFeatures<CNLFeatures.Value, [
   FeatureVNNI,
   FeatureVPCLMULQDQ,
   FeatureVPOPCNTDQ,
-  FeatureGFNI
+  FeatureGFNI,
+  FeatureCLWB
 ]>;
 
 class IcelakeProc<string Name> : ProcModel<Name, SkylakeServerModel,
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 2c1faa157ddb..ba3b02e25a9d 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1310,8 +1310,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
       setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
       setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal);
-
-      setOperationAction(ISD::MUL,        MVT::v8i64, Legal);
     }
 
     if (Subtarget.hasCDI()) {
@@ -1388,8 +1386,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
         setOperationAction(ISD::UINT_TO_FP,     VT, Legal);
         setOperationAction(ISD::FP_TO_SINT,     VT, Legal);
         setOperationAction(ISD::FP_TO_UINT,     VT, Legal);
-
-        setOperationAction(ISD::MUL,            VT, Legal);
       }
     }
 
@@ -7108,8 +7104,8 @@ static SDValue ConvertI1VectorToInteger(SDValue Op, SelectionDAG &DAG) {
   return DAG.getConstant(Immediate, dl, VT);
 }
 // Lower BUILD_VECTOR operation for v8i1 and v16i1 types.
-SDValue
-X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
+static SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG,
+                                     const X86Subtarget &Subtarget) {
 
   MVT VT = Op.getSimpleValueType();
   assert((VT.getVectorElementType() == MVT::i1) &&
@@ -7131,8 +7127,8 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
           DAG.getBuildVector(MVT::v32i1, dl, Op.getNode()->ops().slice(32, 32));
       // We have to manually lower both halves so getNode doesn't try to
       // reassemble the build_vector.
-      Lower = LowerBUILD_VECTORvXi1(Lower, DAG);
-      Upper = LowerBUILD_VECTORvXi1(Upper, DAG);
+      Lower = LowerBUILD_VECTORvXi1(Lower, DAG, Subtarget);
+      Upper = LowerBUILD_VECTORvXi1(Upper, DAG, Subtarget);
       return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lower, Upper);
     }
     SDValue Imm = ConvertI1VectorToInteger(Op, DAG);
@@ -7881,7 +7877,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
 
   // Generate vectors for predicate vectors.
   if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512())
-    return LowerBUILD_VECTORvXi1(Op, DAG);
+    return LowerBUILD_VECTORvXi1(Op, DAG, Subtarget);
 
   if (SDValue VectorConstant = materializeVectorConstant(Op, DAG, Subtarget))
     return VectorConstant;
@@ -15658,8 +15654,8 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
 }
 
 /// 64-bit unsigned integer to double expansion.
-SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
-                                               SelectionDAG &DAG) const {
+static SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG,
+                                   const X86Subtarget &Subtarget) {
   // This algorithm is not obvious. Here it is what we're trying to output:
   /*
      movq       %rax,  %xmm0
@@ -15679,7 +15675,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
   // Build some magic constants.
   static const uint32_t CV0[] = { 0x43300000, 0x45300000, 0, 0 };
   Constant *C0 = ConstantDataVector::get(*Context, CV0);
-  auto PtrVT = getPointerTy(DAG.getDataLayout());
+  auto PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
   SDValue CPIdx0 = DAG.getConstantPool(C0, PtrVT, 16);
 
   SmallVector<Constant*,2> CV1;
@@ -15726,8 +15722,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
 }
 
 /// 32-bit unsigned integer to float expansion.
-SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op,
-                                               SelectionDAG &DAG) const {
+static SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG,
+                                   const X86Subtarget &Subtarget) {
   SDLoc dl(Op);
   // FP constant to bias correct the final result.
   SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), dl,
@@ -15760,16 +15756,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op,
   SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias);
 
   // Handle final rounding.
-  MVT DestVT = Op.getSimpleValueType();
-
-  if (DestVT.bitsLT(MVT::f64))
-    return DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub,
-                       DAG.getIntPtrConstant(0, dl));
-  if (DestVT.bitsGT(MVT::f64))
-    return DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub);
-
-  // Handle final rounding.
-  return Sub;
+  return DAG.getFPExtendOrRound(Sub, dl, Op.getSimpleValueType());
 }
 
 static SDValue lowerUINT_TO_FP_v2i32(SDValue Op, SelectionDAG &DAG,
@@ -15901,8 +15888,8 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
   return DAG.getNode(ISD::FADD, DL, VecFloatVT, LowBitcast, FHigh);
 }
 
-SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op,
-                                               SelectionDAG &DAG) const {
+static SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG,
+                                   const X86Subtarget &Subtarget) {
   SDValue N0 = Op.getOperand(0);
   MVT SrcVT = N0.getSimpleValueType();
   SDLoc dl(Op);
@@ -15941,7 +15928,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
   auto PtrVT = getPointerTy(DAG.getDataLayout());
 
   if (Op.getSimpleValueType().isVector())
-    return lowerUINT_TO_FP_vec(Op, DAG);
+    return lowerUINT_TO_FP_vec(Op, DAG, Subtarget);
 
   MVT SrcVT = N0.getSimpleValueType();
   MVT DstVT = Op.getSimpleValueType();
@@ -15954,9 +15941,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
   }
 
   if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64)
-    return LowerUINT_TO_FP_i64(Op, DAG);
+    return LowerUINT_TO_FP_i64(Op, DAG, Subtarget);
   if (SrcVT == MVT::i32 && X86ScalarSSEf64)
-    return LowerUINT_TO_FP_i32(Op, DAG);
+    return LowerUINT_TO_FP_i32(Op, DAG, Subtarget);
   if (Subtarget.is64Bit() && SrcVT == MVT::i64 && DstVT == MVT::f32)
     return SDValue();
 
@@ -22097,7 +22084,14 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
   // Lower v4i32 mul as 2x shuffle, 2x pmuludq, 2x shuffle.
   if (VT == MVT::v4i32) {
     assert(Subtarget.hasSSE2() && !Subtarget.hasSSE41() &&
-           "Should not custom lower when pmuldq is available!");
+           "Should not custom lower when pmulld is available!");
+
+    // If the upper 17 bits of each element are zero then we can use PMADD.
+    APInt Mask17 = APInt::getHighBitsSet(32, 17);
+    if (DAG.MaskedValueIsZero(A, Mask17) && DAG.MaskedValueIsZero(B, Mask17))
+      return DAG.getNode(X86ISD::VPMADDWD, dl, VT,
+                         DAG.getBitcast(MVT::v8i16, A),
+                         DAG.getBitcast(MVT::v8i16, B));
 
     // Extract the odd parts.
     static const int UnpackMask[] = { 1, -1, 3, -1 };
@@ -22149,6 +22143,11 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
   bool AHiIsZero = DAG.MaskedValueIsZero(A, UpperBitsMask);
   bool BHiIsZero = DAG.MaskedValueIsZero(B, UpperBitsMask);
 
+  // If DQI is supported we can use MULLQ, but MULUDQ is still better if the
+  // the high bits are known to be zero.
+  if (Subtarget.hasDQI() && (!AHiIsZero || !BHiIsZero))
+    return Op;
+
   // Bit cast to 32-bit vectors for MULUDQ.
   SDValue Alo = DAG.getBitcast(MulVT, A);
   SDValue Blo = DAG.getBitcast(MulVT, B);
@@ -31012,8 +31011,8 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
     DAG.ReplaceAllUsesOfValueWith(SDValue(Extract, 0), Vals[IdxVal]);
   }
 
-  // The replacement was made in place; don't return anything.
-  return SDValue();
+  // The replacement was made in place; return N so it won't be revisited.
+  return SDValue(N, 0);
 }
 
 /// If a vector select has an operand that is -1 or 0, try to simplify the
@@ -32267,6 +32266,13 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG,
   if ((NumElts % 2) != 0)
     return SDValue();
 
+  // If the upper 17 bits of each element are zero then we can use PMADD.
+  APInt Mask17 = APInt::getHighBitsSet(32, 17);
+  if (VT == MVT::v4i32 && DAG.MaskedValueIsZero(N0, Mask17) &&
+      DAG.MaskedValueIsZero(N1, Mask17))
+    return DAG.getNode(X86ISD::VPMADDWD, DL, VT, DAG.getBitcast(MVT::v8i16, N0),
+                       DAG.getBitcast(MVT::v8i16, N1));
+
   unsigned RegSize = 128;
   MVT OpsVT = MVT::getVectorVT(MVT::i16, RegSize / 16);
   EVT ReducedVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, NumElts);
@@ -34882,7 +34888,7 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
     // X86 is rubbish at scalar and vector i64 multiplies (until AVX512DQ) - its
     // better to truncate if we have the chance.
     if (SrcVT.getScalarType() == MVT::i64 && TLI.isOperationLegal(Opcode, VT) &&
-        !TLI.isOperationLegal(Opcode, SrcVT))
+        !Subtarget.hasDQI())
       return TruncateArithmetic(Src.getOperand(0), Src.getOperand(1));
     LLVM_FALLTHROUGH;
   case ISD::ADD: {
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 8464081b1b08..7708f577ba70 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -1167,7 +1167,6 @@ namespace llvm {
                                                bool isReplace) const;
 
     SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
@@ -1183,9 +1182,6 @@ namespace llvm {
 
     SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const;
-    SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 2a2286e42405..46c19f18f8d3 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -4420,12 +4420,12 @@ defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus,
 defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus,
                                      SSE_INTALU_ITINS_P, HasBWI, 0>;
 defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
-                                    SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
+                                    SSE_INTMUL_ITINS_P, HasAVX512, 1>, T8PD;
 defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
-                                    SSE_INTALU_ITINS_P, HasBWI, 1>;
+                                    SSE_INTMUL_ITINS_P, HasBWI, 1>;
 defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
-                                    SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD;
-defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SSE_INTALU_ITINS_P,
+                                    SSE_INTMUL_ITINS_P, HasDQI, 1>, T8PD;
+defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SSE_INTMUL_ITINS_P,
                                     HasBWI, 1>;
 defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SSE_INTMUL_ITINS_P,
                                      HasBWI, 1>;
@@ -4454,7 +4454,7 @@ multiclass avx512_binop_all<bits<8> opc, string OpcodeStr, OpndItins itins,
   }
 }
 
-defm VPMULDQ : avx512_binop_all<0x28, "vpmuldq", SSE_INTALU_ITINS_P,
+defm VPMULDQ : avx512_binop_all<0x28, "vpmuldq", SSE_INTMUL_ITINS_P,
                                 avx512vl_i32_info, avx512vl_i64_info,
                                 X86pmuldq, HasAVX512, 1>,T8PD;
 defm VPMULUDQ : avx512_binop_all<0xF4, "vpmuludq", SSE_INTMUL_ITINS_P,
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index b48fa1841979..cb84f9aecf79 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -3734,7 +3734,7 @@ multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
        !if(Is2Addr,
            !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-       [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>,
+       [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))], itins.rr>,
        Sched<[itins.Sched]>;
   def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
        (ins RC:$src1, x86memop:$src2),
@@ -3742,8 +3742,8 @@ multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
            !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
        [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1),
-                                     (bitconvert (memop_frag addr:$src2)))))]>,
-       Sched<[itins.Sched.Folded, ReadAfterLd]>;
+                                     (bitconvert (memop_frag addr:$src2)))))],
+       itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
 }
 } // ExeDomain = SSEPackedInt
 
@@ -6313,7 +6313,7 @@ multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
        !if(Is2Addr,
            !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-       [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>,
+       [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>,
        Sched<[itins.Sched]>;
   def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst),
        (ins RC:$src1, x86memop:$src2),
@@ -6321,8 +6321,8 @@ multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
            !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
        [(set RC:$dst,
-         (OpVT (OpNode RC:$src1, (bitconvert (memop_frag addr:$src2)))))]>,
-       Sched<[itins.Sched.Folded, ReadAfterLd]>;
+         (OpVT (OpNode RC:$src1, (bitconvert (memop_frag addr:$src2)))))],
+       itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
 }
 
 /// SS48I_binop_rm2 - Simple SSE41 binary operator with different src and dst
@@ -6338,7 +6338,7 @@ multiclass SS48I_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
        !if(Is2Addr,
            !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-       [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>,
+       [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))], itins.rr>,
        Sched<[itins.Sched]>;
   def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst),
        (ins RC:$src1, x86memop:$src2),
@@ -6346,8 +6346,8 @@ multiclass SS48I_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
            !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
        [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1),
-                                     (bitconvert (memop_frag addr:$src2)))))]>,
-       Sched<[itins.Sched.Folded, ReadAfterLd]>;
+                                     (bitconvert (memop_frag addr:$src2)))))],
+       itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
 }
 
 let Predicates = [HasAVX, NoVLX] in {
@@ -6924,14 +6924,15 @@ multiclass SS42I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
        !if(Is2Addr,
            !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-       [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>, Sched<[itins.Sched]>;
+       [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>,
+       Sched<[itins.Sched]>;
   def rm : SS428I<opc, MRMSrcMem, (outs RC:$dst),
        (ins RC:$src1, x86memop:$src2),
        !if(Is2Addr,
            !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
        [(set RC:$dst,
-         (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>,
+         (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))], itins.rm>,
        Sched<[itins.Sched.Folded, ReadAfterLd]>;
 }
 
diff --git a/lib/Transforms/IPO/SampleProfile.cpp b/lib/Transforms/IPO/SampleProfile.cpp
index 7086c2eb52c4..a69c009e1a54 100644
--- a/lib/Transforms/IPO/SampleProfile.cpp
+++ b/lib/Transforms/IPO/SampleProfile.cpp
@@ -181,8 +181,9 @@ public:
       StringRef Name, bool IsThinLTOPreLink,
       std::function<AssumptionCache &(Function &)> GetAssumptionCache,
       std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo)
-      : GetAC(GetAssumptionCache), GetTTI(GetTargetTransformInfo),
-        Filename(Name), IsThinLTOPreLink(IsThinLTOPreLink) {}
+      : GetAC(std::move(GetAssumptionCache)),
+        GetTTI(std::move(GetTargetTransformInfo)), Filename(Name),
+        IsThinLTOPreLink(IsThinLTOPreLink) {}
 
   bool doInitialization(Module &M);
   bool runOnModule(Module &M, ModuleAnalysisManager *AM);
@@ -1547,14 +1548,14 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM) {
 
   // Populate the symbol map.
   for (const auto &N_F : M.getValueSymbolTable()) {
-    std::string OrigName = N_F.getKey();
+    StringRef OrigName = N_F.getKey();
     Function *F = dyn_cast<Function>(N_F.getValue());
     if (F == nullptr)
       continue;
     SymbolMap[OrigName] = F;
     auto pos = OrigName.find('.');
-    if (pos != std::string::npos) {
-      std::string NewName = OrigName.substr(0, pos);
+    if (pos != StringRef::npos) {
+      StringRef NewName = OrigName.substr(0, pos);
       auto r = SymbolMap.insert(std::make_pair(NewName, F));
       // Failiing to insert means there is already an entry in SymbolMap,
       // thus there are multiple functions that are mapped to the same
diff --git a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index 945133074059..caffc03339c4 100644
--- a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -90,8 +90,7 @@ void promoteTypeIds(Module &M, StringRef ModuleId) {
     if (isa<MDNode>(MD) && cast<MDNode>(MD)->isDistinct()) {
       Metadata *&GlobalMD = LocalToGlobal[MD];
       if (!GlobalMD) {
-        std::string NewName =
-            (to_string(LocalToGlobal.size()) + ModuleId).str();
+        std::string NewName = (Twine(LocalToGlobal.size()) + ModuleId).str();
         GlobalMD = MDString::get(M.getContext(), NewName);
       }
 
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index a088d447337f..40e52ee755e5 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1802,9 +1802,7 @@ Instruction *InstCombiner::visitVACopyInst(VACopyInst &I) {
 /// instructions. For normal calls, it allows visitCallSite to do the heavy
 /// lifting.
 Instruction *InstCombiner::visitCallInst(CallInst &CI) {
-  auto Args = CI.arg_operands();
-  if (Value *V = SimplifyCall(&CI, CI.getCalledValue(), Args.begin(),
-                              Args.end(), SQ.getWithInstruction(&CI)))
+  if (Value *V = SimplifyCall(&CI, SQ.getWithInstruction(&CI)))
     return replaceInstUsesWith(CI, V);
 
   if (isFreeCall(&CI, &TLI))
@@ -1903,16 +1901,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
             lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/false))
       return replaceInstUsesWith(CI, N);
     return nullptr;
-
   case Intrinsic::bswap: {
     Value *IIOperand = II->getArgOperand(0);
     Value *X = nullptr;
 
-    // TODO should this be in InstSimplify?
-    // bswap(bswap(x)) -> x
-    if (match(IIOperand, m_BSwap(m_Value(X))))
-      return replaceInstUsesWith(CI, X);
-
     // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
     if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
       unsigned C = X->getType()->getPrimitiveSizeInBits() -
@@ -1923,18 +1915,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     }
     break;
   }
-
-  case Intrinsic::bitreverse: {
-    Value *IIOperand = II->getArgOperand(0);
-    Value *X = nullptr;
-
-    // TODO should this be in InstSimplify?
-    // bitreverse(bitreverse(x)) -> x
-    if (match(IIOperand, m_BitReverse(m_Value(X))))
-      return replaceInstUsesWith(CI, X);
-    break;
-  }
-
   case Intrinsic::masked_load:
     if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II, Builder))
       return replaceInstUsesWith(CI, SimplifiedMaskedOp);
@@ -1948,16 +1928,16 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
 
   case Intrinsic::powi:
     if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
-      // powi(x, 0) -> 1.0
-      if (Power->isZero())
-        return replaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0));
-      // powi(x, 1) -> x
-      if (Power->isOne())
-        return replaceInstUsesWith(CI, II->getArgOperand(0));
+      // 0 and 1 are handled in instsimplify
+
       // powi(x, -1) -> 1/x
       if (Power->isMinusOne())
         return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0),
                                           II->getArgOperand(0));
+      // powi(x, 2) -> x*x
+      if (Power->equalsInt(2))
+        return BinaryOperator::CreateFMul(II->getArgOperand(0),
+                                          II->getArgOperand(0));
     }
     break;
 
@@ -2396,7 +2376,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     // The compare intrinsic uses the above assumptions and therefore
     // doesn't require additional flags.
     if ((match(Arg0, m_OneUse(m_FSub(m_Value(A), m_Value(B)))) &&
-         match(Arg1, m_Zero()) &&
+         match(Arg1, m_Zero()) && isa<Instruction>(Arg0) &&
          cast<Instruction>(Arg0)->getFastMathFlags().noInfs())) {
       if (Arg0IsZero)
         std::swap(A, B);
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 87666360c1a0..541dde6c47d2 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -1631,9 +1631,5 @@ Instruction *InstCombiner::visitFRem(BinaryOperator &I) {
                                   SQ.getWithInstruction(&I)))
     return replaceInstUsesWith(I, V);
 
-  // Handle cases involving: rem X, (select Cond, Y, Z)
-  if (simplifyDivRemOfSelectWithZeroOp(I))
-    return &I;
-
   return nullptr;
 }
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 65a96b965227..aeac8910af6b 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -181,11 +181,13 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
   // If extracting a specified index from the vector, see if we can recursively
   // find a previously computed scalar that was inserted into the vector.
   if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) {
-    unsigned IndexVal = IdxC->getZExtValue();
     unsigned VectorWidth = EI.getVectorOperandType()->getNumElements();
 
-    // InstSimplify handles cases where the index is invalid.
-    assert(IndexVal < VectorWidth);
+    // InstSimplify should handle cases where the index is invalid.
+    if (!IdxC->getValue().ule(VectorWidth))
+      return nullptr;
+
+    unsigned IndexVal = IdxC->getZExtValue();
 
     // This instruction only demands the single element from the input vector.
     // If the input vector has a single use, simplify it based on this use
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 8328d4031941..8e39f24d819c 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -2702,9 +2702,10 @@ void FunctionStackPoisoner::copyArgsPassedByValToAllocas() {
       unsigned Align = Arg.getParamAlignment();
       if (Align == 0) Align = DL.getABITypeAlignment(Ty);
 
-      const std::string &Name = Arg.hasName() ? Arg.getName().str() :
-          "Arg" + llvm::to_string(Arg.getArgNo());
-      AllocaInst *AI = IRB.CreateAlloca(Ty, nullptr, Twine(Name) + ".byval");
+      AllocaInst *AI = IRB.CreateAlloca(
+          Ty, nullptr,
+          (Arg.hasName() ? Arg.getName() : "Arg" + Twine(Arg.getArgNo())) +
+              ".byval");
       AI->setAlignment(Align);
       Arg.replaceAllUsesWith(AI);
 
diff --git a/lib/Transforms/Scalar/GVNSink.cpp b/lib/Transforms/Scalar/GVNSink.cpp
index 814a62cd7d65..bf92e43c4715 100644
--- a/lib/Transforms/Scalar/GVNSink.cpp
+++ b/lib/Transforms/Scalar/GVNSink.cpp
@@ -641,7 +641,7 @@ Optional<SinkingInstructionCandidate> GVNSink::analyzeInstructionForSinking(
   DenseMap<uint32_t, unsigned> VNums;
   for (auto *I : Insts) {
     uint32_t N = VN.lookupOrAdd(I);
-    DEBUG(dbgs() << " VN=" << utohexstr(N) << " for" << *I << "\n");
+    DEBUG(dbgs() << " VN=" << Twine::utohexstr(N) << " for" << *I << "\n");
     if (N == ~0U)
       return None;
     VNums[N]++;
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 6af3fef963dc..9c870b42a747 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -476,33 +476,22 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
       Alignment = DL.getABITypeAlignment(EltType);
     }
 
-    // Remember the debug location.
-    DebugLoc Loc;
-    if (!Range.TheStores.empty())
-      Loc = Range.TheStores[0]->getDebugLoc();
+    AMemSet =
+      Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);
 
     DEBUG(dbgs() << "Replace stores:\n";
           for (Instruction *SI : Range.TheStores)
-            dbgs() << *SI << '\n');
+            dbgs() << *SI << '\n';
+          dbgs() << "With: " << *AMemSet << '\n');
+
+    if (!Range.TheStores.empty())
+      AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
 
     // Zap all the stores.
     for (Instruction *SI : Range.TheStores) {
       MD->removeInstruction(SI);
       SI->eraseFromParent();
     }
-
-    // Create the memset after removing the stores, so that if there any cached
-    // non-local dependencies on the removed instructions in
-    // MemoryDependenceAnalysis, the cache entries are updated to "dirty"
-    // entries pointing below the memset, so subsequent queries include the
-    // memset.
-    AMemSet =
-      Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);
-    if (!Range.TheStores.empty())
-      AMemSet->setDebugLoc(Loc);
-
-    DEBUG(dbgs() << "With: " << *AMemSet << '\n');
-
     ++NumMemSetInfer;
   }
 
@@ -1042,22 +1031,9 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
   //
   // NOTE: This is conservative, it will stop on any read from the source loc,
   // not just the defining memcpy.
-  MemoryLocation SourceLoc = MemoryLocation::getForSource(MDep);
-  MemDepResult SourceDep = MD->getPointerDependencyFrom(SourceLoc, false,
-                                                        M->getIterator(), M->getParent());
-
-  if (SourceDep.isNonLocal()) {
-    SmallVector<NonLocalDepResult, 2> NonLocalDepResults;
-    MD->getNonLocalPointerDependencyFrom(M, SourceLoc, /*isLoad=*/false,
-                                         NonLocalDepResults);
-    if (NonLocalDepResults.size() == 1) {
-      SourceDep = NonLocalDepResults[0].getResult();
-      assert((!SourceDep.getInst() ||
-              LookupDomTree().dominates(SourceDep.getInst(), M)) &&
-             "when memdep returns exactly one result, it should dominate");
-    }
-  }
-
+  MemDepResult SourceDep =
+      MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false,
+                                   M->getIterator(), M->getParent());
   if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
     return false;
 
@@ -1259,18 +1235,6 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M) {
   MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(
       SrcLoc, true, M->getIterator(), M->getParent());
 
-  if (SrcDepInfo.isNonLocal()) {
-    SmallVector<NonLocalDepResult, 2> NonLocalDepResults;
-    MD->getNonLocalPointerDependencyFrom(M, SrcLoc, /*isLoad=*/true,
-                                         NonLocalDepResults);
-    if (NonLocalDepResults.size() == 1) {
-      SrcDepInfo = NonLocalDepResults[0].getResult();
-      assert((!SrcDepInfo.getInst() ||
-              LookupDomTree().dominates(SrcDepInfo.getInst(), M)) &&
-             "when memdep returns exactly one result, it should dominate");
-    }
-  }
-
   if (SrcDepInfo.isClobber()) {
     if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
       return processMemCpyMemCpyDependence(M, MDep);
diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index 3b45cfa482e6..c44edbed8ed9 100644
--- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -2796,17 +2796,12 @@ static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
   StatepointLiveSetTy Updated;
   findLiveSetAtInst(Inst, RevisedLivenessData, Updated);
 
-#ifndef NDEBUG
-  DenseSet<Value *> Bases;
-  for (auto KVPair : Info.PointerToBase)
-    Bases.insert(KVPair.second);
-#endif
-
   // We may have base pointers which are now live that weren't before.  We need
   // to update the PointerToBase structure to reflect this.
   for (auto V : Updated)
     if (Info.PointerToBase.insert({V, V}).second) {
-      assert(Bases.count(V) && "Can't find base for unexpected live value!");
+      assert(isKnownBaseResult(V) &&
+             "Can't find base for unexpected live value!");
       continue;
     }
 
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index efff06f79cb7..e00541d3c812 100644
--- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -648,8 +648,13 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
     SmallVector<BasicBlock*, 4> Preds(predecessors(LatchExit));
     NewExit = SplitBlockPredecessors(LatchExit, Preds, ".unr-lcssa",
                                      DT, LI, PreserveLCSSA);
+    // NewExit gets its DebugLoc from LatchExit, which is not part of the
+    // original Loop.
+    // Fix this by setting Loop's DebugLoc to NewExit.
+    auto *NewExitTerminator = NewExit->getTerminator();
+    NewExitTerminator->setDebugLoc(Header->getTerminator()->getDebugLoc());
     // Split NewExit to insert epilog remainder loop.
-    EpilogPreHeader = SplitBlock(NewExit, NewExit->getTerminator(), DT, LI);
+    EpilogPreHeader = SplitBlock(NewExit, NewExitTerminator, DT, LI);
     EpilogPreHeader->setName(Header->getName() + ".epil.preheader");
   } else {
     // If prolog remainder
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index b3c80424c8b9..e7358dbcb624 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -127,16 +127,6 @@ static cl::opt<unsigned> MaxSpeculationDepth(
     cl::desc("Limit maximum recursion depth when calculating costs of "
              "speculatively executed instructions"));
 
-static cl::opt<unsigned> DependenceChainLatency(
-    "dependence-chain-latency", cl::Hidden, cl::init(8),
-    cl::desc("Limit the maximum latency of dependence chain containing cmp "
-             "for if conversion"));
-
-static cl::opt<unsigned> SmallBBSize(
-    "small-bb-size", cl::Hidden, cl::init(40),
-    cl::desc("Check dependence chain latency only in basic block smaller than "
-             "this number"));
-
 STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
 STATISTIC(NumLinearMaps,
           "Number of switch instructions turned into linear mapping");
@@ -405,166 +395,6 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
   return true;
 }
 
-/// Estimate the code size of the specified BB.
-static unsigned CountBBCodeSize(BasicBlock *BB,
-                                const TargetTransformInfo &TTI) {
-  unsigned Size = 0;
-  for (auto II = BB->begin(); !isa<TerminatorInst>(II); ++II)
-    Size += TTI.getInstructionCost(&(*II), TargetTransformInfo::TCK_CodeSize);
-  return Size;
-}
-
-/// Find out the latency of the longest dependence chain in the BB if
-/// LongestChain is true, or the dependence chain containing the compare
-/// instruction feeding the block's conditional branch.
-static unsigned FindDependenceChainLatency(BasicBlock *BB,
-                            DenseMap<Instruction *, unsigned> &Instructions,
-                            const TargetTransformInfo &TTI,
-                            bool LongestChain) {
-  unsigned MaxLatency = 0;
-
-  BasicBlock::iterator II;
-  for (II = BB->begin(); !isa<TerminatorInst>(II); ++II) {
-    unsigned Latency = 0;
-    for (unsigned O = 0, E = II->getNumOperands(); O != E; ++O) {
-      Instruction *Op = dyn_cast<Instruction>(II->getOperand(O));
-      if (Op && Instructions.count(Op)) {
-        auto OpLatency = Instructions[Op];
-        if (OpLatency > Latency)
-          Latency = OpLatency;
-      }
-    }
-    Latency += TTI.getInstructionCost(&(*II), TargetTransformInfo::TCK_Latency);
-    Instructions[&(*II)] = Latency;
-
-    if (Latency > MaxLatency)
-      MaxLatency = Latency;
-  }
-
-  if (LongestChain)
-    return MaxLatency;
-
-  // The length of the dependence chain containing the compare instruction is
-  // wanted, so the terminator must be a BranchInst.
-  assert(isa<BranchInst>(II));
-  BranchInst* Br = cast<BranchInst>(II);
-  Instruction *Cmp = dyn_cast<Instruction>(Br->getCondition());
-  if (Cmp && Instructions.count(Cmp))
-    return Instructions[Cmp];
-  else
-    return 0;
-}
-
-/// Instructions in BB2 may depend on instructions in BB1, and instructions
-/// in BB1 may have users in BB2. If the last (in terms of latency) such kind
-/// of instruction in BB1 is I, then the instructions after I can be executed
-/// in parallel with instructions in BB2.
-/// This function returns the latency of I.
-static unsigned LatencyAdjustment(BasicBlock *BB1, BasicBlock *BB2,
-                        BasicBlock *IfBlock1, BasicBlock *IfBlock2,
-                        DenseMap<Instruction *, unsigned> &BB1Instructions) {
-  unsigned LastLatency = 0;
-  SmallVector<Instruction *, 16> Worklist;
-  BasicBlock::iterator II;
-  for (II = BB2->begin(); !isa<TerminatorInst>(II); ++II) {
-    if (PHINode *PN = dyn_cast<PHINode>(II)) {
-      // Look for users in BB2.
-      bool InBBUser = false;
-      for (User *U : PN->users()) {
-        if (cast<Instruction>(U)->getParent() == BB2) {
-          InBBUser = true;
-          break;
-        }
-      }
-      // No such user, we don't care about this instruction and its operands.
-      if (!InBBUser)
-        break;
-    }
-    Worklist.push_back(&(*II));
-  }
-
-  while (!Worklist.empty()) {
-    Instruction *I = Worklist.pop_back_val();
-    for (unsigned O = 0, E = I->getNumOperands(); O != E; ++O) {
-      if (Instruction *Op = dyn_cast<Instruction>(I->getOperand(O))) {
-        if (Op->getParent() == IfBlock1 || Op->getParent() == IfBlock2)
-          Worklist.push_back(Op);
-        else if (Op->getParent() == BB1 && BB1Instructions.count(Op)) {
-          if (BB1Instructions[Op] > LastLatency)
-            LastLatency = BB1Instructions[Op];
-        }
-      }
-    }
-  }
-
-  return LastLatency;
-}
-
-/// If after if conversion, most of the instructions in this new BB construct a
-/// long and slow dependence chain, it may be slower than cmp/branch, even
-/// if the branch has a high miss rate, because the control dependence is
-/// transformed into data dependence, and control dependence can be speculated,
-/// and thus, the second part can execute in parallel with the first part on
-/// modern OOO processor.
-///
-/// To check this condition, this function finds the length of the dependence
-/// chain in BB1 (only the part that can be executed in parallel with code after
-/// branch in BB2) containing cmp, and if the length is longer than a threshold,
-/// don't perform if conversion.
-///
-/// BB1, BB2, IfBlock1 and IfBlock2 are candidate BBs for if conversion.
-/// SpeculationSize contains the code size of IfBlock1 and IfBlock2.
-static bool FindLongDependenceChain(BasicBlock *BB1, BasicBlock *BB2,
-                             BasicBlock *IfBlock1, BasicBlock *IfBlock2,
-                             unsigned SpeculationSize,
-                             const TargetTransformInfo &TTI) {
-  // Accumulated latency of each instruction in their BBs.
-  DenseMap<Instruction *, unsigned> BB1Instructions;
-  DenseMap<Instruction *, unsigned> BB2Instructions;
-
-  if (!TTI.isOutOfOrder())
-    return false;
-
-  unsigned NewBBSize = CountBBCodeSize(BB1, TTI) + CountBBCodeSize(BB2, TTI)
-                         + SpeculationSize;
-
-  // We check small BB only since it is more difficult to find unrelated
-  // instructions to fill functional units in a small BB.
-  if (NewBBSize > SmallBBSize)
-    return false;
-
-  auto BB1Chain =
-         FindDependenceChainLatency(BB1, BB1Instructions, TTI, false);
-  auto BB2Chain =
-         FindDependenceChainLatency(BB2, BB2Instructions, TTI, true);
-
-  // If there are many unrelated instructions in the new BB, there will be
-  // other instructions for the processor to issue regardless of the length
-  // of this new dependence chain.
-  // Modern processors can issue 3 or more instructions in each cycle. But in
-  // real world applications, an IPC of 2 is already very good for non-loop
-  // code with small basic blocks. Higher IPC is usually found in programs with
-  // small kernel. So IPC of 2 is more reasonable for most applications.
-  if ((BB1Chain + BB2Chain) * 2 <= NewBBSize)
-    return false;
-
-  // We only care about part of the dependence chain in BB1 that can be
-  // executed in parallel with BB2, so adjust the latency.
-  BB1Chain -=
-      LatencyAdjustment(BB1, BB2, IfBlock1, IfBlock2, BB1Instructions);
-
-  // Correctly predicted branch instruction can skip the dependence chain in
-  // BB1, but misprediction has a penalty, so only when the dependence chain is
-  // longer than DependenceChainLatency, then branch is better than select.
-  // Besides misprediction penalty, the threshold value DependenceChainLatency
-  // also depends on branch misprediction rate, taken branch latency and cmov
-  // latency.
-  if (BB1Chain >= DependenceChainLatency)
-    return true;
-
-  return false;
-}
-
 /// Extract ConstantInt from value, looking through IntToPtr
 /// and PointerNullValue. Return NULL if value is not a constant int.
 static ConstantInt *GetConstantInt(Value *V, const DataLayout &DL) {
@@ -2214,11 +2044,6 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
   if (!HaveRewritablePHIs && !(HoistCondStores && SpeculatedStoreValue))
     return false;
 
-  // Don't do if conversion for long dependence chain.
-  if (FindLongDependenceChain(BB, EndBB, ThenBB, nullptr,
-                              CountBBCodeSize(ThenBB, TTI), TTI))
-    return false;
-
   // If we get here, we can hoist the instruction and if-convert.
   DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
 
@@ -2526,10 +2351,6 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
       }
   }
 
-  if (FindLongDependenceChain(DomBlock, BB, IfBlock1, IfBlock2,
-                              AggressiveInsts.size(), TTI))
-    return false;
-
   DEBUG(dbgs() << "FOUND IF CONDITION!  " << *IfCond << "  T: "
                << IfTrue->getName() << "  F: " << IfFalse->getName() << "\n");