Diffstat (limited to 'lib/Transforms')
-rw-r--r--  lib/Transforms/IPO/PassManagerBuilder.cpp              10
-rw-r--r--  lib/Transforms/InstCombine/InstCombineAddSub.cpp       30
-rw-r--r--  lib/Transforms/InstCombine/InstCombineAndOrXor.cpp     79
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCasts.cpp        19
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCompares.cpp    128
-rw-r--r--  lib/Transforms/InstCombine/InstCombineInternal.h       54
-rw-r--r--  lib/Transforms/InstCombine/InstCombineMulDivRem.cpp    11
-rw-r--r--  lib/Transforms/Scalar/EarlyCSE.cpp                      2
-rw-r--r--  lib/Transforms/Scalar/GVN.cpp                           2
-rw-r--r--  lib/Transforms/Scalar/JumpThreading.cpp                33
-rw-r--r--  lib/Transforms/Scalar/LoadCombine.cpp                   2
-rw-r--r--  lib/Transforms/Scalar/LoopIdiomRecognize.cpp           71
-rw-r--r--  lib/Transforms/Scalar/LoopPredication.cpp              86
-rw-r--r--  lib/Transforms/Scalar/LoopRotation.cpp                  2
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp            1
-rw-r--r--  lib/Transforms/Scalar/NewGVN.cpp                       815
-rw-r--r--  lib/Transforms/Scalar/Reassociate.cpp                   2
-rw-r--r--  lib/Transforms/Scalar/SROA.cpp                          8
-rw-r--r--  lib/Transforms/Scalar/StraightLineStrengthReduce.cpp    2
-rw-r--r--  lib/Transforms/Utils/CloneFunction.cpp                  2
-rw-r--r--  lib/Transforms/Utils/SimplifyCFG.cpp                    6
-rw-r--r--  lib/Transforms/Utils/SimplifyLibCalls.cpp              82
-rw-r--r--  lib/Transforms/Vectorize/LoopVectorize.cpp              2
-rw-r--r--  lib/Transforms/Vectorize/SLPVectorizer.cpp              4
24 files changed, 1016 insertions, 437 deletions
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 203594572618..ec06d5f9fb05 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -44,8 +44,12 @@
using namespace llvm;
static cl::opt<bool>
-RunLoopVectorization("vectorize-loops", cl::Hidden,
- cl::desc("Run the Loop vectorization passes"));
+ RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden,
+ cl::ZeroOrMore, cl::desc("Run Partial inlining pass"));
+
+static cl::opt<bool>
+ RunLoopVectorization("vectorize-loops", cl::Hidden,
+ cl::desc("Run the Loop vectorization passes"));
static cl::opt<bool>
RunSLPVectorization("vectorize-slp", cl::Hidden,
@@ -516,6 +520,8 @@ void PassManagerBuilder::populateModulePassManager(
// pass manager that we are specifically trying to avoid. To prevent this
// we must insert a no-op module pass to reset the pass manager.
MPM.add(createBarrierNoopPass());
+ if (RunPartialInlining)
+ MPM.add(createPartialInliningPass());
if (!DisableUnitAtATime && OptLevel > 1 && !PrepareForLTO &&
!PrepareForThinLTO)
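
The shape of this change is a flag-gated pipeline extension. A minimal standalone sketch of that pattern in plain C++ (no LLVM APIs; the names mirror the diff but are illustrative only):

```cpp
#include <cassert>
#include <string>
#include <vector>

// Models cl::init(false): the pass is off unless the flag is set, the way
// -enable-partial-inlining gates createPartialInliningPass() above.
static bool RunPartialInlining = false;

static std::vector<std::string> buildPipeline() {
  std::vector<std::string> MPM = {"barrier-noop"};
  if (RunPartialInlining)
    MPM.push_back("partial-inliner"); // models MPM.add(createPartialInliningPass())
  return MPM;
}

int main() {
  assert(buildPipeline().size() == 1);          // disabled: pipeline unchanged
  RunPartialInlining = true;
  assert(buildPipeline().back() == "partial-inliner"); // enabled: pass added
}
```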
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 0ca62b7ae40c..733eeb1767a3 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -852,8 +852,9 @@ Value *FAddCombine::createAddendVal(const FAddend &Opnd, bool &NeedNeg) {
/// This basically requires proving that the add in the original type would not
/// overflow to change the sign bit or have a carry out.
/// TODO: Handle this for Vectors.
-bool InstCombiner::WillNotOverflowSignedSub(Value *LHS, Value *RHS,
- Instruction &CxtI) {
+bool InstCombiner::willNotOverflowSignedSub(const Value *LHS,
+ const Value *RHS,
+ const Instruction &CxtI) const {
// If LHS and RHS each have at least two sign bits, the subtraction
// cannot overflow.
if (ComputeNumSignBits(LHS, 0, &CxtI) > 1 &&
@@ -869,8 +870,8 @@ bool InstCombiner::WillNotOverflowSignedSub(Value *LHS, Value *RHS,
// Subtraction of two 2's complement numbers having identical signs will
// never overflow.
- if ((LHSKnown.One[BitWidth - 1] && RHSKnown.One[BitWidth - 1]) ||
- (LHSKnown.Zero[BitWidth - 1] && RHSKnown.Zero[BitWidth - 1]))
+ if ((LHSKnown.isNegative() && RHSKnown.isNegative()) ||
+ (LHSKnown.isNonNegative() && RHSKnown.isNonNegative()))
return true;
// TODO: implement logic similar to checkRippleForAdd
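
The same-sign rule above can be checked exhaustively at 8 bits. A standalone sketch, using no LLVM APIs, that asserts signed subtraction of two same-sign int8_t values never overflows:

```cpp
#include <cassert>
#include <cstdint>

// Standalone model of the rule: subtracting two 8-bit values that share a
// sign bit cannot overflow, because the true difference always stays in
// [-127, 127], which fits in int8_t.
bool signedSubOverflows(int8_t A, int8_t B) {
  int Wide = int{A} - int{B}; // exact result in a wider type
  return Wide < INT8_MIN || Wide > INT8_MAX;
}

int main() {
  for (int A = -128; A <= 127; ++A)
    for (int B = -128; B <= 127; ++B)
      if ((A < 0) == (B < 0)) // identical sign bits
        assert(!signedSubOverflows(int8_t(A), int8_t(B)));
}
```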
@@ -879,8 +880,9 @@ bool InstCombiner::WillNotOverflowSignedSub(Value *LHS, Value *RHS,
/// \brief Return true if we can prove that:
/// (sub LHS, RHS) === (sub nuw LHS, RHS)
-bool InstCombiner::WillNotOverflowUnsignedSub(Value *LHS, Value *RHS,
- Instruction &CxtI) {
+bool InstCombiner::willNotOverflowUnsignedSub(const Value *LHS,
+ const Value *RHS,
+ const Instruction &CxtI) const {
// If the LHS is negative and the RHS is non-negative, no unsigned wrap.
KnownBits LHSKnown = computeKnownBits(LHS, /*Depth=*/0, &CxtI);
KnownBits RHSKnown = computeKnownBits(RHS, /*Depth=*/0, &CxtI);
@@ -1180,7 +1182,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
Constant *CI =
ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
if (ConstantExpr::getSExt(CI, I.getType()) == RHSC &&
- WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI, I)) {
+ willNotOverflowSignedAdd(LHSConv->getOperand(0), CI, I)) {
// Insert the new, smaller add.
Value *NewAdd =
Builder->CreateNSWAdd(LHSConv->getOperand(0), CI, "addconv");
@@ -1197,7 +1199,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
if (LHSConv->getOperand(0)->getType() ==
RHSConv->getOperand(0)->getType() &&
(LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
- WillNotOverflowSignedAdd(LHSConv->getOperand(0),
+ willNotOverflowSignedAdd(LHSConv->getOperand(0),
RHSConv->getOperand(0), I)) {
// Insert the new integer add.
Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
@@ -1275,10 +1277,10 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
}
}
- // TODO(jingyue): Consider WillNotOverflowSignedAdd and
+ // TODO(jingyue): Consider willNotOverflowSignedAdd and
// willNotOverflowUnsignedAdd to reduce the number of invocations of
// computeKnownBits.
- if (!I.hasNoSignedWrap() && WillNotOverflowSignedAdd(LHS, RHS, I)) {
+ if (!I.hasNoSignedWrap() && willNotOverflowSignedAdd(LHS, RHS, I)) {
Changed = true;
I.setHasNoSignedWrap(true);
}
@@ -1351,7 +1353,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
ConstantExpr::getFPToSI(CFP, LHSIntVal->getType());
if (LHSConv->hasOneUse() &&
ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
- WillNotOverflowSignedAdd(LHSIntVal, CI, I)) {
+ willNotOverflowSignedAdd(LHSIntVal, CI, I)) {
// Insert the new integer add.
Value *NewAdd = Builder->CreateNSWAdd(LHSIntVal,
CI, "addconv");
@@ -1370,7 +1372,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
// and if the integer add will not overflow.
if (LHSIntVal->getType() == RHSIntVal->getType() &&
(LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
- WillNotOverflowSignedAdd(LHSIntVal, RHSIntVal, I)) {
+ willNotOverflowSignedAdd(LHSIntVal, RHSIntVal, I)) {
// Insert the new integer add.
Value *NewAdd = Builder->CreateNSWAdd(LHSIntVal,
RHSIntVal, "addconv");
@@ -1676,11 +1678,11 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
return replaceInstUsesWith(I, Res);
bool Changed = false;
- if (!I.hasNoSignedWrap() && WillNotOverflowSignedSub(Op0, Op1, I)) {
+ if (!I.hasNoSignedWrap() && willNotOverflowSignedSub(Op0, Op1, I)) {
Changed = true;
I.setHasNoSignedWrap(true);
}
- if (!I.hasNoUnsignedWrap() && WillNotOverflowUnsignedSub(Op0, Op1, I)) {
+ if (!I.hasNoUnsignedWrap() && willNotOverflowUnsignedSub(Op0, Op1, I)) {
Changed = true;
I.setHasNoUnsignedWrap(true);
}
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 82dc88f1b3ad..4227b2d01be8 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -764,7 +764,7 @@ foldAndOrOfEqualityCmpsWithConstants(ICmpInst *LHS, ICmpInst *RHS,
}
/// Fold (icmp)&(icmp) if possible.
-Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
+Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
// (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B)
@@ -943,7 +943,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
/// Optimize (fcmp)&(fcmp). NOTE: Unlike the rest of instcombine, this returns
/// a Value which should already be inserted into the function.
-Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
+Value *InstCombiner::foldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1);
Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1);
FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate();
@@ -1126,8 +1126,8 @@ Instruction *InstCombiner::foldCastedBitwiseLogic(BinaryOperator &I) {
ICmpInst *ICmp0 = dyn_cast<ICmpInst>(Cast0Src);
ICmpInst *ICmp1 = dyn_cast<ICmpInst>(Cast1Src);
if (ICmp0 && ICmp1) {
- Value *Res = LogicOpc == Instruction::And ? FoldAndOfICmps(ICmp0, ICmp1)
- : FoldOrOfICmps(ICmp0, ICmp1, &I);
+ Value *Res = LogicOpc == Instruction::And ? foldAndOfICmps(ICmp0, ICmp1)
+ : foldOrOfICmps(ICmp0, ICmp1, &I);
if (Res)
return CastInst::Create(CastOpcode, Res, DestTy);
return nullptr;
@@ -1138,8 +1138,8 @@ Instruction *InstCombiner::foldCastedBitwiseLogic(BinaryOperator &I) {
FCmpInst *FCmp0 = dyn_cast<FCmpInst>(Cast0Src);
FCmpInst *FCmp1 = dyn_cast<FCmpInst>(Cast1Src);
if (FCmp0 && FCmp1) {
- Value *Res = LogicOpc == Instruction::And ? FoldAndOfFCmps(FCmp0, FCmp1)
- : FoldOrOfFCmps(FCmp0, FCmp1);
+ Value *Res = LogicOpc == Instruction::And ? foldAndOfFCmps(FCmp0, FCmp1)
+ : foldOrOfFCmps(FCmp0, FCmp1);
if (Res)
return CastInst::Create(CastOpcode, Res, DestTy);
return nullptr;
@@ -1425,7 +1425,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
ICmpInst *LHS = dyn_cast<ICmpInst>(Op0);
ICmpInst *RHS = dyn_cast<ICmpInst>(Op1);
if (LHS && RHS)
- if (Value *Res = FoldAndOfICmps(LHS, RHS))
+ if (Value *Res = foldAndOfICmps(LHS, RHS))
return replaceInstUsesWith(I, Res);
// TODO: Make this recursive; it's a little tricky because an arbitrary
@@ -1433,18 +1433,18 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
Value *X, *Y;
if (LHS && match(Op1, m_OneUse(m_And(m_Value(X), m_Value(Y))))) {
if (auto *Cmp = dyn_cast<ICmpInst>(X))
- if (Value *Res = FoldAndOfICmps(LHS, Cmp))
+ if (Value *Res = foldAndOfICmps(LHS, Cmp))
return replaceInstUsesWith(I, Builder->CreateAnd(Res, Y));
if (auto *Cmp = dyn_cast<ICmpInst>(Y))
- if (Value *Res = FoldAndOfICmps(LHS, Cmp))
+ if (Value *Res = foldAndOfICmps(LHS, Cmp))
return replaceInstUsesWith(I, Builder->CreateAnd(Res, X));
}
if (RHS && match(Op0, m_OneUse(m_And(m_Value(X), m_Value(Y))))) {
if (auto *Cmp = dyn_cast<ICmpInst>(X))
- if (Value *Res = FoldAndOfICmps(Cmp, RHS))
+ if (Value *Res = foldAndOfICmps(Cmp, RHS))
return replaceInstUsesWith(I, Builder->CreateAnd(Res, Y));
if (auto *Cmp = dyn_cast<ICmpInst>(Y))
- if (Value *Res = FoldAndOfICmps(Cmp, RHS))
+ if (Value *Res = foldAndOfICmps(Cmp, RHS))
return replaceInstUsesWith(I, Builder->CreateAnd(Res, X));
}
}
@@ -1452,7 +1452,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
// If and'ing two fcmp, try combine them into one.
if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0)))
if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
- if (Value *Res = FoldAndOfFCmps(LHS, RHS))
+ if (Value *Res = foldAndOfFCmps(LHS, RHS))
return replaceInstUsesWith(I, Res);
if (Instruction *CastedAnd = foldCastedBitwiseLogic(I))
@@ -1589,7 +1589,7 @@ static Value *matchSelectFromAndOr(Value *A, Value *C, Value *B, Value *D,
}
/// Fold (icmp)|(icmp) if possible.
-Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
+Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
Instruction *CxtI) {
ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
@@ -1846,7 +1846,7 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
/// Optimize (fcmp)|(fcmp). NOTE: Unlike the rest of instcombine, this returns
/// a Value which should already be inserted into the function.
-Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
+Value *InstCombiner::foldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1);
Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1);
FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate();
@@ -2197,7 +2197,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
ICmpInst *LHS = dyn_cast<ICmpInst>(Op0);
ICmpInst *RHS = dyn_cast<ICmpInst>(Op1);
if (LHS && RHS)
- if (Value *Res = FoldOrOfICmps(LHS, RHS, &I))
+ if (Value *Res = foldOrOfICmps(LHS, RHS, &I))
return replaceInstUsesWith(I, Res);
// TODO: Make this recursive; it's a little tricky because an arbitrary
@@ -2205,18 +2205,18 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
Value *X, *Y;
if (LHS && match(Op1, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) {
if (auto *Cmp = dyn_cast<ICmpInst>(X))
- if (Value *Res = FoldOrOfICmps(LHS, Cmp, &I))
+ if (Value *Res = foldOrOfICmps(LHS, Cmp, &I))
return replaceInstUsesWith(I, Builder->CreateOr(Res, Y));
if (auto *Cmp = dyn_cast<ICmpInst>(Y))
- if (Value *Res = FoldOrOfICmps(LHS, Cmp, &I))
+ if (Value *Res = foldOrOfICmps(LHS, Cmp, &I))
return replaceInstUsesWith(I, Builder->CreateOr(Res, X));
}
if (RHS && match(Op0, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) {
if (auto *Cmp = dyn_cast<ICmpInst>(X))
- if (Value *Res = FoldOrOfICmps(Cmp, RHS, &I))
+ if (Value *Res = foldOrOfICmps(Cmp, RHS, &I))
return replaceInstUsesWith(I, Builder->CreateOr(Res, Y));
if (auto *Cmp = dyn_cast<ICmpInst>(Y))
- if (Value *Res = FoldOrOfICmps(Cmp, RHS, &I))
+ if (Value *Res = foldOrOfICmps(Cmp, RHS, &I))
return replaceInstUsesWith(I, Builder->CreateOr(Res, X));
}
}
@@ -2224,7 +2224,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
// (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y)
if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0)))
if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
- if (Value *Res = FoldOrOfFCmps(LHS, RHS))
+ if (Value *Res = foldOrOfFCmps(LHS, RHS))
return replaceInstUsesWith(I, Res);
if (Instruction *CastedOr = foldCastedBitwiseLogic(I))
@@ -2320,6 +2320,24 @@ static Instruction *foldXorToXor(BinaryOperator &I) {
return nullptr;
}
+Value *InstCombiner::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
+ if (PredicatesFoldable(LHS->getPredicate(), RHS->getPredicate())) {
+ if (LHS->getOperand(0) == RHS->getOperand(1) &&
+ LHS->getOperand(1) == RHS->getOperand(0))
+ LHS->swapOperands();
+ if (LHS->getOperand(0) == RHS->getOperand(0) &&
+ LHS->getOperand(1) == RHS->getOperand(1)) {
+ // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B)
+ Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
+ unsigned Code = getICmpCode(LHS) ^ getICmpCode(RHS);
+ bool isSigned = LHS->isSigned() || RHS->isSigned();
+ return getNewICmpValue(isSigned, Code, Op0, Op1, Builder);
+ }
+ }
+
+ return nullptr;
+}
+
// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
// here. We should standardize that construct where it is needed or choose some
// other way to ensure that commutated variants of patterns are not missed.
@@ -2579,23 +2597,10 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
match(Op1, m_Not(m_Specific(A))))
return BinaryOperator::CreateNot(Builder->CreateAnd(A, B));
- // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B)
- if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
- if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
- if (PredicatesFoldable(LHS->getPredicate(), RHS->getPredicate())) {
- if (LHS->getOperand(0) == RHS->getOperand(1) &&
- LHS->getOperand(1) == RHS->getOperand(0))
- LHS->swapOperands();
- if (LHS->getOperand(0) == RHS->getOperand(0) &&
- LHS->getOperand(1) == RHS->getOperand(1)) {
- Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
- unsigned Code = getICmpCode(LHS) ^ getICmpCode(RHS);
- bool isSigned = LHS->isSigned() || RHS->isSigned();
- return replaceInstUsesWith(I,
- getNewICmpValue(isSigned, Code, Op0, Op1,
- Builder));
- }
- }
+ if (auto *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
+ if (auto *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
+ if (Value *V = foldXorOfICmps(LHS, RHS))
+ return replaceInstUsesWith(I, V);
if (Instruction *CastedXor = foldCastedBitwiseLogic(I))
return CastedXor;
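
The fold in foldXorOfICmps leans on getICmpCode, which encodes a predicate as a truth mask over the three possible orderings, so XOR-ing two codes yields the code of the XOR-ed predicates. A standalone model of that idea (the 3-bit mask below mirrors the signed/unsigned icmp encoding but is illustrative, not copied from LLVM):

```cpp
#include <cassert>

// Encode a predicate as a 3-bit truth mask over the orderings {LT, EQ, GT}.
// Any bitwise combination of two predicates is then the same bitwise
// combination of their masks, which is exactly what the fold exploits.
enum : unsigned { GT = 1, EQ = 2, LT = 4 };

bool evalPred(unsigned Code, int A, int B) {
  unsigned Rel = A < B ? LT : (A == B ? EQ : GT); // which ordering holds
  return Code & Rel;
}

int main() {
  // Exhaustively check: eval(C1) ^ eval(C2) == eval(C1 ^ C2).
  for (unsigned C1 = 0; C1 < 8; ++C1)
    for (unsigned C2 = 0; C2 < 8; ++C2)
      for (int A = -2; A <= 2; ++A)
        for (int B = -2; B <= 2; ++B)
          assert((evalPred(C1, A, B) ^ evalPred(C2, A, B)) ==
                 evalPred(C1 ^ C2, A, B));
}
```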
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 001a4bcf16f3..f4bf5221f6a2 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -585,6 +585,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
return CastInst::CreateIntegerCast(Shift, DestTy, false);
}
+ // FIXME: We should canonicalize to zext/trunc and remove this transform.
// Transform trunc(lshr (sext A), Cst) to ashr A, Cst to eliminate type
// conversion.
// It works because bits coming from sign extension have the same value as
@@ -595,18 +596,24 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
Value *SExt = cast<Instruction>(Src)->getOperand(0);
const unsigned SExtSize = SExt->getType()->getPrimitiveSizeInBits();
const unsigned ASize = A->getType()->getPrimitiveSizeInBits();
+ const unsigned CISize = CI.getType()->getPrimitiveSizeInBits();
+ const unsigned MaxAmt = SExtSize - std::max(CISize, ASize);
unsigned ShiftAmt = Cst->getZExtValue();
+
// This optimization can be only performed when zero bits generated by
// the original lshr aren't pulled into the value after truncation, so we
// can only shift by values no larger than the number of extension bits.
// FIXME: Instead of bailing when the shift is too large, use and to clear
// the extra bits.
- if (SExt->hasOneUse() && ShiftAmt <= SExtSize - ASize) {
- // If shifting by the size of the original value in bits or more, it is
- // being filled with the sign bit, so shift by ASize-1 to avoid ub.
- Value *Shift = Builder->CreateAShr(A, std::min(ShiftAmt, ASize-1));
- Shift->takeName(Src);
- return CastInst::CreateIntegerCast(Shift, CI.getType(), true);
+ if (ShiftAmt <= MaxAmt) {
+ if (CISize == ASize)
+ return BinaryOperator::CreateAShr(A, ConstantInt::get(CI.getType(),
+ std::min(ShiftAmt, ASize - 1)));
+ if (SExt->hasOneUse()) {
+ Value *Shift = Builder->CreateAShr(A, std::min(ShiftAmt, ASize-1));
+ Shift->takeName(Src);
+ return CastInst::CreateIntegerCast(Shift, CI.getType(), true);
+ }
}
}
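
As a sanity check on this hunk's new CISize == ASize path, the whole sext/lshr/trunc sequence can be compared against a plain arithmetic shift. A standalone sketch for the i8-sext-to-i16 case (it assumes the usual arithmetic behavior of >> on negative values, which C++ leaves implementation-defined):

```cpp
#include <algorithm>
#include <cassert>
#include <cstdint>

// trunc i8 (lshr (sext i8 A to i16), Sh): when Sh does not pull zero bits
// into the truncated value (Sh <= 16 - 8), this is just an arithmetic shift
// of A, clamped to 7 to avoid shifting by the full bit width.
int8_t viaSextLshrTrunc(int8_t A, unsigned Sh) {
  uint16_t Wide = static_cast<uint16_t>(static_cast<int16_t>(A)); // sext
  return static_cast<int8_t>(Wide >> Sh);                         // lshr+trunc
}

int main() {
  for (int A = -128; A <= 127; ++A)
    for (unsigned Sh = 0; Sh <= 8; ++Sh) { // MaxAmt = 16 - 8 = 8
      int8_t Ref = static_cast<int8_t>(A >> std::min(Sh, 7u)); // ashr
      assert(viaSextLshrTrunc(static_cast<int8_t>(A), Sh) == Ref);
    }
}
```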
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 60ed4057cedd..6492eaedae9c 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -3506,7 +3506,7 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
// We can strength reduce this signed add into a regular add if we can prove
// that it will never overflow.
if (OCF == OCF_SIGNED_ADD)
- if (WillNotOverflowSignedAdd(LHS, RHS, OrigI))
+ if (willNotOverflowSignedAdd(LHS, RHS, OrigI))
return SetResult(Builder->CreateNSWAdd(LHS, RHS), Builder->getFalse(),
true);
break;
@@ -3519,11 +3519,11 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
return SetResult(LHS, Builder->getFalse(), false);
if (OCF == OCF_SIGNED_SUB) {
- if (WillNotOverflowSignedSub(LHS, RHS, OrigI))
+ if (willNotOverflowSignedSub(LHS, RHS, OrigI))
return SetResult(Builder->CreateNSWSub(LHS, RHS), Builder->getFalse(),
true);
} else {
- if (WillNotOverflowUnsignedSub(LHS, RHS, OrigI))
+ if (willNotOverflowUnsignedSub(LHS, RHS, OrigI))
return SetResult(Builder->CreateNUWSub(LHS, RHS), Builder->getFalse(),
true);
}
@@ -3553,7 +3553,7 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
return SetResult(LHS, Builder->getFalse(), false);
if (OCF == OCF_SIGNED_MUL)
- if (WillNotOverflowSignedMul(LHS, RHS, OrigI))
+ if (willNotOverflowSignedMul(LHS, RHS, OrigI))
return SetResult(Builder->CreateNSWMul(LHS, RHS), Builder->getFalse(),
true);
break;
@@ -4260,6 +4260,80 @@ static ICmpInst *canonicalizeCmpWithConstant(ICmpInst &I) {
return new ICmpInst(NewPred, Op0, ConstantExpr::getAdd(Op1C, OneOrNegOne));
}
+/// Integer compare with boolean values can always be turned into bitwise ops.
+static Instruction *canonicalizeICmpBool(ICmpInst &I,
+ InstCombiner::BuilderTy &Builder) {
+ Value *A = I.getOperand(0), *B = I.getOperand(1);
+ assert(A->getType()->getScalarType()->isIntegerTy(1) && "Bools only");
+
+ // A boolean compared to true/false can be simplified to Op0/true/false in
+ // 14 out of the 20 (10 predicates * 2 constants) possible combinations.
+ // Cases not handled by InstSimplify are always 'not' of Op0.
+ if (match(B, m_Zero())) {
+ switch (I.getPredicate()) {
+ case CmpInst::ICMP_EQ: // A == 0 -> !A
+ case CmpInst::ICMP_ULE: // A <=u 0 -> !A
+ case CmpInst::ICMP_SGE: // A >=s 0 -> !A
+ return BinaryOperator::CreateNot(A);
+ default:
+ llvm_unreachable("ICmp i1 X, C not simplified as expected.");
+ }
+ } else if (match(B, m_One())) {
+ switch (I.getPredicate()) {
+ case CmpInst::ICMP_NE: // A != 1 -> !A
+ case CmpInst::ICMP_ULT: // A <u 1 -> !A
+ case CmpInst::ICMP_SGT: // A >s -1 -> !A
+ return BinaryOperator::CreateNot(A);
+ default:
+ llvm_unreachable("ICmp i1 X, C not simplified as expected.");
+ }
+ }
+
+ switch (I.getPredicate()) {
+ default:
+ llvm_unreachable("Invalid icmp instruction!");
+ case ICmpInst::ICMP_EQ:
+ // icmp eq i1 A, B -> ~(A ^ B)
+ return BinaryOperator::CreateNot(Builder.CreateXor(A, B));
+
+ case ICmpInst::ICMP_NE:
+ // icmp ne i1 A, B -> A ^ B
+ return BinaryOperator::CreateXor(A, B);
+
+ case ICmpInst::ICMP_UGT:
+ // icmp ugt -> icmp ult
+ std::swap(A, B);
+ LLVM_FALLTHROUGH;
+ case ICmpInst::ICMP_ULT:
+ // icmp ult i1 A, B -> ~A & B
+ return BinaryOperator::CreateAnd(Builder.CreateNot(A), B);
+
+ case ICmpInst::ICMP_SGT:
+ // icmp sgt -> icmp slt
+ std::swap(A, B);
+ LLVM_FALLTHROUGH;
+ case ICmpInst::ICMP_SLT:
+ // icmp slt i1 A, B -> A & ~B
+ return BinaryOperator::CreateAnd(Builder.CreateNot(B), A);
+
+ case ICmpInst::ICMP_UGE:
+ // icmp uge -> icmp ule
+ std::swap(A, B);
+ LLVM_FALLTHROUGH;
+ case ICmpInst::ICMP_ULE:
+ // icmp ule i1 A, B -> ~A | B
+ return BinaryOperator::CreateOr(Builder.CreateNot(A), B);
+
+ case ICmpInst::ICMP_SGE:
+ // icmp sge -> icmp sle
+ std::swap(A, B);
+ LLVM_FALLTHROUGH;
+ case ICmpInst::ICMP_SLE:
+ // icmp sle i1 A, B -> A | ~B
+ return BinaryOperator::CreateOr(Builder.CreateNot(B), A);
+ }
+}
+
Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
bool Changed = false;
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
@@ -4297,49 +4371,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
}
}
- Type *Ty = Op0->getType();
-
- // icmp's with boolean values can always be turned into bitwise operations
- if (Ty->getScalarType()->isIntegerTy(1)) {
- switch (I.getPredicate()) {
- default: llvm_unreachable("Invalid icmp instruction!");
- case ICmpInst::ICMP_EQ: { // icmp eq i1 A, B -> ~(A^B)
- Value *Xor = Builder->CreateXor(Op0, Op1, I.getName() + "tmp");
- return BinaryOperator::CreateNot(Xor);
- }
- case ICmpInst::ICMP_NE: // icmp ne i1 A, B -> A^B
- return BinaryOperator::CreateXor(Op0, Op1);
-
- case ICmpInst::ICMP_UGT:
- std::swap(Op0, Op1); // Change icmp ugt -> icmp ult
- LLVM_FALLTHROUGH;
- case ICmpInst::ICMP_ULT:{ // icmp ult i1 A, B -> ~A & B
- Value *Not = Builder->CreateNot(Op0, I.getName() + "tmp");
- return BinaryOperator::CreateAnd(Not, Op1);
- }
- case ICmpInst::ICMP_SGT:
- std::swap(Op0, Op1); // Change icmp sgt -> icmp slt
- LLVM_FALLTHROUGH;
- case ICmpInst::ICMP_SLT: { // icmp slt i1 A, B -> A & ~B
- Value *Not = Builder->CreateNot(Op1, I.getName() + "tmp");
- return BinaryOperator::CreateAnd(Not, Op0);
- }
- case ICmpInst::ICMP_UGE:
- std::swap(Op0, Op1); // Change icmp uge -> icmp ule
- LLVM_FALLTHROUGH;
- case ICmpInst::ICMP_ULE: { // icmp ule i1 A, B -> ~A | B
- Value *Not = Builder->CreateNot(Op0, I.getName() + "tmp");
- return BinaryOperator::CreateOr(Not, Op1);
- }
- case ICmpInst::ICMP_SGE:
- std::swap(Op0, Op1); // Change icmp sge -> icmp sle
- LLVM_FALLTHROUGH;
- case ICmpInst::ICMP_SLE: { // icmp sle i1 A, B -> A | ~B
- Value *Not = Builder->CreateNot(Op1, I.getName() + "tmp");
- return BinaryOperator::CreateOr(Not, Op0);
- }
- }
- }
+ if (Op0->getType()->getScalarType()->isIntegerTy(1))
+ if (Instruction *Res = canonicalizeICmpBool(I, *Builder))
+ return Res;
if (ICmpInst *NewICmp = canonicalizeCmpWithConstant(I))
return NewICmp;
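
The table of i1 folds in canonicalizeICmpBool follows from how an i1 reads as an integer: true is 1 when unsigned but -1 when signed, so the signed and unsigned orderings of two booleans are mirror images. A standalone sketch checking the six bitwise rewrites over all four input combinations (the ugt/sgt/uge/sge cases are the same folds after the operand swap shown in the diff):

```cpp
#include <cassert>

int main() {
  for (bool A : {false, true})
    for (bool B : {false, true}) {
      unsigned UA = A, UB = B;              // unsigned view: true = 1
      int SA = A ? -1 : 0, SB = B ? -1 : 0; // signed view:   true = -1
      assert((A == B) == !(A ^ B));   // eq  i1 A, B -> ~(A ^ B)
      assert((A != B) == (A ^ B));    // ne  i1 A, B -> A ^ B
      assert((UA < UB) == (!A & B));  // ult i1 A, B -> ~A & B
      assert((SA < SB) == (A & !B));  // slt i1 A, B -> A & ~B
      assert((UA <= UB) == (!A | B)); // ule i1 A, B -> ~A | B
      assert((SA <= SB) == (A | !B)); // sle i1 A, B -> A | ~B
    }
}
```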
diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h
index f88a2c6acc3f..6829be86885b 100644
--- a/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -275,11 +275,7 @@ public:
Instruction *visitSDiv(BinaryOperator &I);
Instruction *visitFDiv(BinaryOperator &I);
Value *simplifyRangeCheck(ICmpInst *Cmp0, ICmpInst *Cmp1, bool Inverted);
- Value *FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS);
- Value *FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS);
Instruction *visitAnd(BinaryOperator &I);
- Value *FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, Instruction *CxtI);
- Value *FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS);
Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op, Value *A,
Value *B, Value *C);
Instruction *FoldXorWithConstants(BinaryOperator &I, Value *Op, Value *A,
@@ -410,18 +406,24 @@ private:
bool DoTransform = true);
Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI);
- bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS, Instruction &CxtI) {
+ bool willNotOverflowSignedAdd(const Value *LHS, const Value *RHS,
+ const Instruction &CxtI) const {
return computeOverflowForSignedAdd(LHS, RHS, &CxtI) ==
OverflowResult::NeverOverflows;
};
- bool willNotOverflowUnsignedAdd(Value *LHS, Value *RHS, Instruction &CxtI) {
+ bool willNotOverflowUnsignedAdd(const Value *LHS, const Value *RHS,
+ const Instruction &CxtI) const {
return computeOverflowForUnsignedAdd(LHS, RHS, &CxtI) ==
OverflowResult::NeverOverflows;
};
- bool WillNotOverflowSignedSub(Value *LHS, Value *RHS, Instruction &CxtI);
- bool WillNotOverflowUnsignedSub(Value *LHS, Value *RHS, Instruction &CxtI);
- bool WillNotOverflowSignedMul(Value *LHS, Value *RHS, Instruction &CxtI);
- bool willNotOverflowUnsignedMul(Value *LHS, Value *RHS, Instruction &CxtI) {
+ bool willNotOverflowSignedSub(const Value *LHS, const Value *RHS,
+ const Instruction &CxtI) const;
+ bool willNotOverflowUnsignedSub(const Value *LHS, const Value *RHS,
+ const Instruction &CxtI) const;
+ bool willNotOverflowSignedMul(const Value *LHS, const Value *RHS,
+ const Instruction &CxtI) const;
+ bool willNotOverflowUnsignedMul(const Value *LHS, const Value *RHS,
+ const Instruction &CxtI) const {
return computeOverflowForUnsignedMul(LHS, RHS, &CxtI) ==
OverflowResult::NeverOverflows;
};
@@ -445,6 +447,12 @@ private:
Instruction::CastOps isEliminableCastPair(const CastInst *CI1,
const CastInst *CI2);
+ Value *foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS);
+ Value *foldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS);
+ Value *foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, Instruction *CxtI);
+ Value *foldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS);
+ Value *foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS);
+
public:
/// \brief Inserts an instruction \p New before instruction \p Old
///
@@ -523,29 +531,31 @@ public:
return nullptr; // Don't do anything with FI
}
- void computeKnownBits(Value *V, KnownBits &Known,
- unsigned Depth, Instruction *CxtI) const {
+ void computeKnownBits(const Value *V, KnownBits &Known,
+ unsigned Depth, const Instruction *CxtI) const {
llvm::computeKnownBits(V, Known, DL, Depth, &AC, CxtI, &DT);
}
- KnownBits computeKnownBits(Value *V, unsigned Depth,
- Instruction *CxtI) const {
+ KnownBits computeKnownBits(const Value *V, unsigned Depth,
+ const Instruction *CxtI) const {
return llvm::computeKnownBits(V, DL, Depth, &AC, CxtI, &DT);
}
- bool MaskedValueIsZero(Value *V, const APInt &Mask, unsigned Depth = 0,
- Instruction *CxtI = nullptr) const {
+ bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth = 0,
+ const Instruction *CxtI = nullptr) const {
return llvm::MaskedValueIsZero(V, Mask, DL, Depth, &AC, CxtI, &DT);
}
- unsigned ComputeNumSignBits(Value *Op, unsigned Depth = 0,
- Instruction *CxtI = nullptr) const {
+ unsigned ComputeNumSignBits(const Value *Op, unsigned Depth = 0,
+ const Instruction *CxtI = nullptr) const {
return llvm::ComputeNumSignBits(Op, DL, Depth, &AC, CxtI, &DT);
}
- OverflowResult computeOverflowForUnsignedMul(Value *LHS, Value *RHS,
- const Instruction *CxtI) {
+ OverflowResult computeOverflowForUnsignedMul(const Value *LHS,
+ const Value *RHS,
+ const Instruction *CxtI) const {
return llvm::computeOverflowForUnsignedMul(LHS, RHS, DL, &AC, CxtI, &DT);
}
- OverflowResult computeOverflowForUnsignedAdd(Value *LHS, Value *RHS,
- const Instruction *CxtI) {
+ OverflowResult computeOverflowForUnsignedAdd(const Value *LHS,
+ const Value *RHS,
+ const Instruction *CxtI) const {
return llvm::computeOverflowForUnsignedAdd(LHS, RHS, DL, &AC, CxtI, &DT);
}
OverflowResult computeOverflowForSignedAdd(const Value *LHS,
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 2a35259f2103..fc13854f8fe7 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -132,8 +132,9 @@ static Constant *getLogBase2Vector(ConstantDataVector *CV) {
/// \brief Return true if we can prove that:
/// (mul LHS, RHS) === (mul nsw LHS, RHS)
-bool InstCombiner::WillNotOverflowSignedMul(Value *LHS, Value *RHS,
- Instruction &CxtI) {
+bool InstCombiner::willNotOverflowSignedMul(const Value *LHS,
+ const Value *RHS,
+ const Instruction &CxtI) const {
// Multiplying n * m significant bits yields a result of n + m significant
// bits. If the total number of significant bits does not exceed the
// result bit width (minus 1), there is no overflow.
@@ -384,7 +385,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
Constant *CI =
ConstantExpr::getTrunc(Op1C, Op0Conv->getOperand(0)->getType());
if (ConstantExpr::getSExt(CI, I.getType()) == Op1C &&
- WillNotOverflowSignedMul(Op0Conv->getOperand(0), CI, I)) {
+ willNotOverflowSignedMul(Op0Conv->getOperand(0), CI, I)) {
// Insert the new, smaller mul.
Value *NewMul =
Builder->CreateNSWMul(Op0Conv->getOperand(0), CI, "mulconv");
@@ -401,7 +402,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
if (Op0Conv->getOperand(0)->getType() ==
Op1Conv->getOperand(0)->getType() &&
(Op0Conv->hasOneUse() || Op1Conv->hasOneUse()) &&
- WillNotOverflowSignedMul(Op0Conv->getOperand(0),
+ willNotOverflowSignedMul(Op0Conv->getOperand(0),
Op1Conv->getOperand(0), I)) {
// Insert the new integer mul.
Value *NewMul = Builder->CreateNSWMul(
@@ -447,7 +448,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
}
}
- if (!I.hasNoSignedWrap() && WillNotOverflowSignedMul(Op0, Op1, I)) {
+ if (!I.hasNoSignedWrap() && willNotOverflowSignedMul(Op0, Op1, I)) {
Changed = true;
I.setHasNoSignedWrap(true);
}
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index d8f8a58a5fdf..c4f450949e6d 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -606,7 +606,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
if (unsigned Count = replaceDominatedUsesWith(
CondInst, TorF, DT, BasicBlockEdge(Pred, BB))) {
Changed = true;
- NumCSECVP = NumCSECVP + Count;
+ NumCSECVP += Count;
}
}
}
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index c04646eed49a..0490d93f6455 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -2057,7 +2057,7 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
if (!performScalarPREInsertion(PREInstr, PREPred, ValNo)) {
// If we failed insertion, make sure we remove the instruction.
DEBUG(verifyRemoved(PREInstr));
- delete PREInstr;
+ PREInstr->deleteValue();
return false;
}
}
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index ae353ea44595..ada22ae38eb8 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -833,15 +833,13 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
CondBr->eraseFromParent();
if (CondCmp->use_empty())
CondCmp->eraseFromParent();
- else if (CondCmp->getParent() == BB) {
- // If the fact we just learned is true for all uses of the
- // condition, replace it with a constant value
- auto *CI = Ret == LazyValueInfo::True ?
- ConstantInt::getTrue(CondCmp->getType()) :
- ConstantInt::getFalse(CondCmp->getType());
- CondCmp->replaceAllUsesWith(CI);
- CondCmp->eraseFromParent();
- }
+ // TODO: We can safely replace *some* uses of the CondInst if it has
+ // exactly one value as returned by LVI. RAUW is incorrect in the
+ // presence of guards and assumes that use `Cond`. This is because we
+ // use the guards/assumes to reason about the `Cond` value at the end
+ // of the block, but RAUW unconditionally replaces all uses, including
+ // the guards/assumes themselves and the uses before the guard/assume.
return true;
}
@@ -1327,14 +1325,13 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
if (auto *CondInst = dyn_cast<Instruction>(Cond)) {
if (CondInst->use_empty() && !CondInst->mayHaveSideEffects())
CondInst->eraseFromParent();
- else if (OnlyVal && OnlyVal != MultipleVal &&
- CondInst->getParent() == BB) {
- // If we just learned Cond is the same value for all uses of the
- // condition, replace it with a constant value
- CondInst->replaceAllUsesWith(OnlyVal);
- if (!CondInst->mayHaveSideEffects())
- CondInst->eraseFromParent();
- }
+ // TODO: We can safely replace *some* uses of the CondInst if it has
+ // exactly one value as returned by LVI. RAUW is incorrect in the
+ // presence of guards and assumes that use `Cond`. This is because we
+ // use the guards/assumes to reason about the `Cond` value at the end
+ // of the block, but RAUW unconditionally replaces all uses, including
+ // the guards/assumes themselves and the uses before the guard/assume.
}
return true;
}
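
The TODO added in both hunks can be made concrete without LLVM. A standalone sketch of why blanket RAUW is wrong here: a guard justifies assuming Cond only for code after the guard, while RAUW would also rewrite the guard's own operand and the uses before it (guardOn and takeFastPath are illustrative stand-ins, not LLVM functions):

```cpp
#include <cassert>

bool DeoptCalled = false;

// Models an @llvm.experimental.guard: bail out when the condition fails.
void guardOn(bool Cond) {
  if (!Cond)
    DeoptCalled = true;
}

int takeFastPath(bool Cond) {
  guardOn(Cond);       // after this point Cond may be assumed true...
  return Cond ? 1 : 0; // ...so THIS use could be folded to 1, but the
}                      // operand of guardOn itself must not be rewritten,
                       // or the check that justified the fold disappears.

int main() {
  assert(takeFastPath(false) == 0 && DeoptCalled); // guard must still fire
}
```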
@@ -1909,7 +1906,7 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred(
{BB->getModule()->getDataLayout(), TLI, nullptr, nullptr, New})) {
ValueMapping[&*BI] = IV;
if (!New->mayHaveSideEffects()) {
- delete New;
+ New->deleteValue();
New = nullptr;
}
} else {
diff --git a/lib/Transforms/Scalar/LoadCombine.cpp b/lib/Transforms/Scalar/LoadCombine.cpp
index 02215d3450c2..494cbc61bc9c 100644
--- a/lib/Transforms/Scalar/LoadCombine.cpp
+++ b/lib/Transforms/Scalar/LoadCombine.cpp
@@ -228,7 +228,7 @@ bool LoadCombine::combineLoads(SmallVectorImpl<LoadPOPPair> &Loads) {
L.Load->replaceAllUsesWith(V);
}
- NumLoadsCombined = NumLoadsCombined + Loads.size();
+ NumLoadsCombined += Loads.size();
return true;
}
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index cb6223b070a6..97337ea5ba62 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -110,6 +110,15 @@ private:
bool HasMemset;
bool HasMemsetPattern;
bool HasMemcpy;
+ /// Return code for isLegalStore()
+ enum LegalStoreKind {
+ None = 0,
+ Memset,
+ MemsetPattern,
+ Memcpy,
+ DontUse // Dummy retval never to be used. Allows catching errors in retval
+ // handling.
+ };
/// \name Countable Loop Idiom Handling
/// @{
@@ -119,8 +128,7 @@ private:
SmallVectorImpl<BasicBlock *> &ExitBlocks);
void collectStores(BasicBlock *BB);
- bool isLegalStore(StoreInst *SI, bool &ForMemset, bool &ForMemsetPattern,
- bool &ForMemcpy);
+ LegalStoreKind isLegalStore(StoreInst *SI);
bool processLoopStores(SmallVectorImpl<StoreInst *> &SL, const SCEV *BECount,
bool ForMemset);
bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
@@ -343,20 +351,20 @@ static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) {
return ConstantArray::get(AT, std::vector<Constant *>(ArraySize, C));
}
-bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset,
- bool &ForMemsetPattern, bool &ForMemcpy) {
+LoopIdiomRecognize::LegalStoreKind
+LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
// Don't touch volatile stores.
if (!SI->isSimple())
- return false;
+ return LegalStoreKind::None;
// Don't convert stores of non-integral pointer types to memsets (which stores
// integers).
if (DL->isNonIntegralPointerType(SI->getValueOperand()->getType()))
- return false;
+ return LegalStoreKind::None;
// Avoid merging nontemporal stores.
if (SI->getMetadata(LLVMContext::MD_nontemporal))
- return false;
+ return LegalStoreKind::None;
Value *StoredVal = SI->getValueOperand();
Value *StorePtr = SI->getPointerOperand();
@@ -364,7 +372,7 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset,
// Reject stores that are so large that they overflow an unsigned.
uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
- return false;
+ return LegalStoreKind::None;
// See if the pointer expression is an AddRec like {base,+,1} on the current
// loop, which indicates a strided store. If we have something else, it's a
@@ -372,11 +380,11 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset,
const SCEVAddRecExpr *StoreEv =
dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
- return false;
+ return LegalStoreKind::None;
// Check to see if we have a constant stride.
if (!isa<SCEVConstant>(StoreEv->getOperand(1)))
- return false;
+ return LegalStoreKind::None;
// See if the store can be turned into a memset.
@@ -394,15 +402,13 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset,
// promote the memset.
CurLoop->isLoopInvariant(SplatValue)) {
// It looks like we can use SplatValue.
- ForMemset = true;
- return true;
+ return LegalStoreKind::Memset;
} else if (HasMemsetPattern &&
// Don't create memset_pattern16s with address spaces.
StorePtr->getType()->getPointerAddressSpace() == 0 &&
(PatternValue = getMemSetPatternValue(StoredVal, DL))) {
// It looks like we can use PatternValue!
- ForMemsetPattern = true;
- return true;
+ return LegalStoreKind::MemsetPattern;
}
// Otherwise, see if the store can be turned into a memcpy.
@@ -412,12 +418,12 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset,
APInt Stride = getStoreStride(StoreEv);
unsigned StoreSize = getStoreSizeInBytes(SI, DL);
if (StoreSize != Stride && StoreSize != -Stride)
- return false;
+ return LegalStoreKind::None;
// The store must be feeding a non-volatile load.
LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
if (!LI || !LI->isSimple())
- return false;
+ return LegalStoreKind::None;
// See if the pointer expression is an AddRec like {base,+,1} on the current
// loop, which indicates a strided load. If we have something else, it's a
@@ -425,18 +431,17 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset,
const SCEVAddRecExpr *LoadEv =
dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
- return false;
+ return LegalStoreKind::None;
// The store and load must share the same stride.
if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
- return false;
+ return LegalStoreKind::None;
// Success. This store can be converted into a memcpy.
- ForMemcpy = true;
- return true;
+ return LegalStoreKind::Memcpy;
}
// This store can't be transformed into a memset/memcpy.
- return false;
+ return LegalStoreKind::None;
}
void LoopIdiomRecognize::collectStores(BasicBlock *BB) {
@@ -448,24 +453,28 @@ void LoopIdiomRecognize::collectStores(BasicBlock *BB) {
if (!SI)
continue;
- bool ForMemset = false;
- bool ForMemsetPattern = false;
- bool ForMemcpy = false;
// Make sure this is a strided store with a constant stride.
- if (!isLegalStore(SI, ForMemset, ForMemsetPattern, ForMemcpy))
- continue;
-
- // Save the store locations.
- if (ForMemset) {
+ switch (isLegalStore(SI)) {
+ case LegalStoreKind::None:
+ // Nothing to do
+ break;
+ case LegalStoreKind::Memset: {
// Find the base pointer.
Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), *DL);
StoreRefsForMemset[Ptr].push_back(SI);
- } else if (ForMemsetPattern) {
+ } break;
+ case LegalStoreKind::MemsetPattern: {
// Find the base pointer.
Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), *DL);
StoreRefsForMemsetPattern[Ptr].push_back(SI);
- } else if (ForMemcpy)
+ } break;
+ case LegalStoreKind::Memcpy:
StoreRefsForMemcpy.push_back(SI);
+ break;
+ default:
+ assert(false && "unhandled return value");
+ break;
+ }
}
}
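
The memset case that isLegalStore now reports as LegalStoreKind::Memset covers loops like the one below: a unit-strided store of a loop-invariant splat value. A standalone sketch showing that the loop form and the library call it gets replaced with agree byte for byte:

```cpp
#include <cassert>
#include <cstring>

// {base,+,1} strided store of a loop-invariant splat value: the idiom the
// pass rewrites into a single memset over the whole range.
void loopForm(unsigned char *P, unsigned N, unsigned char V) {
  for (unsigned I = 0; I != N; ++I)
    P[I] = V;
}

int main() {
  unsigned char A[16], B[16];
  loopForm(A, 16, 0xAB);
  std::memset(B, 0xAB, 16);
  assert(std::memcmp(A, B, 16) == 0);
}
```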
diff --git a/lib/Transforms/Scalar/LoopPredication.cpp b/lib/Transforms/Scalar/LoopPredication.cpp
index 0ce604429326..32fd3da465fe 100644
--- a/lib/Transforms/Scalar/LoopPredication.cpp
+++ b/lib/Transforms/Scalar/LoopPredication.cpp
@@ -58,12 +58,30 @@ using namespace llvm;
namespace {
class LoopPredication {
+ /// Represents an induction variable check:
+ /// icmp Pred, <induction variable>, <loop invariant limit>
+ struct LoopICmp {
+ ICmpInst::Predicate Pred;
+ const SCEVAddRecExpr *IV;
+ const SCEV *Limit;
+ LoopICmp(ICmpInst::Predicate Pred, const SCEVAddRecExpr *IV,
+ const SCEV *Limit)
+ : Pred(Pred), IV(IV), Limit(Limit) {}
+ LoopICmp() {}
+ };
+
ScalarEvolution *SE;
Loop *L;
const DataLayout *DL;
BasicBlock *Preheader;
+ Optional<LoopICmp> parseLoopICmp(ICmpInst *ICI);
+
+ Value *expandCheck(SCEVExpander &Expander, IRBuilder<> &Builder,
+ ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS,
+ Instruction *InsertAt);
+
Optional<Value *> widenICmpRangeCheck(ICmpInst *ICI, SCEVExpander &Expander,
IRBuilder<> &Builder);
bool widenGuardConditions(IntrinsicInst *II, SCEVExpander &Expander);
@@ -116,16 +134,10 @@ PreservedAnalyses LoopPredicationPass::run(Loop &L, LoopAnalysisManager &AM,
return getLoopPassPreservedAnalyses();
}
-/// If ICI can be widened to a loop invariant condition emits the loop
-/// invariant condition in the loop preheader and return it, otherwise
-/// returns None.
-Optional<Value *> LoopPredication::widenICmpRangeCheck(ICmpInst *ICI,
- SCEVExpander &Expander,
- IRBuilder<> &Builder) {
- DEBUG(dbgs() << "Analyzing ICmpInst condition:\n");
- DEBUG(ICI->dump());
-
+Optional<LoopPredication::LoopICmp>
+LoopPredication::parseLoopICmp(ICmpInst *ICI) {
ICmpInst::Predicate Pred = ICI->getPredicate();
+
Value *LHS = ICI->getOperand(0);
Value *RHS = ICI->getOperand(1);
const SCEV *LHSS = SE->getSCEV(LHS);
@@ -135,17 +147,54 @@ Optional<Value *> LoopPredication::widenICmpRangeCheck(ICmpInst *ICI,
if (isa<SCEVCouldNotCompute>(RHSS))
return None;
- // Canonicalize RHS to be loop invariant bound, LHS - a loop computable index
+ // Canonicalize RHS to be loop invariant bound, LHS - a loop computable IV
if (SE->isLoopInvariant(LHSS, L)) {
std::swap(LHS, RHS);
std::swap(LHSS, RHSS);
Pred = ICmpInst::getSwappedPredicate(Pred);
}
- if (!SE->isLoopInvariant(RHSS, L) || !isSafeToExpand(RHSS, *SE))
+
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHSS);
+ if (!AR || AR->getLoop() != L)
return None;
- const SCEVAddRecExpr *IndexAR = dyn_cast<SCEVAddRecExpr>(LHSS);
- if (!IndexAR || IndexAR->getLoop() != L)
+ return LoopICmp(Pred, AR, RHSS);
+}
+
+Value *LoopPredication::expandCheck(SCEVExpander &Expander,
+ IRBuilder<> &Builder,
+ ICmpInst::Predicate Pred, const SCEV *LHS,
+ const SCEV *RHS, Instruction *InsertAt) {
+ Type *Ty = LHS->getType();
+ assert(Ty == RHS->getType() && "expandCheck operands have different types?");
+ Value *LHSV = Expander.expandCodeFor(LHS, Ty, InsertAt);
+ Value *RHSV = Expander.expandCodeFor(RHS, Ty, InsertAt);
+ return Builder.CreateICmp(Pred, LHSV, RHSV);
+}
+
+/// If ICI can be widened to a loop invariant condition emits the loop
+/// invariant condition in the loop preheader and return it, otherwise
+/// returns None.
+Optional<Value *> LoopPredication::widenICmpRangeCheck(ICmpInst *ICI,
+ SCEVExpander &Expander,
+ IRBuilder<> &Builder) {
+ DEBUG(dbgs() << "Analyzing ICmpInst condition:\n");
+ DEBUG(ICI->dump());
+
+ auto RangeCheck = parseLoopICmp(ICI);
+ if (!RangeCheck) {
+ DEBUG(dbgs() << "Failed to parse the loop latch condition!\n");
+ return None;
+ }
+
+ ICmpInst::Predicate Pred = RangeCheck->Pred;
+ const SCEVAddRecExpr *IndexAR = RangeCheck->IV;
+ const SCEV *RHSS = RangeCheck->Limit;
+
+ auto CanExpand = [this](const SCEV *S) {
+ return SE->isLoopInvariant(S, L) && isSafeToExpand(S, *SE);
+ };
+ if (!CanExpand(RHSS))
return None;
DEBUG(dbgs() << "IndexAR: ");
@@ -170,17 +219,13 @@ Optional<Value *> LoopPredication::widenICmpRangeCheck(ICmpInst *ICI,
DEBUG(dbgs() << "NewLHSS: ");
DEBUG(NewLHSS->dump());
- if (!SE->isLoopInvariant(NewLHSS, L) || !isSafeToExpand(NewLHSS, *SE))
+ if (!CanExpand(NewLHSS))
return None;
DEBUG(dbgs() << "NewLHSS is loop invariant and safe to expand. Expand!\n");
- Type *Ty = LHS->getType();
Instruction *InsertAt = Preheader->getTerminator();
- assert(Ty == RHS->getType() && "icmp operands have different types?");
- Value *NewLHS = Expander.expandCodeFor(NewLHSS, Ty, InsertAt);
- Value *NewRHS = Expander.expandCodeFor(RHSS, Ty, InsertAt);
- return Builder.CreateICmp(Pred, NewLHS, NewRHS);
+ return expandCheck(Expander, Builder, Pred, NewLHSS, RHSS, InsertAt);
}
bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard,
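
parseLoopICmp canonicalizes the comparison so the induction variable ends up on the left, swapping the predicate via getSwappedPredicate whenever it swaps the operands. A standalone model of that predicate swap over a four-predicate subset (the enum here is illustrative, not LLVM's):

```cpp
#include <cassert>

enum Pred { SLT, SGT, SLE, SGE };

// Swapping operands is sound only if the predicate is mirrored too.
Pred swapped(Pred P) {
  switch (P) {
  case SLT: return SGT;
  case SGT: return SLT;
  case SLE: return SGE;
  case SGE: return SLE;
  }
  return P;
}

bool eval(Pred P, int L, int R) {
  switch (P) {
  case SLT: return L < R;
  case SGT: return L > R;
  case SLE: return L <= R;
  case SGE: return L >= R;
  }
  return false;
}

int main() {
  // cmp(P, L, R) must equal cmp(swapped(P), R, L) for all values.
  for (int L = -2; L <= 2; ++L)
    for (int R = -2; R <= 2; ++R)
      for (Pred P : {SLT, SGT, SLE, SGE})
        assert(eval(P, L, R) == eval(swapped(P), R, L));
}
```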
@@ -272,6 +317,9 @@ bool LoopPredication::runOnLoop(Loop *Loop) {
if (II->getIntrinsicID() == Intrinsic::experimental_guard)
Guards.push_back(II);
+ if (Guards.empty())
+ return false;
+
SCEVExpander Expander(*SE, *DL, "loop-predication");
bool Changed = false;
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index 2ba9265566a8..7312d97f8efe 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -347,7 +347,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// in the map.
ValueMap[Inst] = V;
if (!C->mayHaveSideEffects()) {
- delete C;
+ C->deleteValue();
C = nullptr;
}
} else {
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index bd1f21c69eba..28d94497a3ef 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -3805,6 +3805,7 @@ void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
continue;
+ F.canonicalize(*L);
(void)InsertFormula(LU, LUIdx, F);
}
}
diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp
index 0e7572f8d2e5..5cfbf6baeaa9 100644
--- a/lib/Transforms/Scalar/NewGVN.cpp
+++ b/lib/Transforms/Scalar/NewGVN.cpp
@@ -30,9 +30,19 @@
/// tracks what operations have a given value number (IE it also tracks the
/// reverse mapping from value number -> operations with that value number), so
/// that it only needs to reprocess the instructions that are affected when
-/// something's value number changes. The rest of the algorithm is devoted to
-/// performing symbolic evaluation, forward propagation, and simplification of
-/// operations based on the value numbers deduced so far.
+/// something's value number changes. The vast majority of complexity and code
+/// in this file is devoted to tracking what value numbers could change for what
+/// instructions when various things happen. The rest of the algorithm is
+/// devoted to performing symbolic evaluation, forward propagation, and
+/// simplification of operations based on the value numbers deduced so far.
+///
+/// In order to make the GVN mostly-complete, we use a technique derived from
+/// "Detection of Redundant Expressions: A Complete and Polynomial-time
+/// Algorithm in SSA" by R.R. Pai. The source of incompleteness in most SSA
+/// based GVN algorithms is related to their inability to detect equivalence
+/// between phi of ops (IE phi(a+b, c+d)) and op of phis (phi(a,c) + phi(b, d)).
+/// We resolve this issue by generating the equivalent "phi of ops" form for
+/// each op of phis we see, in a way that only takes polynomial time to resolve.
///
/// We also do not perform elimination by using any published algorithm. All
/// published algorithms are O(Instructions). Instead, we use a technique that
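
The phi-of-ops equivalence cited in the new header comment is easy to state concretely: along either incoming edge, every phi selects the arm from the same predecessor, so the op of phis computes exactly the phi of ops. A standalone sketch checking phi(a,c) + phi(b,d) against phi(a+b, c+d) for both edges:

```cpp
#include <cassert>

// "op of phis": two phis feeding one add.
int opOfPhis(bool FromFirstPred, int A, int B, int C, int D) {
  int X = FromFirstPred ? A : C; // phi(a, c)
  int Y = FromFirstPred ? B : D; // phi(b, d)
  return X + Y;
}

// "phi of ops": one phi over the per-predecessor adds.
int phiOfOps(bool FromFirstPred, int A, int B, int C, int D) {
  return FromFirstPred ? A + B : C + D; // phi(a+b, c+d)
}

int main() {
  for (bool Pred : {false, true})
    for (int A = -1; A <= 1; ++A)
      for (int B = -1; B <= 1; ++B)
        for (int C = -1; C <= 1; ++C)
          for (int D = -1; D <= 1; ++D)
            assert(opOfPhis(Pred, A, B, C, D) == phiOfOps(Pred, A, B, C, D));
}
```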
@@ -51,7 +61,6 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -104,12 +113,14 @@ STATISTIC(NumGVNLeaderChanges, "Number of leader changes");
STATISTIC(NumGVNSortedLeaderChanges, "Number of sorted leader changes");
STATISTIC(NumGVNAvoidedSortedLeaderChanges,
"Number of avoided sorted leader changes");
-STATISTIC(NumGVNNotMostDominatingLeader,
- "Number of times a member dominated it's new classes' leader");
STATISTIC(NumGVNDeadStores, "Number of redundant/dead stores eliminated");
+STATISTIC(NumGVNPHIOfOpsCreated, "Number of PHI of ops created");
+STATISTIC(NumGVNPHIOfOpsEliminations,
+ "Number of things eliminated using PHI of ops");
DEBUG_COUNTER(VNCounter, "newgvn-vn",
"Controls which instructions are value numbered")
-
+DEBUG_COUNTER(PHIOfOpsCounter, "newgvn-phi",
+ "Controls which instructions we create phi of ops for")
// Currently store defining access refinement is too slow due to basicaa being
// egregiously slow. This flag lets us keep it working while we work on this
// issue.
@@ -172,10 +183,9 @@ private:
}
}
// See if we really were the root of a component, by seeing if we still have
- // our DFSNumber.
- // If we do, we are the root of the component, and we have completed a
- // component. If we do not,
- // we are not the root of a component, and belong on the component stack.
+ // our DFSNumber. If we do, we are the root of the component, and we have
+ // completed a component. If we do not, we are not the root of a component,
+ // and belong on the component stack.
if (Root.lookup(I) == OurDFS) {
unsigned ComponentID = Components.size();
Components.resize(Components.size() + 1);
@@ -367,6 +377,7 @@ private:
int StoreCount = 0;
};
+struct HashedExpression;
namespace llvm {
template <> struct DenseMapInfo<const Expression *> {
static const Expression *getEmptyKey() {
@@ -379,9 +390,11 @@ template <> struct DenseMapInfo<const Expression *> {
Val <<= PointerLikeTypeTraits<const Expression *>::NumLowBitsAvailable;
return reinterpret_cast<const Expression *>(Val);
}
- static unsigned getHashValue(const Expression *V) {
- return static_cast<unsigned>(V->getHashValue());
+ static unsigned getHashValue(const Expression *E) {
+ return static_cast<unsigned>(E->getHashValue());
}
+ static unsigned getHashValue(const HashedExpression &HE);
+ static bool isEqual(const HashedExpression &LHS, const Expression *RHS);
static bool isEqual(const Expression *LHS, const Expression *RHS) {
if (LHS == RHS)
return true;
@@ -393,6 +406,26 @@ template <> struct DenseMapInfo<const Expression *> {
};
} // end namespace llvm
+// This is just a wrapper around Expression that computes the hash value once at
+// creation time. Hash values for an Expression can't change once they are
+// inserted into the DenseMap (it breaks DenseMap), so they must be immutable at
+// that point anyway.
+struct HashedExpression {
+ const Expression *E;
+ unsigned HashVal;
+ HashedExpression(const Expression *E)
+ : E(E), HashVal(DenseMapInfo<const Expression *>::getHashValue(E)) {}
+};
+
+unsigned
+DenseMapInfo<const Expression *>::getHashValue(const HashedExpression &HE) {
+ return HE.HashVal;
+}
+bool DenseMapInfo<const Expression *>::isEqual(const HashedExpression &LHS,
+ const Expression *RHS) {
+ return isEqual(LHS.E, RHS);
+}
+
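HashedExpression above is the classic hash-once, probe-many pattern. A standalone model with std::string as the key type (illustrative, not the Expression class):

```cpp
#include <cassert>
#include <functional>
#include <string>

// When one key is probed against many table entries, hash it once up front
// and reuse the cached value instead of rehashing per comparison. The key
// must not change afterwards, or the cached hash goes stale; that is the
// same immutability requirement DenseMap places on inserted keys.
struct HashedKey {
  const std::string *Key;
  size_t HashVal;
  explicit HashedKey(const std::string &K)
      : Key(&K), HashVal(std::hash<std::string>{}(K)) {}
};

int main() {
  std::string S = "phi(a+b, c+d)";
  HashedKey HK(S);
  // Every probe can compare HK.HashVal first and only fall back to a full
  // equality check on a hash match.
  assert(HK.HashVal == std::hash<std::string>{}(S));
}
```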
namespace {
class NewGVN {
Function &F;
@@ -430,6 +463,33 @@ class NewGVN {
// Value Mappings.
DenseMap<Value *, CongruenceClass *> ValueToClass;
DenseMap<Value *, const Expression *> ValueToExpression;
+ // Value PHI handling, used to make equivalence between phi(op, op) and
+ // op(phi, phi).
+ // These mappings just store various data that would normally be part of the
+ // IR.
+ DenseSet<const Instruction *> PHINodeUses;
+ // Map a temporary instruction we created to a parent block.
+ DenseMap<const Value *, BasicBlock *> TempToBlock;
+ // Map between the temporary phis we created and the real instructions they
+ // are known equivalent to.
+ DenseMap<const Value *, PHINode *> RealToTemp;
+ // In order to know when we should re-process instructions that have
+ // phi-of-ops, we track the set of expressions that they needed as
+ // leaders. When we discover new leaders for those expressions, we process the
+ // associated phi-of-op instructions again in case they have changed. The
+ // other way they may change is if they had leaders, and those leaders
+ // disappear. However, at the point they have leaders, there are uses of the
+ // relevant operands in the created phi node, and so they will get reprocessed
+ // through the normal user marking we perform.
+ mutable DenseMap<const Value *, SmallPtrSet<Value *, 2>> AdditionalUsers;
+ DenseMap<const Expression *, SmallPtrSet<Instruction *, 2>>
+ ExpressionToPhiOfOps;
+ // Map from basic block to the temporary operations we created
+ DenseMap<const BasicBlock *, SmallVector<PHINode *, 8>> PHIOfOpsPHIs;
+ // Map from temporary operation to MemoryAccess.
+ DenseMap<const Instruction *, MemoryUseOrDef *> TempToMemory;
+ // Set of all temporary instructions we created.
+ DenseSet<Instruction *> AllTempInstructions;
// Mapping from predicate info we used to the instructions we used it with.
// In order to correctly ensure propagation, we must keep track of what
@@ -462,12 +522,19 @@ class NewGVN {
enum MemoryPhiState { MPS_Invalid, MPS_TOP, MPS_Equivalent, MPS_Unique };
DenseMap<const MemoryPhi *, MemoryPhiState> MemoryPhiState;
- enum PhiCycleState { PCS_Unknown, PCS_CycleFree, PCS_Cycle };
- mutable DenseMap<const PHINode *, PhiCycleState> PhiCycleState;
+ enum InstCycleState { ICS_Unknown, ICS_CycleFree, ICS_Cycle };
+ mutable DenseMap<const Instruction *, InstCycleState> InstCycleState;
// Expression to class mapping.
using ExpressionClassMap = DenseMap<const Expression *, CongruenceClass *>;
ExpressionClassMap ExpressionToClass;
+ // We have a single expression that represents currently DeadExpressions.
+ // For dead expressions we can prove will stay dead, we mark them with
+ // DFS number zero. However, it's possible in the case of phi nodes
+ // for us to assume/prove all arguments are dead during fixpointing.
+ // We use DeadExpression for that case.
+ DeadExpression *SingletonDeadExpression = nullptr;
+
// Which values have changed as a result of leader changes.
SmallPtrSet<Value *, 8> LeaderChanges;
@@ -521,7 +588,8 @@ private:
const Expression *createBinaryExpression(unsigned, Type *, Value *,
Value *) const;
PHIExpression *createPHIExpression(Instruction *, bool &HasBackEdge,
- bool &AllConstant) const;
+ bool &OriginalOpsConstant) const;
+ const DeadExpression *createDeadExpression() const;
const VariableExpression *createVariableExpression(Value *) const;
const ConstantExpression *createConstantExpression(Constant *) const;
const Expression *createVariableOrConstant(Value *V) const;
@@ -562,6 +630,9 @@ private:
return CClass;
}
void initializeCongruenceClasses(Function &F);
+ const Expression *makePossiblePhiOfOps(Instruction *, bool,
+ SmallPtrSetImpl<Value *> &);
+ void addPhiOfOps(PHINode *Op, BasicBlock *BB, Instruction *ExistingValue);
// Value number an Instruction or MemoryPhi.
void valueNumberMemoryPhi(MemoryPhi *);
@@ -570,7 +641,8 @@ private:
// Symbolic evaluation.
const Expression *checkSimplificationResults(Expression *, Instruction *,
Value *) const;
- const Expression *performSymbolicEvaluation(Value *) const;
+ const Expression *performSymbolicEvaluation(Value *,
+ SmallPtrSetImpl<Value *> &) const;
const Expression *performSymbolicLoadCoercion(Type *, Value *, LoadInst *,
Instruction *,
MemoryAccess *) const;
@@ -595,7 +667,7 @@ private:
bool setMemoryClass(const MemoryAccess *From, CongruenceClass *To);
CongruenceClass *getMemoryClass(const MemoryAccess *MA) const;
const MemoryAccess *lookupMemoryLeader(const MemoryAccess *) const;
- bool isMemoryAccessTop(const MemoryAccess *) const;
+ bool isMemoryAccessTOP(const MemoryAccess *) const;
// Ranking
unsigned int getRank(const Value *) const;
@@ -619,19 +691,26 @@ private:
void replaceInstruction(Instruction *, Value *);
void markInstructionForDeletion(Instruction *);
void deleteInstructionsInBlock(BasicBlock *);
+ Value *findPhiOfOpsLeader(const Expression *E, const BasicBlock *BB) const;
// New instruction creation.
void handleNewInstruction(Instruction *){};
// Various instruction touch utilities
+ template <typename Map, typename KeyType, typename Func>
+ void for_each_found(Map &, const KeyType &, Func);
+ template <typename Map, typename KeyType>
+ void touchAndErase(Map &, const KeyType &);
void markUsersTouched(Value *);
void markMemoryUsersTouched(const MemoryAccess *);
void markMemoryDefTouched(const MemoryAccess *);
void markPredicateUsersTouched(Instruction *);
void markValueLeaderChangeTouched(CongruenceClass *CC);
void markMemoryLeaderChangeTouched(CongruenceClass *CC);
+ void markPhiOfOpsChanged(const HashedExpression &HE);
void addPredicateUsers(const PredicateBase *, Instruction *) const;
void addMemoryUsers(const MemoryAccess *To, MemoryAccess *U) const;
+ void addAdditionalUsers(Value *To, Value *User) const;
// Main loop of value numbering
void iterateTouchedInstructions();
@@ -639,13 +718,18 @@ private:
// Utilities.
void cleanupTables();
std::pair<unsigned, unsigned> assignDFSNumbers(BasicBlock *, unsigned);
- void updateProcessedCount(Value *V);
+ void updateProcessedCount(const Value *V);
void verifyMemoryCongruency() const;
void verifyIterationSettled(Function &F);
void verifyStoreExpressions() const;
- bool singleReachablePHIPath(const MemoryAccess *, const MemoryAccess *) const;
+ bool singleReachablePHIPath(SmallPtrSet<const MemoryAccess *, 8> &,
+ const MemoryAccess *, const MemoryAccess *) const;
BasicBlock *getBlockForValue(Value *V) const;
void deleteExpression(const Expression *E) const;
+ MemoryUseOrDef *getMemoryAccess(const Instruction *) const;
+ MemoryAccess *getDefiningAccess(const MemoryAccess *) const;
+ MemoryPhi *getMemoryAccess(const BasicBlock *) const;
+ template <class T, class Range> T *getMinDFSOfRange(const Range &) const;
unsigned InstrToDFSNum(const Value *V) const {
assert(isa<Instruction>(V) && "This should not be used for MemoryAccesses");
return InstrDFS.lookup(V);
@@ -665,8 +749,8 @@ private:
? InstrToDFSNum(cast<MemoryUseOrDef>(MA)->getMemoryInst())
: InstrDFS.lookup(MA);
}
- bool isCycleFree(const PHINode *PN) const;
- template <class T, class Range> T *getMinDFSOfRange(const Range &) const;
+ bool isCycleFree(const Instruction *) const;
+ bool isBackedge(BasicBlock *From, BasicBlock *To) const;
// Debug counter info. When verifying, we have to reset the value numbering
// debug counter to the same state it started in to get the same results.
std::pair<int, int> StartingVNCounter;
@@ -694,20 +778,46 @@ bool StoreExpression::equals(const Expression &Other) const {
return true;
}
+// Determine if the edge From->To is a backedge
+bool NewGVN::isBackedge(BasicBlock *From, BasicBlock *To) const {
+ if (From == To)
+ return true;
+ auto *FromDTN = DT->getNode(From);
+ auto *ToDTN = DT->getNode(To);
+ return RPOOrdering.lookup(FromDTN) >= RPOOrdering.lookup(ToDTN);
+}
+
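
The backedge test above works because a reverse post-order numbering gives every forward CFG edge a strictly increasing number, so an edge that fails to increase the number (or a self-loop) must be a backedge. A minimal, self-contained sketch of the same idea on a toy adjacency-list graph, using only the standard library; the Graph type and helper names are illustrative, not part of the patch, and NewGVN keys its RPOOrdering map by dominator-tree node rather than directly by block:

    #include <cstddef>
    #include <iostream>
    #include <vector>

    // Toy CFG: Adj[b] lists the successors of block b; block 0 is the entry.
    using Graph = std::vector<std::vector<int>>;

    // Post-order DFS; reversing the finish order yields an RPO numbering.
    static void postOrder(const Graph &Adj, int B, std::vector<bool> &Seen,
                          std::vector<int> &Order) {
      Seen[B] = true;
      for (int S : Adj[B])
        if (!Seen[S])
          postOrder(Adj, S, Seen, Order);
      Order.push_back(B);
    }

    static std::vector<int> rpoNumbers(const Graph &Adj) {
      std::vector<bool> Seen(Adj.size(), false);
      std::vector<int> Order;
      postOrder(Adj, 0, Seen, Order);
      std::vector<int> RPO(Adj.size(), -1);
      // The block finished last gets RPO number 0, and so on.
      for (std::size_t I = 0; I < Order.size(); ++I)
        RPO[Order[Order.size() - 1 - I]] = static_cast<int>(I);
      return RPO;
    }

    // Mirrors NewGVN::isBackedge: self-loops and edges that do not increase
    // the RPO number are treated as backedges.
    static bool isBackedge(const std::vector<int> &RPO, int From, int To) {
      return From == To || RPO[From] >= RPO[To];
    }

    int main() {
      // 0 -> 1 -> 2 -> 1 (loop), 2 -> 3 (exit).
      Graph Adj = {{1}, {2}, {1, 3}, {}};
      auto RPO = rpoNumbers(Adj);
      std::cout << isBackedge(RPO, 2, 1) << ' '   // 1: the loop backedge
                << isBackedge(RPO, 1, 2) << '\n'; // 0: a forward edge
    }
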
#ifndef NDEBUG
static std::string getBlockName(const BasicBlock *B) {
return DOTGraphTraits<const Function *>::getSimpleNodeLabel(B, nullptr);
}
#endif
+// Get a MemoryAccess for an instruction, fake or real.
+MemoryUseOrDef *NewGVN::getMemoryAccess(const Instruction *I) const {
+ auto *Result = MSSA->getMemoryAccess(I);
+ return Result ? Result : TempToMemory.lookup(I);
+}
+
+// Get a MemoryPhi for a basic block. These are all real.
+MemoryPhi *NewGVN::getMemoryAccess(const BasicBlock *BB) const {
+ return MSSA->getMemoryAccess(BB);
+}
+
// Get the basic block from an instruction/memory value.
BasicBlock *NewGVN::getBlockForValue(Value *V) const {
- if (auto *I = dyn_cast<Instruction>(V))
- return I->getParent();
- else if (auto *MP = dyn_cast<MemoryPhi>(V))
- return MP->getBlock();
- llvm_unreachable("Should have been able to figure out a block for our value");
- return nullptr;
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ auto *Parent = I->getParent();
+ if (Parent)
+ return Parent;
+ Parent = TempToBlock.lookup(V);
+ assert(Parent && "Every fake instruction should have a block");
+ return Parent;
+ }
+
+ auto *MP = dyn_cast<MemoryPhi>(V);
+ assert(MP && "Should have been an instruction or a MemoryPhi");
+ return MP->getBlock();
}
// Delete a definitely dead expression, so it can be reused by the expression
@@ -719,10 +829,9 @@ void NewGVN::deleteExpression(const Expression *E) const {
const_cast<BasicExpression *>(BE)->deallocateOperands(ArgRecycler);
ExpressionAllocator.Deallocate(E);
}
-
PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge,
- bool &AllConstant) const {
- BasicBlock *PHIBlock = I->getParent();
+ bool &OriginalOpsConstant) const {
+ BasicBlock *PHIBlock = getBlockForValue(I);
auto *PN = cast<PHINode>(I);
auto *E =
new (ExpressionAllocator) PHIExpression(PN->getNumOperands(), PHIBlock);
@@ -731,8 +840,6 @@ PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge,
E->setType(I->getType());
E->setOpcode(I->getOpcode());
- unsigned PHIRPO = RPOOrdering.lookup(DT->getNode(PHIBlock));
-
// NewGVN assumes the operands of a PHI node are in a consistent order across
// PHIs. LLVM doesn't seem to always guarantee this. While we need to fix
// this in LLVM at some point we don't want GVN to find wrong congruences.
@@ -753,18 +860,20 @@ PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge,
auto Filtered = make_filter_range(PHIOperands, [&](const Use *U) {
return ReachableEdges.count({PN->getIncomingBlock(*U), PHIBlock});
});
-
std::transform(Filtered.begin(), Filtered.end(), op_inserter(E),
[&](const Use *U) -> Value * {
auto *BB = PN->getIncomingBlock(*U);
- auto *DTN = DT->getNode(BB);
- if (RPOOrdering.lookup(DTN) >= PHIRPO)
- HasBackedge = true;
- AllConstant &= isa<UndefValue>(*U) || isa<Constant>(*U);
-
- // Don't try to transform self-defined phis.
+ HasBackedge = HasBackedge || isBackedge(BB, PHIBlock);
+ OriginalOpsConstant =
+ OriginalOpsConstant && isa<Constant>(*U);
+ // Use nullptr to distinguish between things that were
+ // originally self-defined and those that have an operand
+ // leader that is self-defined.
if (*U == PN)
- return PN;
+ return nullptr;
+ // Things in TOPClass are equivalent to everything.
+ if (ValueToClass.lookup(*U) == TOPClass)
+ return nullptr;
return lookupOperandLeader(*U);
});
return E;
@@ -785,7 +894,7 @@ bool NewGVN::setBasicExpressionInfo(Instruction *I, BasicExpression *E) const {
// whether all members are constant.
std::transform(I->op_begin(), I->op_end(), op_inserter(E), [&](Value *O) {
auto Operand = lookupOperandLeader(O);
- AllConstant &= isa<Constant>(Operand);
+ AllConstant = AllConstant && isa<Constant>(Operand);
return Operand;
});
@@ -848,7 +957,7 @@ const Expression *NewGVN::checkSimplificationResults(Expression *E,
if (CC && CC->getDefiningExpr()) {
if (I)
DEBUG(dbgs() << "Simplified " << *I << " to "
- << " expression " << *V << "\n");
+ << " expression " << *CC->getDefiningExpr() << "\n");
NumGVNOpsSimplified++;
deleteExpression(E);
return CC->getDefiningExpr();
@@ -961,6 +1070,12 @@ NewGVN::createAggregateValueExpression(Instruction *I) const {
llvm_unreachable("Unhandled type of aggregate value operation");
}
+const DeadExpression *NewGVN::createDeadExpression() const {
+ // DeadExpression has no arguments and all DeadExpression's are the same,
+ // so we only need one of them.
+ return SingletonDeadExpression;
+}
+
const VariableExpression *NewGVN::createVariableExpression(Value *V) const {
auto *E = new (ExpressionAllocator) VariableExpression(V);
E->setOpcode(V->getValueID());
@@ -1032,7 +1147,7 @@ bool NewGVN::someEquivalentDominates(const Instruction *Inst,
Value *NewGVN::lookupOperandLeader(Value *V) const {
CongruenceClass *CC = ValueToClass.lookup(V);
if (CC) {
- // Everything in TOP is represneted by undef, as it can be any value.
+ // Everything in TOP is represented by undef, as it can be any value.
// We do have to make sure we get the type right though, so we can't set the
// RepLeader to undef.
if (CC == TOPClass)
@@ -1054,7 +1169,7 @@ const MemoryAccess *NewGVN::lookupMemoryLeader(const MemoryAccess *MA) const {
// Return true if the MemoryAccess is really equivalent to everything. This is
// equivalent to the lattice value "TOP" in most lattices. This is the initial
// state of all MemoryAccesses.
-bool NewGVN::isMemoryAccessTop(const MemoryAccess *MA) const {
+bool NewGVN::isMemoryAccessTOP(const MemoryAccess *MA) const {
return getMemoryClass(MA) == TOPClass;
}
@@ -1100,7 +1215,7 @@ const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I) const {
// Unlike loads, we never try to eliminate stores, so we do not check if they
// are simple and avoid value numbering them.
auto *SI = cast<StoreInst>(I);
- auto *StoreAccess = MSSA->getMemoryAccess(SI);
+ auto *StoreAccess = getMemoryAccess(SI);
// Get the expression, if any, for the RHS of the MemoryDef.
const MemoryAccess *StoreRHS = StoreAccess->getDefiningAccess();
if (EnableStoreRefinement)
@@ -1108,7 +1223,6 @@ const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I) const {
// If we bypassed the use-def chains, make sure we add a use.
if (StoreRHS != StoreAccess->getDefiningAccess())
addMemoryUsers(StoreRHS, StoreAccess);
-
StoreRHS = lookupMemoryLeader(StoreRHS);
// If we are defined by ourselves, use the live on entry def.
if (StoreRHS == StoreAccess)
@@ -1137,9 +1251,9 @@ const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I) const {
dyn_cast<LoadInst>(lookupOperandLeader(SI->getValueOperand()))) {
if ((lookupOperandLeader(LI->getPointerOperand()) ==
lookupOperandLeader(SI->getPointerOperand())) &&
- (lookupMemoryLeader(MSSA->getMemoryAccess(LI)->getDefiningAccess()) ==
+ (lookupMemoryLeader(getMemoryAccess(LI)->getDefiningAccess()) ==
StoreRHS))
- return createVariableExpression(LI);
+ return createStoreExpression(SI, StoreRHS);
}
}
@@ -1241,8 +1355,9 @@ const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I) const {
// Load of undef is undef.
if (isa<UndefValue>(LoadAddressLeader))
return createConstantExpression(UndefValue::get(LI->getType()));
-
- MemoryAccess *DefiningAccess = MSSAWalker->getClobberingMemoryAccess(I);
+ MemoryAccess *OriginalAccess = getMemoryAccess(I);
+ MemoryAccess *DefiningAccess =
+ MSSAWalker->getClobberingMemoryAccess(OriginalAccess);
if (!MSSA->isLiveOnEntryDef(DefiningAccess)) {
if (auto *MD = dyn_cast<MemoryDef>(DefiningAccess)) {
@@ -1331,6 +1446,7 @@ NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) const {
// operands are equal, because assumes must always be true.
if (CmpInst::isTrueWhenEqual(Predicate)) {
addPredicateUsers(PI, I);
+ addAdditionalUsers(Cmp->getOperand(0), I);
return createVariableOrConstant(FirstOp);
}
}
@@ -1343,6 +1459,7 @@ NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) const {
if ((PBranch->TrueEdge && Predicate == CmpInst::ICMP_EQ) ||
(!PBranch->TrueEdge && Predicate == CmpInst::ICMP_NE)) {
addPredicateUsers(PI, I);
+ addAdditionalUsers(Cmp->getOperand(0), I);
return createVariableOrConstant(FirstOp);
}
// Handle the special case of floating point.
@@ -1350,6 +1467,7 @@ NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) const {
(!PBranch->TrueEdge && Predicate == CmpInst::FCMP_UNE)) &&
isa<ConstantFP>(FirstOp) && !cast<ConstantFP>(FirstOp)->isZero()) {
addPredicateUsers(PI, I);
+ addAdditionalUsers(Cmp->getOperand(0), I);
return createConstantExpression(cast<Constant>(FirstOp));
}
}
@@ -1430,34 +1548,33 @@ bool NewGVN::setMemoryClass(const MemoryAccess *From,
return Changed;
}
-// Determine if a phi is cycle-free. That means the values in the phi don't
-// depend on any expressions that can change value as a result of the phi.
-// For example, a non-cycle free phi would be v = phi(0, v+1).
-bool NewGVN::isCycleFree(const PHINode *PN) const {
- // In order to compute cycle-freeness, we do SCC finding on the phi, and see
- // what kind of SCC it ends up in. If it is a singleton, it is cycle-free.
- // If it is not in a singleton, it is only cycle free if the other members are
- // all phi nodes (as they do not compute anything, they are copies). TODO:
- // There are likely a few other intrinsics or expressions that could be
- // included here, but this happens so infrequently already that it is not
- // likely to be worth it.
- auto PCS = PhiCycleState.lookup(PN);
- if (PCS == PCS_Unknown) {
- SCCFinder.Start(PN);
- auto &SCC = SCCFinder.getComponentFor(PN);
+// Determine if an instruction is cycle-free. That means the values in the
+// instruction don't depend on any expressions that can change value as a
+// result of the instruction. For example, a non-cycle-free instruction would
+// be v = phi(0, v+1).
+bool NewGVN::isCycleFree(const Instruction *I) const {
+ // In order to compute cycle-freeness, we do SCC finding on the instruction,
+ // and see what kind of SCC it ends up in. If it is a singleton, it is
+ // cycle-free. If it is not in a singleton, it is only cycle free if the
+ // other members are all phi nodes (as they do not compute anything, they are
+ // copies).
+ auto ICS = InstCycleState.lookup(I);
+ if (ICS == ICS_Unknown) {
+ SCCFinder.Start(I);
+ auto &SCC = SCCFinder.getComponentFor(I);
// It's cycle free if its size is 1 or the SCC is *only* phi nodes.
if (SCC.size() == 1)
- PhiCycleState.insert({PN, PCS_CycleFree});
+ InstCycleState.insert({I, ICS_CycleFree});
else {
bool AllPhis =
llvm::all_of(SCC, [](const Value *V) { return isa<PHINode>(V); });
- PCS = AllPhis ? PCS_CycleFree : PCS_Cycle;
+ ICS = AllPhis ? ICS_CycleFree : ICS_Cycle;
for (auto *Member : SCC)
if (auto *MemberPhi = dyn_cast<PHINode>(Member))
- PhiCycleState.insert({MemberPhi, PCS});
+ InstCycleState.insert({MemberPhi, ICS});
}
}
- if (PCS == PCS_Cycle)
+ if (ICS == ICS_Cycle)
return false;
return true;
}
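
The memoized test above is ultimately an SCC query on the use-def graph: a value is cycle-free when its strongly connected component is a singleton, or contains only phis. A hedged sketch of that classification on a toy dependency graph, using a recursive Tarjan SCC; the Node struct and the IsPhi flag are illustrative stand-ins, and singleton SCCs with self-loops are not special-cased here because NewGVN strips self-operands during expression creation:

    #include <algorithm>
    #include <functional>
    #include <vector>

    struct Node {
      std::vector<int> Deps; // use-def edges: values this node depends on
      bool IsPhi = false;
    };

    // Tarjan's SCC; returns the component (node ids) containing Root.
    static std::vector<int> sccOf(const std::vector<Node> &G, int Root) {
      std::vector<int> Index(G.size(), -1), Low(G.size(), 0), Stack, Result;
      std::vector<bool> OnStack(G.size(), false);
      int Next = 0;
      std::function<void(int)> Strong = [&](int V) {
        Index[V] = Low[V] = Next++;
        Stack.push_back(V);
        OnStack[V] = true;
        for (int W : G[V].Deps) {
          if (Index[W] < 0) {
            Strong(W);
            Low[V] = std::min(Low[V], Low[W]);
          } else if (OnStack[W]) {
            Low[V] = std::min(Low[V], Index[W]);
          }
        }
        if (Low[V] == Index[V]) { // V is the root of an SCC
          std::vector<int> SCC;
          int W;
          do {
            W = Stack.back();
            Stack.pop_back();
            OnStack[W] = false;
            SCC.push_back(W);
          } while (W != V);
          if (std::find(SCC.begin(), SCC.end(), Root) != SCC.end())
            Result = SCC;
        }
      };
      Strong(Root);
      return Result;
    }

    // Cycle-free iff the SCC is a singleton or consists only of phis.
    static bool isCycleFree(const std::vector<Node> &G, int V) {
      auto SCC = sccOf(G, V);
      return SCC.size() == 1 ||
             std::all_of(SCC.begin(), SCC.end(),
                         [&](int W) { return G[W].IsPhi; });
    }

For v = phi(0, t); t = v + 1, the two nodes form one SCC containing a non-phi add, so neither is cycle-free, matching the v = phi(0, v+1) example above.
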
@@ -1467,10 +1584,9 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) const {
// True if one of the incoming phi edges is a backedge.
bool HasBackedge = false;
// All constant tracks the state of whether all the *original* phi operands
- // were constant. This is really shorthand for "this phi cannot cycle due
- // to forward propagation", as any change in value of the phi is guaranteed
- // not to later change the value of the phi.
- // IE it can't be v = phi(undef, v+1)
+ // were constant. This is really shorthand for "this phi cannot cycle due to
+ // forward propagation", as any change in value of the phi is guaranteed not
+ // to later change the value of the phi. IE it can't be v = phi(undef, v+1)
bool AllConstant = true;
auto *E =
cast<PHIExpression>(createPHIExpression(I, HasBackedge, AllConstant));
@@ -1478,8 +1594,16 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) const {
// See if all arguments are the same.
// We track if any were undef because they need special handling.
bool HasUndef = false;
- auto Filtered = make_filter_range(E->operands(), [&](const Value *Arg) {
- if (Arg == I)
+ bool CycleFree = isCycleFree(I);
+ auto Filtered = make_filter_range(E->operands(), [&](Value *Arg) {
+ if (Arg == nullptr)
+ return false;
+ // Original self-operands are already eliminated during expression creation.
+ // We can only eliminate value-wise self-operands if it's cycle
+ // free. Otherwise, eliminating the operand can cause our value to change,
+ // which can cause us to not eliminate the operand, which changes the value
+ // back to what it was before, cycling forever.
+ if (CycleFree && Arg == I)
return false;
if (isa<UndefValue>(Arg)) {
HasUndef = true;
@@ -1487,20 +1611,19 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) const {
}
return true;
});
- // If we are left with no operands, it's undef
+ // If we are left with no operands, it's dead.
if (Filtered.begin() == Filtered.end()) {
- DEBUG(dbgs() << "Simplified PHI node " << *I << " to undef"
- << "\n");
+ DEBUG(dbgs() << "No arguments of PHI node " << *I << " are live\n");
deleteExpression(E);
- return createConstantExpression(UndefValue::get(I->getType()));
+ return createDeadExpression();
}
unsigned NumOps = 0;
Value *AllSameValue = *(Filtered.begin());
++Filtered.begin();
// Can't use std::equal here, sadly, because filter.begin moves.
- if (llvm::all_of(Filtered, [AllSameValue, &NumOps](const Value *V) {
+ if (llvm::all_of(Filtered, [&](Value *Arg) {
++NumOps;
- return V == AllSameValue;
+ return Arg == AllSameValue;
})) {
// In LLVM's non-standard representation of phi nodes, it's possible to have
// phi nodes with cycles (IE dependent on other phis that are .... dependent
@@ -1519,7 +1642,7 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) const {
// constants, or all operands are ignored but the undef, it also must be
// cycle free.
if (!AllConstant && HasBackedge && NumOps > 0 &&
- !isa<UndefValue>(AllSameValue) && !isCycleFree(cast<PHINode>(I)))
+ !isa<UndefValue>(AllSameValue) && !CycleFree)
return E;
// Only have to check for instructions
@@ -1690,8 +1813,18 @@ const Expression *NewGVN::performSymbolicCmpEvaluation(Instruction *I) const {
return createExpression(I);
}
+// Return true if V is a value that will always be available (IE can
+// be placed anywhere) in the function. We don't do globals here
+// because they are often worse to put in place.
+// TODO: Separate cost from availability
+static bool alwaysAvailable(Value *V) {
+ return isa<Constant>(V) || isa<Argument>(V);
+}
+
// Substitute and symbolize the value before value numbering.
-const Expression *NewGVN::performSymbolicEvaluation(Value *V) const {
+const Expression *
+NewGVN::performSymbolicEvaluation(Value *V,
+ SmallPtrSetImpl<Value *> &Visited) const {
const Expression *E = nullptr;
if (auto *C = dyn_cast<Constant>(V))
E = createConstantExpression(C);
@@ -1769,12 +1902,39 @@ const Expression *NewGVN::performSymbolicEvaluation(Value *V) const {
return E;
}
+// Look up a container in a map, and then call a function for each thing in the
+// found container.
+template <typename Map, typename KeyType, typename Func>
+void NewGVN::for_each_found(Map &M, const KeyType &Key, Func F) {
+ const auto Result = M.find_as(Key);
+ if (Result != M.end())
+ for (typename Map::mapped_type::value_type Mapped : Result->second)
+ F(Mapped);
+}
+
+// Look up a container of values/instructions in a map, and touch all the
+// instructions in the container. Then erase the entry from the map.
+template <typename Map, typename KeyType>
+void NewGVN::touchAndErase(Map &M, const KeyType &Key) {
+ const auto Result = M.find_as(Key);
+ if (Result != M.end()) {
+ for (const typename Map::mapped_type::value_type Mapped : Result->second)
+ TouchedInstructions.set(InstrToDFSNum(Mapped));
+ M.erase(Result);
+ }
+}
+
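
These two helpers factor out a lookup-then-iterate pattern that previously appeared inline in markMemoryUsersTouched and markPredicateUsersTouched (see the hunks below). A standalone sketch of the same shape over plain standard containers; the real versions use DenseMap::find_as for heterogeneous keys and a bitvector of DFS numbers rather than the toy Touched vector used here:

    #include <iostream>
    #include <map>
    #include <set>
    #include <vector>

    template <typename Map, typename KeyType, typename Func>
    void for_each_found(Map &M, const KeyType &Key, Func F) {
      const auto Result = M.find(Key);
      if (Result != M.end())
        for (const auto &Mapped : Result->second)
          F(Mapped);
    }

    // Touch every user recorded for Key, then drop the entry so the same
    // users are not re-touched from a stale record later.
    template <typename Map, typename KeyType>
    void touchAndErase(Map &M, const KeyType &Key,
                       std::vector<bool> &Touched) {
      const auto Result = M.find(Key);
      if (Result != M.end()) {
        for (int User : Result->second)
          Touched[User] = true;
        M.erase(Result);
      }
    }

    int main() {
      std::map<int, std::set<int>> AdditionalUsers = {{7, {1, 3}}};
      std::vector<bool> Touched(4, false);
      touchAndErase(AdditionalUsers, 7, Touched);
      std::cout << Touched[1] << Touched[2] << Touched[3] << '\n'; // 101
      std::cout << AdditionalUsers.count(7) << '\n';               // 0
    }
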
+void NewGVN::addAdditionalUsers(Value *To, Value *User) const {
+ AdditionalUsers[To].insert(User);
+}
+
void NewGVN::markUsersTouched(Value *V) {
// Now mark the users as touched.
for (auto *User : V->users()) {
assert(isa<Instruction>(User) && "Use of value not within an instruction?");
TouchedInstructions.set(InstrToDFSNum(User));
}
+ touchAndErase(AdditionalUsers, V);
}
void NewGVN::addMemoryUsers(const MemoryAccess *To, MemoryAccess *U) const {
@@ -1791,16 +1951,15 @@ void NewGVN::markMemoryUsersTouched(const MemoryAccess *MA) {
return;
for (auto U : MA->users())
TouchedInstructions.set(MemoryToDFSNum(U));
- const auto Result = MemoryToUsers.find(MA);
- if (Result != MemoryToUsers.end()) {
- for (auto *User : Result->second)
- TouchedInstructions.set(MemoryToDFSNum(User));
- MemoryToUsers.erase(Result);
- }
+ touchAndErase(MemoryToUsers, MA);
}
// Add I to the set of users of a given predicate.
void NewGVN::addPredicateUsers(const PredicateBase *PB, Instruction *I) const {
+ // Don't add temporary instructions to the user lists.
+ if (AllTempInstructions.count(I))
+ return;
+
if (auto *PBranch = dyn_cast<PredicateBranch>(PB))
PredicateToUsers[PBranch->Condition].insert(I);
else if (auto *PAssume = dyn_cast<PredicateAssume>(PB))
@@ -1809,12 +1968,7 @@ void NewGVN::addPredicateUsers(const PredicateBase *PB, Instruction *I) const {
// Touch all the predicates that depend on this instruction.
void NewGVN::markPredicateUsersTouched(Instruction *I) {
- const auto Result = PredicateToUsers.find(I);
- if (Result != PredicateToUsers.end()) {
- for (auto *User : Result->second)
- TouchedInstructions.set(InstrToDFSNum(User));
- PredicateToUsers.erase(Result);
- }
+ touchAndErase(PredicateToUsers, I);
}
// Mark users affected by a memory leader change.
@@ -1856,11 +2010,11 @@ const MemoryAccess *NewGVN::getNextMemoryLeader(CongruenceClass *CC) const {
assert(!CC->definesNoMemory() && "Can't get next leader if there is none");
if (CC->getStoreCount() > 0) {
if (auto *NL = dyn_cast_or_null<StoreInst>(CC->getNextLeader().first))
- return MSSA->getMemoryAccess(NL);
+ return getMemoryAccess(NL);
// Find the store with the minimum DFS number.
auto *V = getMinDFSOfRange<Value>(make_filter_range(
*CC, [&](const Value *V) { return isa<StoreInst>(V); }));
- return MSSA->getMemoryAccess(cast<StoreInst>(V));
+ return getMemoryAccess(cast<StoreInst>(V));
}
assert(CC->getStoreCount() == 0);
@@ -1945,31 +2099,11 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E,
if (I == OldClass->getNextLeader().first)
OldClass->resetNextLeader();
- // It's possible, though unlikely, for us to discover equivalences such
- // that the current leader does not dominate the old one.
- // This statistic tracks how often this happens.
- // We assert on phi nodes when this happens, currently, for debugging, because
- // we want to make sure we name phi node cycles properly.
- if (isa<Instruction>(NewClass->getLeader()) && NewClass->getLeader() &&
- I != NewClass->getLeader()) {
- auto *IBB = I->getParent();
- auto *NCBB = cast<Instruction>(NewClass->getLeader())->getParent();
- bool Dominated =
- IBB == NCBB && InstrToDFSNum(I) < InstrToDFSNum(NewClass->getLeader());
- Dominated = Dominated || DT->properlyDominates(IBB, NCBB);
- if (Dominated) {
- ++NumGVNNotMostDominatingLeader;
- assert(
- !isa<PHINode>(I) &&
- "New class for instruction should not be dominated by instruction");
- }
- }
+ OldClass->erase(I);
+ NewClass->insert(I);
if (NewClass->getLeader() != I)
NewClass->addPossibleNextLeader({I, InstrToDFSNum(I)});
-
- OldClass->erase(I);
- NewClass->insert(I);
// Handle our special casing of stores.
if (auto *SI = dyn_cast<StoreInst>(I)) {
OldClass->decStoreCount();
@@ -1984,7 +2118,6 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E,
// If it's a store expression we are using, it means we are not equivalent
// to something earlier.
if (auto *SE = dyn_cast<StoreExpression>(E)) {
- assert(SE->getStoredValue() != NewClass->getLeader());
NewClass->setStoredValue(SE->getStoredValue());
markValueLeaderChangeTouched(NewClass);
// Shift the new class leader to be the store
@@ -2003,7 +2136,7 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E,
// instructions before.
// If it's not a memory use, set the MemoryAccess equivalence
- auto *InstMA = dyn_cast_or_null<MemoryDef>(MSSA->getMemoryAccess(I));
+ auto *InstMA = dyn_cast_or_null<MemoryDef>(getMemoryAccess(I));
if (InstMA)
moveMemoryToNewCongruenceClass(I, InstMA, OldClass, NewClass);
ValueToClass[I] = NewClass;
@@ -2035,21 +2168,31 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E,
}
}
+// For a given expression, mark the phi of ops instructions that could have
+// changed as a result.
+void NewGVN::markPhiOfOpsChanged(const HashedExpression &HE) {
+ touchAndErase(ExpressionToPhiOfOps, HE);
+}
+
// Perform congruence finding on a given value numbering expression.
void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) {
// This is guaranteed to return something, since it will at least find
// TOP.
- CongruenceClass *IClass = ValueToClass[I];
+ CongruenceClass *IClass = ValueToClass.lookup(I);
assert(IClass && "Should have found an IClass");
// Dead classes should have been eliminated from the mapping.
assert(!IClass->isDead() && "Found a dead class");
- CongruenceClass *EClass;
+ CongruenceClass *EClass = nullptr;
+ HashedExpression HE(E);
if (const auto *VE = dyn_cast<VariableExpression>(E)) {
- EClass = ValueToClass[VE->getVariableValue()];
- } else {
- auto lookupResult = ExpressionToClass.insert({E, nullptr});
+ EClass = ValueToClass.lookup(VE->getVariableValue());
+ } else if (isa<DeadExpression>(E)) {
+ EClass = TOPClass;
+ }
+ if (!EClass) {
+ auto lookupResult = ExpressionToClass.insert_as({E, nullptr}, HE);
// If it's not in the value table, create a new congruence class.
if (lookupResult.second) {
@@ -2098,10 +2241,13 @@ void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) {
if (ClassChanged || LeaderChanged) {
DEBUG(dbgs() << "New class " << EClass->getID() << " for expression " << *E
<< "\n");
- if (ClassChanged)
+ if (ClassChanged) {
moveValueToNewCongruenceClass(I, E, IClass, EClass);
+ markPhiOfOpsChanged(HE);
+ }
+
markUsersTouched(I);
- if (MemoryAccess *MA = MSSA->getMemoryAccess(I))
+ if (MemoryAccess *MA = getMemoryAccess(I))
markMemoryUsersTouched(MA);
if (auto *CI = dyn_cast<CmpInst>(I))
markPredicateUsersTouched(CI);
@@ -2110,11 +2256,11 @@ void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) {
// old store expression. In particular, loads do not compare against stored
// value, so they will find old store expressions (and associated class
// mappings) if we leave them in the table.
- if (ClassChanged && isa<StoreExpression>(E)) {
+ if (ClassChanged && isa<StoreInst>(I)) {
auto *OldE = ValueToExpression.lookup(I);
// It could just be that the old class died. We don't want to erase it if we
// just moved classes.
- if (OldE && isa<StoreExpression>(OldE) && !OldE->equals(*E))
+ if (OldE && isa<StoreExpression>(OldE) && *E != *OldE)
ExpressionToClass.erase(OldE);
}
ValueToExpression[I] = E;
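
Stepping back, performCongruenceFinding is an optimistic partition refinement: each instruction is mapped to a symbolic expression, the expression is looked up in a table to find or create its congruence class (here via the new HashedExpression, which hashes once and reuses the hash for the later markPhiOfOpsChanged call), and a move between classes re-touches users so the fixpoint loop revisits them. A drastically reduced model of that loop, with strings standing in for Expression objects and none of the leader, store-count, or memory bookkeeping:

    #include <map>
    #include <set>
    #include <string>

    struct CongruenceModel {
      std::map<std::string, int> ExpressionToClass; // expr -> class id
      std::map<int, int> ValueToClass;              // value -> class id
      std::map<int, std::set<int>> Users;           // value -> its users
      std::set<int> Touched;                        // worklist of values
      int NextClassID = 1;

      // Return the class for an expression, creating one if needed.
      int classFor(const std::string &Expr) {
        auto It = ExpressionToClass.insert({Expr, NextClassID});
        if (It.second)
          ++NextClassID;
        return It.first->second;
      }

      // On a class change, move the value and touch everything using it.
      void findCongruence(int Value, const std::string &Expr) {
        int EClass = classFor(Expr);
        int &VClass = ValueToClass[Value];
        if (VClass != EClass) {
          VClass = EClass;
          for (int U : Users[Value])
            Touched.insert(U);
        }
      }
    };

Feeding two values the same expression string places them in one class; re-running one of them with a refined expression moves it and re-queues its users, which is exactly the ripple the TouchedInstructions bitvector tracks.
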
@@ -2139,7 +2285,7 @@ void NewGVN::updateReachableEdge(BasicBlock *From, BasicBlock *To) {
// impact predicates. Otherwise, only mark the phi nodes as touched, as
// they are the only thing that depend on new edges. Anything using their
// values will get propagated to if necessary.
- if (MemoryAccess *MemPhi = MSSA->getMemoryAccess(To))
+ if (MemoryAccess *MemPhi = getMemoryAccess(To))
TouchedInstructions.set(InstrToDFSNum(MemPhi));
auto BI = To->begin();
@@ -2147,6 +2293,9 @@ void NewGVN::updateReachableEdge(BasicBlock *From, BasicBlock *To) {
TouchedInstructions.set(InstrToDFSNum(&*BI));
++BI;
}
+ for_each_found(PHIOfOpsPHIs, To, [&](const PHINode *I) {
+ TouchedInstructions.set(InstrToDFSNum(I));
+ });
}
}
}
@@ -2236,7 +2385,7 @@ void NewGVN::processOutgoingEdges(TerminatorInst *TI, BasicBlock *B) {
// This also may be a memory defining terminator, in which case, set it
// equivalent only to itself.
//
- auto *MA = MSSA->getMemoryAccess(TI);
+ auto *MA = getMemoryAccess(TI);
if (MA && !isa<MemoryUse>(MA)) {
auto *CC = ensureLeaderOfMemoryClass(MA);
if (setMemoryClass(MA, CC))
@@ -2245,6 +2394,158 @@ void NewGVN::processOutgoingEdges(TerminatorInst *TI, BasicBlock *B) {
}
}
+void NewGVN::addPhiOfOps(PHINode *Op, BasicBlock *BB,
+ Instruction *ExistingValue) {
+ InstrDFS[Op] = InstrToDFSNum(ExistingValue);
+ AllTempInstructions.insert(Op);
+ PHIOfOpsPHIs[BB].push_back(Op);
+ TempToBlock[Op] = BB;
+ if (ExistingValue)
+ RealToTemp[ExistingValue] = Op;
+}
+
+static bool okayForPHIOfOps(const Instruction *I) {
+ return isa<BinaryOperator>(I) || isa<SelectInst>(I) || isa<CmpInst>(I) ||
+ isa<LoadInst>(I);
+}
+
+// When we see an instruction that is an op of phis, generate the equivalent phi
+// of ops form.
+const Expression *
+NewGVN::makePossiblePhiOfOps(Instruction *I, bool HasBackedge,
+ SmallPtrSetImpl<Value *> &Visited) {
+ if (!okayForPHIOfOps(I))
+ return nullptr;
+
+ if (!Visited.insert(I).second)
+ return nullptr;
+ // For now, we require the instruction be cycle free because we don't
+ // *always* create a phi of ops for instructions that could be done as phi
+ // of ops; we only do it when we think it is useful. If we did do it all the
+ // time, we could remove the cycle free check.
+ if (!isCycleFree(I))
+ return nullptr;
+
+ unsigned IDFSNum = InstrToDFSNum(I);
+ // Pretty much all of the instructions we can convert to phi of ops over a
+ // backedge that are adds are really induction variables, and those are
+ // pretty much pointless to convert. This is a very coarse-grained test, so
+ // if we do find some value, we can change it later.
+ // But otherwise, what can happen is we convert the induction variable from
+ //
+ // i = phi (0, tmp)
+ // tmp = i + 1
+ //
+ // to
+ // i = phi (0, tmpphi)
+ // tmpphi = phi(1, tmpphi+1)
+ //
+ // Which we don't want to happen. We could just avoid this for all non-cycle
+ // free phis, and we may yet go that route.
+ if (HasBackedge && I->getOpcode() == Instruction::Add)
+ return nullptr;
+
+ SmallPtrSet<const Value *, 8> ProcessedPHIs;
+ // TODO: We don't do phi translation on memory accesses because it's
+ // complicated. For a load, we'd need to be able to simulate a new memoryuse,
+ // which we don't have a good way of doing ATM.
+ auto *MemAccess = getMemoryAccess(I);
+ // If the memory operation is defined by a memory operation in this block
+ // that isn't a MemoryPhi, transforming the pointer backwards through a
+ // scalar phi can't help, as it would still be killed by that memory
+ // operation.
+ if (MemAccess && !isa<MemoryPhi>(MemAccess->getDefiningAccess()) &&
+ MemAccess->getDefiningAccess()->getBlock() == I->getParent())
+ return nullptr;
+
+ // Convert op of phis to phi of ops
+ for (auto &Op : I->operands()) {
+ if (!isa<PHINode>(Op))
+ continue;
+ auto *OpPHI = cast<PHINode>(Op);
+ // No point in doing this for one-operand phis.
+ if (OpPHI->getNumOperands() == 1)
+ continue;
+ if (!DebugCounter::shouldExecute(PHIOfOpsCounter))
+ return nullptr;
+ SmallVector<std::pair<Value *, BasicBlock *>, 4> Ops;
+ auto *PHIBlock = getBlockForValue(OpPHI);
+ for (auto PredBB : OpPHI->blocks()) {
+ Value *FoundVal = nullptr;
+ // We could just skip unreachable edges entirely but it's tricky to do
+ // with rewriting existing phi nodes.
+ if (ReachableEdges.count({PredBB, PHIBlock})) {
+ // Clone the instruction, create an expression from it, and see if we
+ // have a leader.
+ Instruction *ValueOp = I->clone();
+ auto Iter = TempToMemory.end();
+ if (MemAccess)
+ Iter = TempToMemory.insert({ValueOp, MemAccess}).first;
+
+ for (auto &Op : ValueOp->operands()) {
+ Op = Op->DoPHITranslation(PHIBlock, PredBB);
+ // When this operand changes, it could change whether there is a
+ // leader for us or not.
+ addAdditionalUsers(Op, I);
+ }
+ // Make sure it's marked as a temporary instruction.
+ AllTempInstructions.insert(ValueOp);
+ // and make sure anything that tries to add its DFS number is
+ // redirected to the instruction we are making a phi of ops for.
+ InstrDFS.insert({ValueOp, IDFSNum});
+ const Expression *E = performSymbolicEvaluation(ValueOp, Visited);
+ InstrDFS.erase(ValueOp);
+ AllTempInstructions.erase(ValueOp);
+ ValueOp->deleteValue();
+ if (MemAccess)
+ TempToMemory.erase(Iter);
+ if (!E)
+ return nullptr;
+ FoundVal = findPhiOfOpsLeader(E, PredBB);
+ if (!FoundVal) {
+ ExpressionToPhiOfOps[E].insert(I);
+ return nullptr;
+ }
+ if (auto *SI = dyn_cast<StoreInst>(FoundVal))
+ FoundVal = SI->getValueOperand();
+ } else {
+ DEBUG(dbgs() << "Skipping phi of ops operand for incoming block "
+ << getBlockName(PredBB)
+ << " because the block is unreachable\n");
+ FoundVal = UndefValue::get(I->getType());
+ }
+
+ Ops.push_back({FoundVal, PredBB});
+ DEBUG(dbgs() << "Found phi of ops operand " << *FoundVal << " in "
+ << getBlockName(PredBB) << "\n");
+ }
+ auto *ValuePHI = RealToTemp.lookup(I);
+ bool NewPHI = false;
+ if (!ValuePHI) {
+ ValuePHI = PHINode::Create(I->getType(), OpPHI->getNumOperands());
+ addPhiOfOps(ValuePHI, PHIBlock, I);
+ NewPHI = true;
+ NumGVNPHIOfOpsCreated++;
+ }
+ if (NewPHI) {
+ for (auto PHIOp : Ops)
+ ValuePHI->addIncoming(PHIOp.first, PHIOp.second);
+ } else {
+ unsigned int i = 0;
+ for (auto PHIOp : Ops) {
+ ValuePHI->setIncomingValue(i, PHIOp.first);
+ ValuePHI->setIncomingBlock(i, PHIOp.second);
+ ++i;
+ }
+ }
+
+ DEBUG(dbgs() << "Created phi of ops " << *ValuePHI << " for " << *I
+ << "\n");
+ return performSymbolicEvaluation(ValuePHI, Visited);
+ }
+ return nullptr;
+}
+
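
Concretely, the phi-translation loop above asks, for t = op(p, a) with p = phi([x, B1], [y, B2]): does op(x, a) already have a leader in B1, and op(y, a) one in B2? Only if every reachable predecessor yields a leader is the value phi built. A toy model of just that decision, with strings standing in for expressions and a flat leader table standing in for the congruence-class lookup; all names here are illustrative, not part of the patch:

    #include <map>
    #include <optional>
    #include <string>
    #include <vector>

    // One (predecessor block, incoming value) pair of a phi.
    struct Incoming { std::string Pred, Val; };

    // Leaders[{expr, block}] = an existing value computing expr that is
    // available in block.
    using Leaders =
        std::map<std::pair<std::string, std::string>, std::string>;

    // Try to turn op(phi(...), Other) into phi(op(x, Other), op(y, Other)).
    // Returns the incoming list of the new phi, or nullopt if some
    // predecessor lacks a leader (mirroring the early return above).
    static std::optional<std::vector<Incoming>>
    makePhiOfOps(const std::string &Op, const std::vector<Incoming> &Phi,
                 const std::string &Other, const Leaders &L) {
      std::vector<Incoming> NewPhi;
      for (const Incoming &In : Phi) {
        // Phi-translate the operation into the predecessor...
        std::string Translated = Op + "(" + In.Val + "," + Other + ")";
        // ...and give up unless something already computes it there.
        auto It = L.find({Translated, In.Pred});
        if (It == L.end())
          return std::nullopt;
        NewPhi.push_back({In.Pred, It->second});
      }
      return NewPhi;
    }

If add(x,a) has leader %t1 in B1 and add(y,a) has leader %t2 in B2, the model returns [{B1, %t1}, {B2, %t2}], i.e. t becomes phi([%t1, B1], [%t2, B2]).
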
// The algorithm initially places the values of the routine in the TOP
// congruence class. The leader of TOP is the undetermined value `undef`.
// When the algorithm has finished, values still in TOP are unreachable.
@@ -2287,6 +2588,12 @@ void NewGVN::initializeCongruenceClasses(Function &F) {
TOPClass->incStoreCount();
}
for (auto &I : *BB) {
+ // TODO: Move to helper
+ if (isa<PHINode>(&I))
+ for (auto *U : I.users())
+ if (auto *UInst = dyn_cast<Instruction>(U))
+ if (InstrToDFSNum(UInst) != 0 && okayForPHIOfOps(UInst))
+ PHINodeUses.insert(UInst);
// Don't insert void terminators into the class. We don't value number
// them, and they just end up sitting in TOP.
if (isa<TerminatorInst>(I) && I.getType()->isVoidTy())
@@ -2311,12 +2618,35 @@ void NewGVN::cleanupTables() {
CongruenceClasses[i] = nullptr;
}
+ // Destroy the temporary instructions we created for value PHIs
+ SmallVector<Instruction *, 8> TempInst(AllTempInstructions.begin(),
+ AllTempInstructions.end());
+ AllTempInstructions.clear();
+
+ // We have to drop all references for everything first, so there are no uses
+ // left as we delete them.
+ for (auto *I : TempInst) {
+ I->dropAllReferences();
+ }
+
+ while (!TempInst.empty()) {
+ auto *I = TempInst.back();
+ TempInst.pop_back();
+ I->deleteValue();
+ }
+
ValueToClass.clear();
ArgRecycler.clear(ExpressionAllocator);
ExpressionAllocator.Reset();
CongruenceClasses.clear();
ExpressionToClass.clear();
ValueToExpression.clear();
+ RealToTemp.clear();
+ AdditionalUsers.clear();
+ ExpressionToPhiOfOps.clear();
+ TempToBlock.clear();
+ TempToMemory.clear();
+ PHIOfOpsPHIs.clear();
ReachableBlocks.clear();
ReachableEdges.clear();
#ifndef NDEBUG
@@ -2332,14 +2662,17 @@ void NewGVN::cleanupTables() {
MemoryToUsers.clear();
}
+// Assign local DFS number mapping to instructions, and leave space for value
+// PHIs.
std::pair<unsigned, unsigned> NewGVN::assignDFSNumbers(BasicBlock *B,
unsigned Start) {
unsigned End = Start;
- if (MemoryAccess *MemPhi = MSSA->getMemoryAccess(B)) {
+ if (MemoryAccess *MemPhi = getMemoryAccess(B)) {
InstrDFS[MemPhi] = End++;
DFSToInstr.emplace_back(MemPhi);
}
+ // Then the real block goes next.
for (auto &I : *B) {
// There's no need to call isInstructionTriviallyDead more than once on
// an instruction. Therefore, once we know that an instruction is dead
@@ -2350,7 +2683,6 @@ std::pair<unsigned, unsigned> NewGVN::assignDFSNumbers(BasicBlock *B,
markInstructionForDeletion(&I);
continue;
}
-
InstrDFS[&I] = End++;
DFSToInstr.emplace_back(&I);
}
@@ -2361,7 +2693,7 @@ std::pair<unsigned, unsigned> NewGVN::assignDFSNumbers(BasicBlock *B,
return std::make_pair(Start, End);
}
-void NewGVN::updateProcessedCount(Value *V) {
+void NewGVN::updateProcessedCount(const Value *V) {
#ifndef NDEBUG
if (ProcessedCount.count(V) == 0) {
ProcessedCount.insert({V, 1});
@@ -2375,12 +2707,13 @@ void NewGVN::updateProcessedCount(Value *V) {
// Evaluate MemoryPhi nodes symbolically, just like PHI nodes
void NewGVN::valueNumberMemoryPhi(MemoryPhi *MP) {
// If all the arguments are the same, the MemoryPhi has the same value as the
- // argument.
- // Filter out unreachable blocks and self phis from our operands.
+ // argument. Filter out unreachable blocks and self phis from our operands.
+ // TODO: We could do cycle-checking on the memory phis to allow valueizing for
+ // self-phi checking.
const BasicBlock *PHIBlock = MP->getBlock();
auto Filtered = make_filter_range(MP->operands(), [&](const Use &U) {
- return lookupMemoryLeader(cast<MemoryAccess>(U)) != MP &&
- !isMemoryAccessTop(cast<MemoryAccess>(U)) &&
+ return cast<MemoryAccess>(U) != MP &&
+ !isMemoryAccessTOP(cast<MemoryAccess>(U)) &&
ReachableEdges.count({MP->getIncomingBlock(U), PHIBlock});
});
// If all that is left is nothing, our memoryphi is undef. We keep it as
@@ -2433,18 +2766,26 @@ void NewGVN::valueNumberInstruction(Instruction *I) {
DEBUG(dbgs() << "Processing instruction " << *I << "\n");
if (!I->isTerminator()) {
const Expression *Symbolized = nullptr;
+ SmallPtrSet<Value *, 2> Visited;
if (DebugCounter::shouldExecute(VNCounter)) {
- Symbolized = performSymbolicEvaluation(I);
+ Symbolized = performSymbolicEvaluation(I, Visited);
+ // Make a phi of ops if necessary
+ if (Symbolized && !isa<ConstantExpression>(Symbolized) &&
+ !isa<VariableExpression>(Symbolized) && PHINodeUses.count(I)) {
+ // FIXME: Backedge argument
+ auto *PHIE = makePossiblePhiOfOps(I, false, Visited);
+ if (PHIE)
+ Symbolized = PHIE;
+ }
+
} else {
// Mark the instruction as unused so we don't value number it again.
InstrDFS[I] = 0;
}
// If we couldn't come up with a symbolic expression, use the unknown
// expression
- if (Symbolized == nullptr) {
+ if (Symbolized == nullptr)
Symbolized = createUnknownExpression(I);
- }
-
performCongruenceFinding(I, Symbolized);
} else {
// Handle terminators that return values. All of them produce values we
@@ -2460,13 +2801,23 @@ void NewGVN::valueNumberInstruction(Instruction *I) {
// Check if there is a path, using single or equal argument phi nodes, from
// First to Second.
-bool NewGVN::singleReachablePHIPath(const MemoryAccess *First,
- const MemoryAccess *Second) const {
+bool NewGVN::singleReachablePHIPath(
+ SmallPtrSet<const MemoryAccess *, 8> &Visited, const MemoryAccess *First,
+ const MemoryAccess *Second) const {
if (First == Second)
return true;
if (MSSA->isLiveOnEntryDef(First))
return false;
+ // This is not perfect, but as we're just verifying here, we can live with
+ // the loss of precision. The real solution would be that of doing strongly
+ // connected component finding in this routine, and it's probably not worth
+ // the complexity for the time being. So, we just keep a set of visited
+ // MemoryAccess and return true when we hit a cycle.
+ if (Visited.count(First))
+ return true;
+ Visited.insert(First);
+
const auto *EndDef = First;
for (auto *ChainDef : optimized_def_chain(First)) {
if (ChainDef == Second)
@@ -2489,7 +2840,8 @@ bool NewGVN::singleReachablePHIPath(const MemoryAccess *First,
Okay =
std::equal(OperandList.begin(), OperandList.end(), OperandList.begin());
if (Okay)
- return singleReachablePHIPath(cast<MemoryAccess>(OperandList[0]), Second);
+ return singleReachablePHIPath(Visited, cast<MemoryAccess>(OperandList[0]),
+ Second);
return false;
}
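
The new Visited parameter is the standard guard for walking a graph that may contain cycles: remember every MemoryAccess seen and conservatively answer true on a revisit, trading precision for guaranteed termination, which is acceptable in a verifier. The same pattern in miniature; the MemAccess struct is a stand-in for the MemorySSA types, not their real API:

    #include <set>

    struct MemAccess {
      const MemAccess *Def = nullptr; // defining access; may form a cycle
    };

    // True if Second is reachable from First through defining accesses.
    static bool reachableThroughDefs(std::set<const MemAccess *> &Visited,
                                     const MemAccess *First,
                                     const MemAccess *Second) {
      if (First == Second)
        return true;
      if (!First)
        return false;
      // Conservatively treat a revisit as reachable so a def cycle cannot
      // recurse forever.
      if (!Visited.insert(First).second)
        return true;
      return reachableThroughDefs(Visited, First->Def, Second);
    }
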
@@ -2552,17 +2904,17 @@ void NewGVN::verifyMemoryCongruency() const {
auto Filtered = make_filter_range(MemoryAccessToClass, ReachableAccessPred);
for (auto KV : Filtered) {
- assert(KV.second != TOPClass &&
- "Memory not unreachable but ended up in TOP");
if (auto *FirstMUD = dyn_cast<MemoryUseOrDef>(KV.first)) {
auto *SecondMUD = dyn_cast<MemoryUseOrDef>(KV.second->getMemoryLeader());
- if (FirstMUD && SecondMUD)
- assert((singleReachablePHIPath(FirstMUD, SecondMUD) ||
+ if (FirstMUD && SecondMUD) {
+ SmallPtrSet<const MemoryAccess *, 8> VisitedMAS;
+ assert((singleReachablePHIPath(VisitedMAS, FirstMUD, SecondMUD) ||
ValueToClass.lookup(FirstMUD->getMemoryInst()) ==
ValueToClass.lookup(SecondMUD->getMemoryInst())) &&
"The instructions for these memory operations should have "
"been in the same congruence class or reachable through"
"a single argument phi");
+ }
} else if (auto *FirstMP = dyn_cast<MemoryPhi>(KV.first)) {
// We can only sanely verify that MemoryDefs in the operand list all have
// the same class.
@@ -2671,7 +3023,7 @@ void NewGVN::iterateTouchedInstructions() {
// Nothing set, nothing to iterate, just return.
if (FirstInstr == -1)
return;
- BasicBlock *LastBlock = getBlockForValue(InstrFromDFSNum(FirstInstr));
+ const BasicBlock *LastBlock = getBlockForValue(InstrFromDFSNum(FirstInstr));
while (TouchedInstructions.any()) {
++Iterations;
// Walk through all the instructions in all the blocks in RPO.
@@ -2688,7 +3040,7 @@ void NewGVN::iterateTouchedInstructions() {
}
Value *V = InstrFromDFSNum(InstrNum);
- BasicBlock *CurrBlock = getBlockForValue(V);
+ const BasicBlock *CurrBlock = getBlockForValue(V);
// If we hit a new block, do reachability processing.
if (CurrBlock != LastBlock) {
@@ -2731,6 +3083,7 @@ bool NewGVN::runGVN() {
bool Changed = false;
NumFuncArgs = F.arg_size();
MSSAWalker = MSSA->getWalker();
+ SingletonDeadExpression = new (ExpressionAllocator) DeadExpression();
// Count number of instructions for sizing of hash tables, and come
// up with a global dfs numbering for instructions.
@@ -2769,6 +3122,7 @@ bool NewGVN::runGVN() {
BlockInstRange.insert({B, BlockRange});
ICount += BlockRange.second - BlockRange.first;
}
+ initializeCongruenceClasses(F);
TouchedInstructions.resize(ICount);
// Ensure we don't end up resizing the expressionToClass map, as
@@ -2779,9 +3133,10 @@ bool NewGVN::runGVN() {
// Initialize the touched instructions to include the entry block.
const auto &InstRange = BlockInstRange.lookup(&F.getEntryBlock());
TouchedInstructions.set(InstRange.first, InstRange.second);
+ DEBUG(dbgs() << "Block " << getBlockName(&F.getEntryBlock())
+ << " marked reachable\n");
ReachableBlocks.insert(&F.getEntryBlock());
- initializeCongruenceClasses(F);
iterateTouchedInstructions();
verifyMemoryCongruency();
verifyIterationSettled(F);
@@ -2794,7 +3149,8 @@ bool NewGVN::runGVN() {
if (!ToErase->use_empty())
ToErase->replaceAllUsesWith(UndefValue::get(ToErase->getType()));
- ToErase->eraseFromParent();
+ if (ToErase->getParent())
+ ToErase->eraseFromParent();
}
// Delete all unreachable blocks.
@@ -2813,14 +3169,6 @@ bool NewGVN::runGVN() {
return Changed;
}
-// Return true if V is a value that will always be available (IE can
-// be placed anywhere) in the function. We don't do globals here
-// because they are often worse to put in place.
-// TODO: Separate cost from availability
-static bool alwaysAvailable(Value *V) {
- return isa<Constant>(V) || isa<Argument>(V);
-}
-
struct NewGVN::ValueDFS {
int DFSIn = 0;
int DFSOut = 0;
@@ -2910,9 +3258,21 @@ void NewGVN::convertClassToDFSOrdered(
}
assert(isa<Instruction>(D) &&
"The dense set member should always be an instruction");
- VDDef.LocalNum = InstrToDFSNum(D);
- DFSOrderedSet.emplace_back(VDDef);
Instruction *Def = cast<Instruction>(D);
+ VDDef.LocalNum = InstrToDFSNum(D);
+ DFSOrderedSet.push_back(VDDef);
+ // If there is a phi node equivalent, add it
+ if (auto *PN = RealToTemp.lookup(Def)) {
+ auto *PHIE =
+ dyn_cast_or_null<PHIExpression>(ValueToExpression.lookup(Def));
+ if (PHIE) {
+ VDDef.Def.setInt(false);
+ VDDef.Def.setPointer(PN);
+ VDDef.LocalNum = 0;
+ DFSOrderedSet.push_back(VDDef);
+ }
+ }
+
unsigned int UseCount = 0;
// Now add the uses.
for (auto &U : Def->uses()) {
@@ -2929,7 +3289,7 @@ void NewGVN::convertClassToDFSOrdered(
// they are from.
VDUse.LocalNum = InstrDFS.size() + 1;
} else {
- IBlock = I->getParent();
+ IBlock = getBlockForValue(I);
VDUse.LocalNum = InstrToDFSNum(I);
}
@@ -3099,6 +3459,37 @@ private:
};
}
+// Given an expression and a basic block where we want its value to be
+// available, see if the expression has a leader available in that block.
+Value *NewGVN::findPhiOfOpsLeader(const Expression *E,
+ const BasicBlock *BB) const {
+ // It would already be constant if we could make it constant
+ if (auto *CE = dyn_cast<ConstantExpression>(E))
+ return CE->getConstantValue();
+ if (auto *VE = dyn_cast<VariableExpression>(E))
+ return VE->getVariableValue();
+
+ auto *CC = ExpressionToClass.lookup(E);
+ if (!CC)
+ return nullptr;
+ if (alwaysAvailable(CC->getLeader()))
+ return CC->getLeader();
+
+ for (auto Member : *CC) {
+ auto *MemberInst = dyn_cast<Instruction>(Member);
+ // Anything that isn't an instruction is always available.
+ if (!MemberInst)
+ return Member;
+ // If we are looking for something in the same block as the member, it must
+ // be a leader because this function is looking for operands for a phi node.
+ if (MemberInst->getParent() == BB ||
+ DT->dominates(MemberInst->getParent(), BB)) {
+ return Member;
+ }
+ }
+ return nullptr;
+}
+
bool NewGVN::eliminateInstructions(Function &F) {
// This is a non-standard eliminator. The normal way to eliminate is
// to walk the dominator tree in order, keeping track of available
@@ -3129,25 +3520,42 @@ bool NewGVN::eliminateInstructions(Function &F) {
// DFS numbers are updated, we compute some ourselves.
DT->updateDFSNumbers();
- for (auto &B : F) {
- if (!ReachableBlocks.count(&B)) {
- for (const auto S : successors(&B)) {
- for (auto II = S->begin(); isa<PHINode>(II); ++II) {
- auto &Phi = cast<PHINode>(*II);
- DEBUG(dbgs() << "Replacing incoming value of " << *II << " for block "
- << getBlockName(&B)
- << " with undef due to it being unreachable\n");
- for (auto &Operand : Phi.incoming_values())
- if (Phi.getIncomingBlock(Operand) == &B)
- Operand.set(UndefValue::get(Phi.getType()));
- }
+ // Go through all of our phi nodes, and kill the arguments associated with
+ // unreachable edges.
+ auto ReplaceUnreachablePHIArgs = [&](PHINode &PHI, BasicBlock *BB) {
+ for (auto &Operand : PHI.incoming_values())
+ if (!ReachableEdges.count({PHI.getIncomingBlock(Operand), BB})) {
+ DEBUG(dbgs() << "Replacing incoming value of " << PHI << " for block "
+ << getBlockName(PHI.getIncomingBlock(Operand))
+ << " with undef due to it being unreachable\n");
+ Operand.set(UndefValue::get(PHI.getType()));
}
+ };
+ SmallPtrSet<BasicBlock *, 8> BlocksWithPhis;
+ for (auto &B : F)
+ if ((!B.empty() && isa<PHINode>(*B.begin())) ||
+ (PHIOfOpsPHIs.find(&B) != PHIOfOpsPHIs.end()))
+ BlocksWithPhis.insert(&B);
+ DenseMap<const BasicBlock *, unsigned> ReachablePredCount;
+ for (auto KV : ReachableEdges)
+ ReachablePredCount[KV.getEnd()]++;
+ for (auto *BB : BlocksWithPhis)
+ // TODO: It would be faster to use getNumIncomingBlocks() on a phi node in
+ // the block and subtract the pred count, but it's more complicated.
+ if (ReachablePredCount.lookup(BB) !=
+ std::distance(pred_begin(BB), pred_end(BB))) {
+ for (auto II = BB->begin(); isa<PHINode>(II); ++II) {
+ auto &PHI = cast<PHINode>(*II);
+ ReplaceUnreachablePHIArgs(PHI, BB);
+ }
+ for_each_found(PHIOfOpsPHIs, BB, [&](PHINode *PHI) {
+ ReplaceUnreachablePHIArgs(*PHI, BB);
+ });
}
- }
// Map to store the use counts
DenseMap<const Value *, unsigned int> UseCounts;
- for (CongruenceClass *CC : reverse(CongruenceClasses)) {
+ for (auto *CC : reverse(CongruenceClasses)) {
// Track the equivalent store info so we can decide whether to try
// dead store elimination.
SmallVector<ValueDFS, 8> PossibleDeadStores;
@@ -3156,13 +3564,15 @@ bool NewGVN::eliminateInstructions(Function &F) {
continue;
// Everything still in the TOP class is unreachable or dead.
if (CC == TOPClass) {
-#ifndef NDEBUG
- for (auto M : *CC)
+ for (auto M : *CC) {
+ auto *VTE = ValueToExpression.lookup(M);
+ if (VTE && isa<DeadExpression>(VTE))
+ markInstructionForDeletion(cast<Instruction>(M));
assert((!ReachableBlocks.count(cast<Instruction>(M)->getParent()) ||
InstructionsToErase.count(cast<Instruction>(M))) &&
"Everything in TOP should be unreachable or dead at this "
"point");
-#endif
+ }
continue;
}
@@ -3195,7 +3605,7 @@ bool NewGVN::eliminateInstructions(Function &F) {
DEBUG(dbgs() << "Eliminating in congruence class " << CC->getID()
<< "\n");
// If this is a singleton, we can skip it.
- if (CC->size() != 1) {
+ if (CC->size() != 1 || RealToTemp.lookup(Leader)) {
// This is a stack because equality replacement/etc may place
// constants in the middle of the member list, and we want to use
// those constant values in preference to the current leader, over
@@ -3217,6 +3627,22 @@ bool NewGVN::eliminateInstructions(Function &F) {
// We ignore void things because we can't get a value from them.
if (Def && Def->getType()->isVoidTy())
continue;
+ auto *DefInst = dyn_cast_or_null<Instruction>(Def);
+ if (DefInst && AllTempInstructions.count(DefInst)) {
+ auto *PN = cast<PHINode>(DefInst);
+
+ // If this is a value phi and that's the expression we used, insert
+ // it into the program and remove it from the temp instruction list.
+ AllTempInstructions.erase(PN);
+ auto *DefBlock = getBlockForValue(Def);
+ DEBUG(dbgs() << "Inserting fully real phi of ops" << *Def
+ << " into block "
+ << getBlockName(getBlockForValue(Def)) << "\n");
+ PN->insertBefore(&DefBlock->front());
+ Def = PN;
+ NumGVNPHIOfOpsEliminations++;
+ }
if (EliminationStack.empty()) {
DEBUG(dbgs() << "Elimination Stack is empty\n");
@@ -3301,6 +3727,10 @@ bool NewGVN::eliminateInstructions(Function &F) {
Value *DominatingLeader = EliminationStack.back();
+ auto *II = dyn_cast<IntrinsicInst>(DominatingLeader);
+ if (II && II->getIntrinsicID() == Intrinsic::ssa_copy)
+ DominatingLeader = II->getOperand(0);
+
// Don't replace our existing users with ourselves.
if (U->get() == DominatingLeader)
continue;
@@ -3321,6 +3751,8 @@ bool NewGVN::eliminateInstructions(Function &F) {
// It's about to be alive again.
if (LeaderUseCount == 0 && isa<Instruction>(DominatingLeader))
ProbablyDead.erase(cast<Instruction>(DominatingLeader));
+ if (LeaderUseCount == 0 && II)
+ ProbablyDead.insert(II);
++LeaderUseCount;
AnythingReplaced = true;
}
@@ -3375,7 +3807,6 @@ bool NewGVN::eliminateInstructions(Function &F) {
}
}
}
-
return AnythingReplaced;
}
@@ -3385,19 +3816,23 @@ bool NewGVN::eliminateInstructions(Function &F) {
// we will simplify an operation with all constants so that it doesn't matter
// what order they appear in.
unsigned int NewGVN::getRank(const Value *V) const {
- // Prefer undef to anything else
+ // Prefer constants to undef to anything else
+ // Undef is a constant; we have to check it first.
+ // Prefer smaller constants to constantexprs
+ if (isa<ConstantExpr>(V))
+ return 2;
if (isa<UndefValue>(V))
- return 0;
- if (isa<Constant>(V))
return 1;
+ if (isa<Constant>(V))
+ return 0;
else if (auto *A = dyn_cast<Argument>(V))
- return 2 + A->getArgNo();
+ return 3 + A->getArgNo();
- // Need to shift the instruction DFS by number of arguments + 3 to account for
- // the constant and argument ranking above.
+ // Need to shift the instruction DFS by number of arguments + 4 to account for
+ // the constant and argument ranking above.
unsigned Result = InstrToDFSNum(V);
if (Result > 0)
- return 3 + NumFuncArgs + Result;
+ return 4 + NumFuncArgs + Result;
// Unreachable or something else, just return a really large number.
return ~0;
}
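
The point of the re-ranking is a total order used when canonicalizing commutative operands, so that congruent expressions hash identically no matter how the IR spelled the operand order: constants now rank lowest (0), then undef (1), then constant expressions (2), then arguments (3+n), then instructions by DFS number. A toy version of the rank-and-swap idea; the Kind enum and canonicalize are illustrative only, and which position the real pass fixes is a detail of its expression creation:

    #include <algorithm>
    #include <cassert>

    enum class Kind { Constant, Undef, ConstantExpr, Argument, Instruction };

    struct Val {
      Kind K;
      unsigned ArgNo = 0;  // used for arguments
      unsigned DFSNum = 0; // used for instructions
    };

    static unsigned rank(const Val &V, unsigned NumFuncArgs) {
      switch (V.K) {
      case Kind::Constant:     return 0;
      case Kind::Undef:        return 1;
      case Kind::ConstantExpr: return 2;
      case Kind::Argument:     return 3 + V.ArgNo;
      case Kind::Instruction:  return 4 + NumFuncArgs + V.DFSNum;
      }
      assert(false && "covered switch");
      return ~0u;
    }

    // Keep the lower-ranked (more constant-like) operand in a fixed slot so
    // add(%x, 5) and add(5, %x) yield the same canonical expression.
    static void canonicalize(Val &A, Val &B, unsigned NumFuncArgs) {
      if (rank(A, NumFuncArgs) > rank(B, NumFuncArgs))
        std::swap(A, B);
    }
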
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index 53320bff0883..a20890b22603 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -1582,7 +1582,7 @@ Value *ReassociatePass::OptimizeAdd(Instruction *I,
}
// No need for extra uses anymore.
- delete DummyInst;
+ DummyInst->deleteValue();
unsigned NumAddedValues = NewMulOps.size();
Value *V = EmitAddTreeOfValues(I, NewMulOps);
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 1d9beffaf06b..24bd0a2b7bdf 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -2443,7 +2443,7 @@ private:
"insert");
LI.replaceAllUsesWith(V);
Placeholder->replaceAllUsesWith(&LI);
- delete Placeholder;
+ Placeholder->deleteValue();
} else {
LI.replaceAllUsesWith(V);
}
@@ -3898,8 +3898,7 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
}
NumAllocaPartitionUses += NumUses;
- MaxUsesPerAllocaPartition =
- std::max<unsigned>(NumUses, MaxUsesPerAllocaPartition);
+ MaxUsesPerAllocaPartition.updateMax(NumUses);
// Now that we've processed all the slices in the new partition, check if any
// PHIs or Selects would block promotion.
@@ -4016,8 +4015,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
}
NumAllocaPartitions += NumPartitions;
- MaxPartitionsPerAlloca =
- std::max<unsigned>(NumPartitions, MaxPartitionsPerAlloca);
+ MaxPartitionsPerAlloca.updateMax(NumPartitions);
// Migrate debug information from the old alloca to the new alloca(s)
// and the individual partitions.
diff --git a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
index 2be3f5c533b9..8b8d6590aa6a 100644
--- a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -693,7 +693,7 @@ bool StraightLineStrengthReduce::runOnFunction(Function &F) {
UnlinkedInst->setOperand(I, nullptr);
RecursivelyDeleteTriviallyDeadInstructions(Op);
}
- delete UnlinkedInst;
+ UnlinkedInst->deleteValue();
}
bool Ret = !UnlinkedInstructions.empty();
UnlinkedInstructions.clear();
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 4aa26fd14fee..bf2ab7c55be2 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -317,7 +317,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
if (!NewInst->mayHaveSideEffects()) {
VMap[&*II] = V;
- delete NewInst;
+ NewInst->deleteValue();
continue;
}
}
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index b44bc74d6551..27f72fcd8bda 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -2235,7 +2235,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL,
if (!BBI->use_empty())
TranslateMap[&*BBI] = V;
if (!N->mayHaveSideEffects()) {
- delete N; // Instruction folded away, don't need actual inst
+ N->deleteValue(); // Instruction folded away, don't need actual inst
N = nullptr;
}
} else {
@@ -4380,7 +4380,7 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
// Gather dead cases.
SmallVector<ConstantInt *, 8> DeadCases;
for (auto &Case : SI->cases()) {
- APInt CaseVal = Case.getCaseValue()->getValue();
+ const APInt &CaseVal = Case.getCaseValue()->getValue();
if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
(CaseVal.getMinSignedBits() > MaxSignificantBitsInCond)) {
DeadCases.push_back(Case.getCaseValue());
@@ -4946,7 +4946,7 @@ SwitchLookupTable::SwitchLookupTable(
LinearMappingPossible = false;
break;
}
- APInt Val = ConstVal->getValue();
+ const APInt &Val = ConstVal->getValue();
if (I != 0) {
APInt Dist = Val - PrevVal;
if (I == 1) {
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 1de579ed41b0..85c9464b5569 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -426,57 +426,70 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) {
return Dst;
}
-Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilder<> &B,
+ unsigned CharSize) {
Value *Src = CI->getArgOperand(0);
// Constant folding: strlen("xyz") -> 3
- if (uint64_t Len = GetStringLength(Src))
+ if (uint64_t Len = GetStringLength(Src, CharSize))
return ConstantInt::get(CI->getType(), Len - 1);
// If s is a constant pointer pointing to a string literal, we can fold
- // strlen(s + x) to strlen(s) - x, when x is known to be in the range
+ // strlen(s + x) to strlen(s) - x, when x is known to be in the range
// [0, strlen(s)] or the string has a single null terminator '\0' at the end.
- // We only try to simplify strlen when the pointer s points to an array
+ // We only try to simplify strlen when the pointer s points to an array
// of i8. Otherwise, we would need to scale the offset x before doing the
- // subtraction. This will make the optimization more complex, and it's not
- // very useful because calling strlen for a pointer of other types is
+ // subtraction. This will make the optimization more complex, and it's not
+ // very useful because calling strlen for a pointer of other types is
// very uncommon.
if (GEPOperator *GEP = dyn_cast<GEPOperator>(Src)) {
- if (!isGEPBasedOnPointerToString(GEP))
+ if (!isGEPBasedOnPointerToString(GEP, CharSize))
return nullptr;
- StringRef Str;
- if (getConstantStringInfo(GEP->getOperand(0), Str, 0, false)) {
- size_t NullTermIdx = Str.find('\0');
-
- // If the string does not have '\0', leave it to strlen to compute
- // its length.
- if (NullTermIdx == StringRef::npos)
- return nullptr;
-
+ ConstantDataArraySlice Slice;
+ if (getConstantDataArrayInfo(GEP->getOperand(0), Slice, CharSize)) {
+ uint64_t NullTermIdx;
+ if (Slice.Array == nullptr) {
+ NullTermIdx = 0;
+ } else {
+ NullTermIdx = ~((uint64_t)0);
+ for (uint64_t I = 0, E = Slice.Length; I < E; ++I) {
+ if (Slice.Array->getElementAsInteger(I + Slice.Offset) == 0) {
+ NullTermIdx = I;
+ break;
+ }
+ }
+ // If the string does not have '\0', leave it to strlen to compute
+ // its length.
+ if (NullTermIdx == ~((uint64_t)0))
+ return nullptr;
+ }
+
Value *Offset = GEP->getOperand(2);
unsigned BitWidth = Offset->getType()->getIntegerBitWidth();
KnownBits Known(BitWidth);
computeKnownBits(Offset, Known, DL, 0, nullptr, CI, nullptr);
Known.Zero.flipAllBits();
- size_t ArrSize =
+ uint64_t ArrSize =
cast<ArrayType>(GEP->getSourceElementType())->getNumElements();
- // KnownZero's bits are flipped, so zeros in KnownZero now represent
- // bits known to be zeros in Offset, and ones in KnowZero represent
+ // KnownZero's bits are flipped, so zeros in KnownZero now represent
+ // bits known to be zeros in Offset, and ones in KnownZero represent
// bits unknown in Offset. Therefore, Offset is known to be in range
- // [0, NullTermIdx] when the flipped KnownZero is non-negative and
+ // [0, NullTermIdx] when the flipped KnownZero is non-negative and
// unsigned-less-than NullTermIdx.
//
- // If Offset is not provably in the range [0, NullTermIdx], we can still
- // optimize if we can prove that the program has undefined behavior when
- // Offset is outside that range. That is the case when GEP->getOperand(0)
+ // If Offset is not provably in the range [0, NullTermIdx], we can still
+ // optimize if we can prove that the program has undefined behavior when
+ // Offset is outside that range. That is the case when GEP->getOperand(0)
// is a pointer to an object whose memory extent is NullTermIdx+1.
- if ((Known.Zero.isNonNegative() && Known.Zero.ule(NullTermIdx)) ||
+ if ((Known.Zero.isNonNegative() && Known.Zero.ule(NullTermIdx)) ||
(GEP->isInBounds() && isa<GlobalVariable>(GEP->getOperand(0)) &&
- NullTermIdx == ArrSize - 1))
- return B.CreateSub(ConstantInt::get(CI->getType(), NullTermIdx),
+ NullTermIdx == ArrSize - 1)) {
+ Offset = B.CreateSExtOrTrunc(Offset, CI->getType());
+ return B.CreateSub(ConstantInt::get(CI->getType(), NullTermIdx),
Offset);
+ }
}
return nullptr;
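A worked instance of the fold implemented above (hypothetical source, assuming the usual clang lowering to an inbounds GEP):

// static const char s[5] = "abcd";  // NullTermIdx == 4 == ArrSize - 1
// size_t f(unsigned x) { return strlen(s + (x & 3)); }
//
// computeKnownBits shows x & 3 fits in the low two bits, so the
// flipped Known.Zero is 3: non-negative and ule(NullTermIdx).
// The call therefore folds to:
//   return 4 - (x & 3);   // NullTermIdx - Offset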
@@ -484,8 +497,8 @@ Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) {
// strlen(x?"foo":"bars") --> x ? 3 : 4
if (SelectInst *SI = dyn_cast<SelectInst>(Src)) {
- uint64_t LenTrue = GetStringLength(SI->getTrueValue());
- uint64_t LenFalse = GetStringLength(SI->getFalseValue());
+ uint64_t LenTrue = GetStringLength(SI->getTrueValue(), CharSize);
+ uint64_t LenFalse = GetStringLength(SI->getFalseValue(), CharSize);
if (LenTrue && LenFalse) {
Function *Caller = CI->getParent()->getParent();
emitOptimizationRemark(CI->getContext(), "simplify-libcalls", *Caller,
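With CharSize threaded through, the select fold in this hunk also covers wide strings; a hypothetical source-level example (not from this patch):

// size_t f(bool x) { return wcslen(x ? L"foo" : L"bars"); }
// GetStringLength(..., 32) reports 4 and 5 (terminator included),
// so the call folds to a select with no libcall left behind:
//   x ? 3 : 4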
@@ -505,6 +518,17 @@ Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) {
return nullptr;
}
+Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) {
+ return optimizeStringLength(CI, B, 8);
+}
+
+Value *LibCallSimplifier::optimizeWcslen(CallInst *CI, IRBuilder<> &B) {
+ Module &M = *CI->getParent()->getParent()->getParent();
+ unsigned WCharSize = TLI->getWCharSize(M) * 8;
+
+ return optimizeStringLength(CI, B, WCharSize);
+}
+
Value *LibCallSimplifier::optimizeStrPBrk(CallInst *CI, IRBuilder<> &B) {
StringRef S1, S2;
bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
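The new optimizeWcslen wrapper above leans on TargetLibraryInfo to recover the target's wchar_t width; roughly (quoting the accessor from memory, so treat the exact signature as an assumption):

// In TargetLibraryInfo:
//   unsigned getWCharSize(const Module &M) const;  // size in *bytes*,
//       // read from the frontend-emitted "wchar_size" module flag.
// On a typical Linux target that flag is 4, so CharSize becomes
// 4 * 8 == 32 and GetStringLength() walks i32 elements instead of i8.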
@@ -2026,6 +2050,8 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
return optimizeMemMove(CI, Builder);
case LibFunc_memset:
return optimizeMemSet(CI, Builder);
+ case LibFunc_wcslen:
+ return optimizeWcslen(CI, Builder);
default:
break;
}
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 516ab7d03a88..1dc554bede7e 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3047,7 +3047,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
if (CreateGatherScatter) {
Value *MaskPart = Legal->isMaskRequired(LI) ? Mask[Part] : nullptr;
NewLI = Builder.CreateMaskedGather(VectorGep[Part], Alignment, MaskPart,
- 0, "wide.masked.gather");
+ nullptr, "wide.masked.gather");
Entry[Part] = NewLI;
} else {
// Calculate the pointer for the specific unroll-part.
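For reference, the builder overload being called here looks like this at this revision (from memory, so verify against IRBuilder.h):

// CallInst *CreateMaskedGather(Value *Ptrs, unsigned Align,
//                              Value *Mask = nullptr,
//                              Value *PassThru = nullptr,
//                              const Twine &Name = "");
//
// The literal 0 and nullptr both yield a null Value* for PassThru
// (masked-off lanes become undef); nullptr simply states that intent
// without leaning on an integer-to-pointer conversion.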
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 64013d6d687d..e6f78e6b94a3 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -578,12 +578,12 @@ private:
void eraseInstruction(Instruction *I) {
I->removeFromParent();
I->dropAllReferences();
- DeletedInstructions.push_back(std::unique_ptr<Instruction>(I));
+ DeletedInstructions.emplace_back(I);
}
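The switch to unique_value matters because deletion of a Value is routed through Value::deleteValue() rather than a plain delete at this point in the codebase. The alias lives in llvm/IR/Value.h around this revision and is roughly (quoted from memory):

// struct ValueDeleter { void operator()(Value *V) { V->deleteValue(); } };
// using unique_value = std::unique_ptr<Value, ValueDeleter>;
//
// DeletedInstructions.emplace_back(I) therefore constructs the smart
// pointer in place and guarantees deleteValue() runs when BoUpSLP
// is destroyed.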
/// Temporary store for deleted instructions. Instructions will be deleted
/// eventually when the BoUpSLP is destructed.
- SmallVector<std::unique_ptr<Instruction>, 8> DeletedInstructions;
+ SmallVector<unique_value, 8> DeletedInstructions;
/// A list of values that need to be extracted out of the tree.
/// This list holds pairs of (Internal Scalar : External User). External User