aboutsummaryrefslogtreecommitdiff
path: root/lib/Transforms
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2017-01-04 22:11:11 +0000
committerDimitry Andric <dim@FreeBSD.org>2017-01-04 22:11:11 +0000
commitc82ad72f63369bc462e59458f09960d66daa58a9 (patch)
tree58bc455a8d052220f9ae11e65d6f06d671a7a4c4 /lib/Transforms
parentb915e9e0fc85ba6f398b3fab0db6a81a8913af94 (diff)
Vendor import of llvm trunk r291012:vendor/llvm/llvm-trunk-r291012
Diffstat (limited to 'lib/Transforms')
-rw-r--r--lib/Transforms/InstCombine/InstCombineAddSub.cpp12
-rw-r--r--lib/Transforms/InstCombine/InstCombineCalls.cpp82
-rw-r--r--lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp18
-rw-r--r--lib/Transforms/InstCombine/InstCombineShifts.cpp19
-rw-r--r--lib/Transforms/Scalar/EarlyCSE.cpp4
-rw-r--r--lib/Transforms/Scalar/NewGVN.cpp60
-rw-r--r--lib/Transforms/Utils/InlineFunction.cpp2
-rw-r--r--lib/Transforms/Utils/LoopUnrollPeel.cpp25
-rw-r--r--lib/Transforms/Utils/SimplifyCFG.cpp10
9 files changed, 157 insertions, 75 deletions
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 3bbc70ab21c6..55151c13b430 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1057,6 +1057,18 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// add(zext(xor i16 X, -32768), -32768) --> sext X
return CastInst::Create(Instruction::SExt, X, LHS->getType());
}
+
+ if (Val->isNegative() &&
+ match(LHS, m_ZExt(m_NUWAdd(m_Value(X), m_APInt(C)))) &&
+ Val->sge(-C->sext(Val->getBitWidth()))) {
+ // (add (zext (add nuw X, C)), Val) -> (zext (add nuw X, C+Val))
+ return CastInst::Create(
+ Instruction::ZExt,
+ Builder->CreateNUWAdd(
+ X, Constant::getIntegerValue(X->getType(),
+ *C + Val->trunc(C->getBitWidth()))),
+ I.getType());
+ }
}
// FIXME: Use the match above instead of dyn_cast to allow these transforms
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 92369bd70b13..f863d192fc2f 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1581,6 +1581,62 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return replaceInstUsesWith(*II, V);
break;
}
+ case Intrinsic::fma:
+ case Intrinsic::fmuladd: {
+ Value *Src0 = II->getArgOperand(0);
+ Value *Src1 = II->getArgOperand(1);
+
+ // Canonicalize constants into the RHS.
+ if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
+ II->setArgOperand(0, Src1);
+ II->setArgOperand(1, Src0);
+ std::swap(Src0, Src1);
+ }
+
+ Value *LHS = nullptr;
+ Value *RHS = nullptr;
+
+ // fma fneg(x), fneg(y), z -> fma x, y, z
+ if (match(Src0, m_FNeg(m_Value(LHS))) &&
+ match(Src1, m_FNeg(m_Value(RHS)))) {
+ CallInst *NewCall = Builder->CreateCall(II->getCalledFunction(),
+ {LHS, RHS, II->getArgOperand(2)});
+ NewCall->takeName(II);
+ NewCall->copyFastMathFlags(II);
+ return replaceInstUsesWith(*II, NewCall);
+ }
+
+ // fma fabs(x), fabs(x), z -> fma x, x, z
+ if (match(Src0, m_Intrinsic<Intrinsic::fabs>(m_Value(LHS))) &&
+ match(Src1, m_Intrinsic<Intrinsic::fabs>(m_Value(RHS))) && LHS == RHS) {
+ CallInst *NewCall = Builder->CreateCall(II->getCalledFunction(),
+ {LHS, LHS, II->getArgOperand(2)});
+ NewCall->takeName(II);
+ NewCall->copyFastMathFlags(II);
+ return replaceInstUsesWith(*II, NewCall);
+ }
+
+ // fma x, 1, z -> fadd x, z
+ if (match(Src1, m_FPOne())) {
+ Instruction *RI = BinaryOperator::CreateFAdd(Src0, II->getArgOperand(2));
+ RI->copyFastMathFlags(II);
+ return RI;
+ }
+
+ break;
+ }
+ case Intrinsic::fabs: {
+ Value *Cond;
+ Constant *LHS, *RHS;
+ if (match(II->getArgOperand(0),
+ m_Select(m_Value(Cond), m_Constant(LHS), m_Constant(RHS)))) {
+ CallInst *Call0 = Builder->CreateCall(II->getCalledFunction(), {LHS});
+ CallInst *Call1 = Builder->CreateCall(II->getCalledFunction(), {RHS});
+ return SelectInst::Create(Cond, Call0, Call1);
+ }
+
+ break;
+ }
case Intrinsic::ppc_altivec_lvx:
case Intrinsic::ppc_altivec_lvxl:
// Turn PPC lvx -> load if the pointer is known aligned.
@@ -2669,24 +2725,20 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// assume( (load addr) != null ) -> add 'nonnull' metadata to load
// (if assume is valid at the load)
- if (ICmpInst* ICmp = dyn_cast<ICmpInst>(IIOperand)) {
- Value *LHS = ICmp->getOperand(0);
- Value *RHS = ICmp->getOperand(1);
- if (ICmpInst::ICMP_NE == ICmp->getPredicate() &&
- isa<LoadInst>(LHS) &&
- isa<Constant>(RHS) &&
- RHS->getType()->isPointerTy() &&
- cast<Constant>(RHS)->isNullValue()) {
- LoadInst* LI = cast<LoadInst>(LHS);
- if (isValidAssumeForContext(II, LI, &DT)) {
- MDNode *MD = MDNode::get(II->getContext(), None);
- LI->setMetadata(LLVMContext::MD_nonnull, MD);
- return eraseInstFromFunction(*II);
- }
- }
+ CmpInst::Predicate Pred;
+ Instruction *LHS;
+ if (match(IIOperand, m_ICmp(Pred, m_Instruction(LHS), m_Zero())) &&
+ Pred == ICmpInst::ICMP_NE && LHS->getOpcode() == Instruction::Load &&
+ LHS->getType()->isPointerTy() &&
+ isValidAssumeForContext(II, LHS, &DT)) {
+ MDNode *MD = MDNode::get(II->getContext(), None);
+ LHS->setMetadata(LLVMContext::MD_nonnull, MD);
+ return eraseInstFromFunction(*II);
+
// TODO: apply nonnull return attributes to calls and invokes
// TODO: apply range metadata for range check patterns?
}
+
// If there is a dominating assume with the same condition as this one,
// then this one is redundant, and should be removed.
APInt KnownZero(1, 0), KnownOne(1, 0);
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 5276bee4e0a2..388c5e4e7fa4 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -850,20 +850,10 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
// separated by a few arithmetic operations.
BasicBlock::iterator BBI(LI);
bool IsLoadCSE = false;
- if (Value *AvailableVal =
- FindAvailableLoadedValue(&LI, LI.getParent(), BBI,
- DefMaxInstsToScan, AA, &IsLoadCSE)) {
- if (IsLoadCSE) {
- LoadInst *NLI = cast<LoadInst>(AvailableVal);
- unsigned KnownIDs[] = {
- LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
- LLVMContext::MD_noalias, LLVMContext::MD_range,
- LLVMContext::MD_invariant_load, LLVMContext::MD_nonnull,
- LLVMContext::MD_invariant_group, LLVMContext::MD_align,
- LLVMContext::MD_dereferenceable,
- LLVMContext::MD_dereferenceable_or_null};
- combineMetadata(NLI, &LI, KnownIDs);
- };
+ if (Value *AvailableVal = FindAvailableLoadedValue(
+ &LI, LI.getParent(), BBI, DefMaxInstsToScan, AA, &IsLoadCSE)) {
+ if (IsLoadCSE)
+ combineMetadataForCSE(cast<LoadInst>(AvailableVal), &LI);
return replaceInstUsesWith(
LI, Builder->CreateBitOrPointerCast(AvailableVal, LI.getType(),
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp
index bc38c4aca348..5ad2a1c0e3e6 100644
--- a/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -731,6 +731,25 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) {
if (ConstantInt *Op1C = dyn_cast<ConstantInt>(I.getOperand(1))) {
unsigned ShAmt = Op1C->getZExtValue();
+ // Turn:
+ // %zext = zext i32 %V to i64
+ // %res = shl i64 %V, 8
+ //
+ // Into:
+ // %shl = shl i32 %V, 8
+ // %res = zext i32 %shl to i64
+ //
+ // This is only valid if %V would have zeros shifted out.
+ if (auto *ZI = dyn_cast<ZExtInst>(I.getOperand(0))) {
+ unsigned SrcBitWidth = ZI->getSrcTy()->getScalarSizeInBits();
+ if (ShAmt < SrcBitWidth &&
+ MaskedValueIsZero(ZI->getOperand(0),
+ APInt::getHighBitsSet(SrcBitWidth, ShAmt), 0, &I)) {
+ auto *Shl = Builder->CreateShl(ZI->getOperand(0), ShAmt);
+ return new ZExtInst(Shl, I.getType());
+ }
+ }
+
// If the shifted-out value is known-zero, then this is a NUW shift.
if (!I.hasNoUnsignedWrap() &&
MaskedValueIsZero(I.getOperand(0),
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index 9bf638dcbae3..16e08ee58fbe 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -481,9 +481,9 @@ private:
bool processNode(DomTreeNode *Node);
Value *getOrCreateResult(Value *Inst, Type *ExpectedType) const {
- if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+ if (auto *LI = dyn_cast<LoadInst>(Inst))
return LI;
- else if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ if (auto *SI = dyn_cast<StoreInst>(Inst))
return SI->getValueOperand();
assert(isa<IntrinsicInst>(Inst) && "Instruction not supported");
return TTI.getOrCreateResultFromMemIntrinsic(cast<IntrinsicInst>(Inst),
diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp
index dee61b77412e..8b8236390bf4 100644
--- a/lib/Transforms/Scalar/NewGVN.cpp
+++ b/lib/Transforms/Scalar/NewGVN.cpp
@@ -79,6 +79,7 @@ STATISTIC(NumGVNInstrDeleted, "Number of instructions deleted");
STATISTIC(NumGVNBlocksDeleted, "Number of blocks deleted");
STATISTIC(NumGVNOpsSimplified, "Number of Expressions simplified");
STATISTIC(NumGVNPhisAllSame, "Number of PHIs whos arguments are all the same");
+STATISTIC(NumGVNMaxIterations, "Maximum Number of iterations it took to converge GVN");
//===----------------------------------------------------------------------===//
// GVN Pass
@@ -714,16 +715,15 @@ const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I,
// Unlike loads, we never try to eliminate stores, so we do not check if they
// are simple and avoid value numbering them.
auto *SI = cast<StoreInst>(I);
- // If this store's memorydef stores the same value as the last store, the
- // memory accesses are equivalent.
- // Get the expression, if any, for the RHS of the MemoryDef.
MemoryAccess *StoreAccess = MSSA->getMemoryAccess(SI);
- MemoryAccess *StoreRHS = lookupMemoryAccessEquiv(
- cast<MemoryDef>(StoreAccess)->getDefiningAccess());
- const Expression *OldStore = createStoreExpression(SI, StoreRHS, B);
- // See if this store expression already has a value, and it's the same as our
- // current store. FIXME: Right now, we only do this for simple stores.
+ // See if we are defined by a previous store expression, it already has a
+ // value, and it's the same value as our current store. FIXME: Right now, we
+ // only do this for simple stores, we should expand to cover memcpys, etc.
if (SI->isSimple()) {
+ // Get the expression, if any, for the RHS of the MemoryDef.
+ MemoryAccess *StoreRHS = lookupMemoryAccessEquiv(
+ cast<MemoryDef>(StoreAccess)->getDefiningAccess());
+ const Expression *OldStore = createStoreExpression(SI, StoreRHS, B);
CongruenceClass *CC = ExpressionToClass.lookup(OldStore);
if (CC && CC->DefiningExpr && isa<StoreExpression>(CC->DefiningExpr) &&
CC->RepLeader == lookupOperandLeader(SI->getValueOperand(), SI, B))
@@ -1092,23 +1092,16 @@ void NewGVN::performCongruenceFinding(Value *V, const Expression *E) {
if (auto *I = dyn_cast<Instruction>(V)) {
if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) {
// If this is a MemoryDef, we need to update the equivalence table. If
- // we
- // determined the expression is congruent to a different memory state,
- // use that different memory state. If we determined it didn't, we
- // update
- // that as well. Note that currently, we do not guarantee the
- // "different" memory state dominates us. The goal is to make things
- // that are congruent look congruent, not ensure we can eliminate one in
- // favor of the other.
- // Right now, the only way they can be equivalent is for store
- // expresions.
- if (!isa<MemoryUse>(MA)) {
- if (E && isa<StoreExpression>(E) && EClass->Members.size() != 1) {
- auto *DefAccess = cast<StoreExpression>(E)->getDefiningAccess();
- setMemoryAccessEquivTo(MA, DefAccess != MA ? DefAccess : nullptr);
- } else {
- setMemoryAccessEquivTo(MA, nullptr);
- }
+ // we determined the expression is congruent to a different memory
+ // state, use that different memory state. If we determined it didn't,
+ // we update that as well. Right now, we only support store
+ // expressions.
+ if (!isa<MemoryUse>(MA) && isa<StoreExpression>(E) &&
+ EClass->Members.size() != 1) {
+ auto *DefAccess = cast<StoreExpression>(E)->getDefiningAccess();
+ setMemoryAccessEquivTo(MA, DefAccess != MA ? DefAccess : nullptr);
+ } else {
+ setMemoryAccessEquivTo(MA, nullptr);
}
markMemoryUsersTouched(MA);
}
@@ -1391,7 +1384,7 @@ void NewGVN::valueNumberInstruction(Instruction *I) {
} else {
// Handle terminators that return values. All of them produce values we
// don't currently understand.
- if (!I->getType()->isVoidTy()){
+ if (!I->getType()->isVoidTy()) {
auto *Symbolized = createUnknownExpression(I);
performCongruenceFinding(I, Symbolized);
}
@@ -1427,14 +1420,12 @@ void NewGVN::verifyMemoryCongruency() {
continue;
if (auto *FirstMUD = dyn_cast<MemoryUseOrDef>(KV.first)) {
auto *SecondMUD = dyn_cast<MemoryUseOrDef>(KV.second);
- if (FirstMUD && SecondMUD) {
- auto *FirstInst = FirstMUD->getMemoryInst();
- auto *SecondInst = SecondMUD->getMemoryInst();
+ if (FirstMUD && SecondMUD)
assert(
- ValueToClass.lookup(FirstInst) == ValueToClass.lookup(SecondInst) &&
+ ValueToClass.lookup(FirstMUD->getMemoryInst()) ==
+ ValueToClass.lookup(SecondMUD->getMemoryInst()) &&
"The instructions for these memory operations should have been in "
"the same congruence class");
- }
} else if (auto *FirstMP = dyn_cast<MemoryPhi>(KV.first)) {
// We can only sanely verify that MemoryDefs in the operand list all have
@@ -1538,9 +1529,11 @@ bool NewGVN::runGVN(Function &F, DominatorTree *_DT, AssumptionCache *_AC,
initializeCongruenceClasses(F);
+ unsigned int Iterations = 0;
// We start out in the entry block.
BasicBlock *LastBlock = &F.getEntryBlock();
while (TouchedInstructions.any()) {
+ ++Iterations;
// Walk through all the instructions in all the blocks in RPO.
for (int InstrNum = TouchedInstructions.find_first(); InstrNum != -1;
InstrNum = TouchedInstructions.find_next(InstrNum)) {
@@ -1587,8 +1580,7 @@ bool NewGVN::runGVN(Function &F, DominatorTree *_DT, AssumptionCache *_AC,
TouchedInstructions.reset(InstrNum);
}
}
-
-// FIXME: Move this to expensive checks when we are satisfied with NewGVN
+ NumGVNMaxIterations = std::max(NumGVNMaxIterations.getValue(), Iterations);
#ifndef NDEBUG
verifyMemoryCongruency();
#endif
@@ -2070,7 +2062,7 @@ bool NewGVN::eliminateInstructions(Function &F) {
// Cleanup the congruence class.
SmallPtrSet<Value *, 4> MembersLeft;
- for (Value * Member : CC->Members) {
+ for (Value *Member : CC->Members) {
if (Member->getType()->isVoidTy()) {
MembersLeft.insert(Member);
continue;
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index a2ceded106b4..a40079ca8e76 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -760,7 +760,7 @@ static void PropagateParallelLoopAccessMetadata(CallSite CS,
/// When inlining a function that contains noalias scope metadata,
/// this metadata needs to be cloned so that the inlined blocks
-/// have different "unqiue scopes" at every call site. Were this not done, then
+/// have different "unique scopes" at every call site. Were this not done, then
/// aliasing scopes from a function inlined into a caller multiple times could
/// not be differentiated (and this would lead to miscompiles because the
/// non-aliasing property communicated by the metadata could have
diff --git a/lib/Transforms/Utils/LoopUnrollPeel.cpp b/lib/Transforms/Utils/LoopUnrollPeel.cpp
index dc526a20c903..842cf31f2e3d 100644
--- a/lib/Transforms/Utils/LoopUnrollPeel.cpp
+++ b/lib/Transforms/Utils/LoopUnrollPeel.cpp
@@ -335,10 +335,12 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
uint64_t TrueWeight, FalseWeight;
- uint64_t ExitWeight = 0, BackEdgeWeight = 0;
+ uint64_t ExitWeight = 0, CurHeaderWeight = 0;
if (LatchBR->extractProfMetadata(TrueWeight, FalseWeight)) {
ExitWeight = HeaderIdx ? TrueWeight : FalseWeight;
- BackEdgeWeight = HeaderIdx ? FalseWeight : TrueWeight;
+ // The # of times the loop body executes is the sum of the exit block
+ // weight and the # of times the backedges are taken.
+ CurHeaderWeight = TrueWeight + FalseWeight;
}
// For each peeled-off iteration, make a copy of the loop.
@@ -346,15 +348,14 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
SmallVector<BasicBlock *, 8> NewBlocks;
ValueToValueMapTy VMap;
- // The exit weight of the previous iteration is the header entry weight
- // of the current iteration. So this is exactly how many dynamic iterations
- // the current peeled-off static iteration uses up.
+ // Subtract the exit weight from the current header weight -- the exit
+ // weight is exactly the weight of the previous iteration's header.
// FIXME: due to the way the distribution is constructed, we need a
// guard here to make sure we don't end up with non-positive weights.
- if (ExitWeight < BackEdgeWeight)
- BackEdgeWeight -= ExitWeight;
+ if (ExitWeight < CurHeaderWeight)
+ CurHeaderWeight -= ExitWeight;
else
- BackEdgeWeight = 1;
+ CurHeaderWeight = 1;
cloneLoopBlocks(L, Iter, InsertTop, InsertBot, Exit,
NewBlocks, LoopBlocks, VMap, LVMap, LI);
@@ -388,6 +389,14 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
// Adjust the branch weights on the loop exit.
if (ExitWeight) {
+ // The backedge count is the difference of current header weight and
+ // current loop exit weight. If the current header weight is smaller than
+ // the current loop exit weight, we mark the loop backedge weight as 1.
+ uint64_t BackEdgeWeight = 0;
+ if (ExitWeight < CurHeaderWeight)
+ BackEdgeWeight = CurHeaderWeight - ExitWeight;
+ else
+ BackEdgeWeight = 1;
MDBuilder MDB(LatchBR->getContext());
MDNode *WeightNode =
HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 3846b21c502e..54390e77bb1f 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1574,12 +1574,20 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
I0->getOperandUse(O).set(NewOperands[O]);
I0->moveBefore(&*BBEnd->getFirstInsertionPt());
- // Update metadata and IR flags.
+ // The debug location for the "common" instruction is the merged locations of
+ // all the commoned instructions. We start with the original location of the
+ // "common" instruction and iteratively merge each location in the loop below.
+ DILocation *Loc = I0->getDebugLoc();
+
+ // Update metadata and IR flags, and merge debug locations.
for (auto *I : Insts)
if (I != I0) {
+ Loc = DILocation::getMergedLocation(Loc, I->getDebugLoc());
combineMetadataForCSE(I0, I);
I0->andIRFlags(I);
}
+ if (!isa<CallInst>(I0))
+ I0->setDebugLoc(Loc);
if (!isa<StoreInst>(I0)) {
// canSinkLastInstruction checked that all instructions were used by