diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2017-01-04 22:11:11 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-01-04 22:11:11 +0000 |
commit | c82ad72f63369bc462e59458f09960d66daa58a9 (patch) | |
tree | 58bc455a8d052220f9ae11e65d6f06d671a7a4c4 /lib/Transforms | |
parent | b915e9e0fc85ba6f398b3fab0db6a81a8913af94 (diff) |
Vendor import of llvm trunk r291012:vendor/llvm/llvm-trunk-r291012
Diffstat (limited to 'lib/Transforms')
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineAddSub.cpp | 12 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineCalls.cpp | 82 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp | 18 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineShifts.cpp | 19 | ||||
-rw-r--r-- | lib/Transforms/Scalar/EarlyCSE.cpp | 4 | ||||
-rw-r--r-- | lib/Transforms/Scalar/NewGVN.cpp | 60 | ||||
-rw-r--r-- | lib/Transforms/Utils/InlineFunction.cpp | 2 | ||||
-rw-r--r-- | lib/Transforms/Utils/LoopUnrollPeel.cpp | 25 | ||||
-rw-r--r-- | lib/Transforms/Utils/SimplifyCFG.cpp | 10 |
9 files changed, 157 insertions, 75 deletions
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 3bbc70ab21c6..55151c13b430 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1057,6 +1057,18 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { // add(zext(xor i16 X, -32768), -32768) --> sext X return CastInst::Create(Instruction::SExt, X, LHS->getType()); } + + if (Val->isNegative() && + match(LHS, m_ZExt(m_NUWAdd(m_Value(X), m_APInt(C)))) && + Val->sge(-C->sext(Val->getBitWidth()))) { + // (add (zext (add nuw X, C)), Val) -> (zext (add nuw X, C+Val)) + return CastInst::Create( + Instruction::ZExt, + Builder->CreateNUWAdd( + X, Constant::getIntegerValue(X->getType(), + *C + Val->trunc(C->getBitWidth()))), + I.getType()); + } } // FIXME: Use the match above instead of dyn_cast to allow these transforms diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 92369bd70b13..f863d192fc2f 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1581,6 +1581,62 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { return replaceInstUsesWith(*II, V); break; } + case Intrinsic::fma: + case Intrinsic::fmuladd: { + Value *Src0 = II->getArgOperand(0); + Value *Src1 = II->getArgOperand(1); + + // Canonicalize constants into the RHS. + if (isa<Constant>(Src0) && !isa<Constant>(Src1)) { + II->setArgOperand(0, Src1); + II->setArgOperand(1, Src0); + std::swap(Src0, Src1); + } + + Value *LHS = nullptr; + Value *RHS = nullptr; + + // fma fneg(x), fneg(y), z -> fma x, y, z + if (match(Src0, m_FNeg(m_Value(LHS))) && + match(Src1, m_FNeg(m_Value(RHS)))) { + CallInst *NewCall = Builder->CreateCall(II->getCalledFunction(), + {LHS, RHS, II->getArgOperand(2)}); + NewCall->takeName(II); + NewCall->copyFastMathFlags(II); + return replaceInstUsesWith(*II, NewCall); + } + + // fma fabs(x), fabs(x), z -> fma x, x, z + if (match(Src0, m_Intrinsic<Intrinsic::fabs>(m_Value(LHS))) && + match(Src1, m_Intrinsic<Intrinsic::fabs>(m_Value(RHS))) && LHS == RHS) { + CallInst *NewCall = Builder->CreateCall(II->getCalledFunction(), + {LHS, LHS, II->getArgOperand(2)}); + NewCall->takeName(II); + NewCall->copyFastMathFlags(II); + return replaceInstUsesWith(*II, NewCall); + } + + // fma x, 1, z -> fadd x, z + if (match(Src1, m_FPOne())) { + Instruction *RI = BinaryOperator::CreateFAdd(Src0, II->getArgOperand(2)); + RI->copyFastMathFlags(II); + return RI; + } + + break; + } + case Intrinsic::fabs: { + Value *Cond; + Constant *LHS, *RHS; + if (match(II->getArgOperand(0), + m_Select(m_Value(Cond), m_Constant(LHS), m_Constant(RHS)))) { + CallInst *Call0 = Builder->CreateCall(II->getCalledFunction(), {LHS}); + CallInst *Call1 = Builder->CreateCall(II->getCalledFunction(), {RHS}); + return SelectInst::Create(Cond, Call0, Call1); + } + + break; + } case Intrinsic::ppc_altivec_lvx: case Intrinsic::ppc_altivec_lvxl: // Turn PPC lvx -> load if the pointer is known aligned. @@ -2669,24 +2725,20 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // assume( (load addr) != null ) -> add 'nonnull' metadata to load // (if assume is valid at the load) - if (ICmpInst* ICmp = dyn_cast<ICmpInst>(IIOperand)) { - Value *LHS = ICmp->getOperand(0); - Value *RHS = ICmp->getOperand(1); - if (ICmpInst::ICMP_NE == ICmp->getPredicate() && - isa<LoadInst>(LHS) && - isa<Constant>(RHS) && - RHS->getType()->isPointerTy() && - cast<Constant>(RHS)->isNullValue()) { - LoadInst* LI = cast<LoadInst>(LHS); - if (isValidAssumeForContext(II, LI, &DT)) { - MDNode *MD = MDNode::get(II->getContext(), None); - LI->setMetadata(LLVMContext::MD_nonnull, MD); - return eraseInstFromFunction(*II); - } - } + CmpInst::Predicate Pred; + Instruction *LHS; + if (match(IIOperand, m_ICmp(Pred, m_Instruction(LHS), m_Zero())) && + Pred == ICmpInst::ICMP_NE && LHS->getOpcode() == Instruction::Load && + LHS->getType()->isPointerTy() && + isValidAssumeForContext(II, LHS, &DT)) { + MDNode *MD = MDNode::get(II->getContext(), None); + LHS->setMetadata(LLVMContext::MD_nonnull, MD); + return eraseInstFromFunction(*II); + // TODO: apply nonnull return attributes to calls and invokes // TODO: apply range metadata for range check patterns? } + // If there is a dominating assume with the same condition as this one, // then this one is redundant, and should be removed. APInt KnownZero(1, 0), KnownOne(1, 0); diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 5276bee4e0a2..388c5e4e7fa4 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -850,20 +850,10 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { // separated by a few arithmetic operations. BasicBlock::iterator BBI(LI); bool IsLoadCSE = false; - if (Value *AvailableVal = - FindAvailableLoadedValue(&LI, LI.getParent(), BBI, - DefMaxInstsToScan, AA, &IsLoadCSE)) { - if (IsLoadCSE) { - LoadInst *NLI = cast<LoadInst>(AvailableVal); - unsigned KnownIDs[] = { - LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, - LLVMContext::MD_noalias, LLVMContext::MD_range, - LLVMContext::MD_invariant_load, LLVMContext::MD_nonnull, - LLVMContext::MD_invariant_group, LLVMContext::MD_align, - LLVMContext::MD_dereferenceable, - LLVMContext::MD_dereferenceable_or_null}; - combineMetadata(NLI, &LI, KnownIDs); - }; + if (Value *AvailableVal = FindAvailableLoadedValue( + &LI, LI.getParent(), BBI, DefMaxInstsToScan, AA, &IsLoadCSE)) { + if (IsLoadCSE) + combineMetadataForCSE(cast<LoadInst>(AvailableVal), &LI); return replaceInstUsesWith( LI, Builder->CreateBitOrPointerCast(AvailableVal, LI.getType(), diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index bc38c4aca348..5ad2a1c0e3e6 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -731,6 +731,25 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) { if (ConstantInt *Op1C = dyn_cast<ConstantInt>(I.getOperand(1))) { unsigned ShAmt = Op1C->getZExtValue(); + // Turn: + // %zext = zext i32 %V to i64 + // %res = shl i64 %V, 8 + // + // Into: + // %shl = shl i32 %V, 8 + // %res = zext i32 %shl to i64 + // + // This is only valid if %V would have zeros shifted out. + if (auto *ZI = dyn_cast<ZExtInst>(I.getOperand(0))) { + unsigned SrcBitWidth = ZI->getSrcTy()->getScalarSizeInBits(); + if (ShAmt < SrcBitWidth && + MaskedValueIsZero(ZI->getOperand(0), + APInt::getHighBitsSet(SrcBitWidth, ShAmt), 0, &I)) { + auto *Shl = Builder->CreateShl(ZI->getOperand(0), ShAmt); + return new ZExtInst(Shl, I.getType()); + } + } + // If the shifted-out value is known-zero, then this is a NUW shift. if (!I.hasNoUnsignedWrap() && MaskedValueIsZero(I.getOperand(0), diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index 9bf638dcbae3..16e08ee58fbe 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -481,9 +481,9 @@ private: bool processNode(DomTreeNode *Node); Value *getOrCreateResult(Value *Inst, Type *ExpectedType) const { - if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) + if (auto *LI = dyn_cast<LoadInst>(Inst)) return LI; - else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) + if (auto *SI = dyn_cast<StoreInst>(Inst)) return SI->getValueOperand(); assert(isa<IntrinsicInst>(Inst) && "Instruction not supported"); return TTI.getOrCreateResultFromMemIntrinsic(cast<IntrinsicInst>(Inst), diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp index dee61b77412e..8b8236390bf4 100644 --- a/lib/Transforms/Scalar/NewGVN.cpp +++ b/lib/Transforms/Scalar/NewGVN.cpp @@ -79,6 +79,7 @@ STATISTIC(NumGVNInstrDeleted, "Number of instructions deleted"); STATISTIC(NumGVNBlocksDeleted, "Number of blocks deleted"); STATISTIC(NumGVNOpsSimplified, "Number of Expressions simplified"); STATISTIC(NumGVNPhisAllSame, "Number of PHIs whos arguments are all the same"); +STATISTIC(NumGVNMaxIterations, "Maximum Number of iterations it took to converge GVN"); //===----------------------------------------------------------------------===// // GVN Pass @@ -714,16 +715,15 @@ const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I, // Unlike loads, we never try to eliminate stores, so we do not check if they // are simple and avoid value numbering them. auto *SI = cast<StoreInst>(I); - // If this store's memorydef stores the same value as the last store, the - // memory accesses are equivalent. - // Get the expression, if any, for the RHS of the MemoryDef. MemoryAccess *StoreAccess = MSSA->getMemoryAccess(SI); - MemoryAccess *StoreRHS = lookupMemoryAccessEquiv( - cast<MemoryDef>(StoreAccess)->getDefiningAccess()); - const Expression *OldStore = createStoreExpression(SI, StoreRHS, B); - // See if this store expression already has a value, and it's the same as our - // current store. FIXME: Right now, we only do this for simple stores. + // See if we are defined by a previous store expression, it already has a + // value, and it's the same value as our current store. FIXME: Right now, we + // only do this for simple stores, we should expand to cover memcpys, etc. if (SI->isSimple()) { + // Get the expression, if any, for the RHS of the MemoryDef. + MemoryAccess *StoreRHS = lookupMemoryAccessEquiv( + cast<MemoryDef>(StoreAccess)->getDefiningAccess()); + const Expression *OldStore = createStoreExpression(SI, StoreRHS, B); CongruenceClass *CC = ExpressionToClass.lookup(OldStore); if (CC && CC->DefiningExpr && isa<StoreExpression>(CC->DefiningExpr) && CC->RepLeader == lookupOperandLeader(SI->getValueOperand(), SI, B)) @@ -1092,23 +1092,16 @@ void NewGVN::performCongruenceFinding(Value *V, const Expression *E) { if (auto *I = dyn_cast<Instruction>(V)) { if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) { // If this is a MemoryDef, we need to update the equivalence table. If - // we - // determined the expression is congruent to a different memory state, - // use that different memory state. If we determined it didn't, we - // update - // that as well. Note that currently, we do not guarantee the - // "different" memory state dominates us. The goal is to make things - // that are congruent look congruent, not ensure we can eliminate one in - // favor of the other. - // Right now, the only way they can be equivalent is for store - // expresions. - if (!isa<MemoryUse>(MA)) { - if (E && isa<StoreExpression>(E) && EClass->Members.size() != 1) { - auto *DefAccess = cast<StoreExpression>(E)->getDefiningAccess(); - setMemoryAccessEquivTo(MA, DefAccess != MA ? DefAccess : nullptr); - } else { - setMemoryAccessEquivTo(MA, nullptr); - } + // we determined the expression is congruent to a different memory + // state, use that different memory state. If we determined it didn't, + // we update that as well. Right now, we only support store + // expressions. + if (!isa<MemoryUse>(MA) && isa<StoreExpression>(E) && + EClass->Members.size() != 1) { + auto *DefAccess = cast<StoreExpression>(E)->getDefiningAccess(); + setMemoryAccessEquivTo(MA, DefAccess != MA ? DefAccess : nullptr); + } else { + setMemoryAccessEquivTo(MA, nullptr); } markMemoryUsersTouched(MA); } @@ -1391,7 +1384,7 @@ void NewGVN::valueNumberInstruction(Instruction *I) { } else { // Handle terminators that return values. All of them produce values we // don't currently understand. - if (!I->getType()->isVoidTy()){ + if (!I->getType()->isVoidTy()) { auto *Symbolized = createUnknownExpression(I); performCongruenceFinding(I, Symbolized); } @@ -1427,14 +1420,12 @@ void NewGVN::verifyMemoryCongruency() { continue; if (auto *FirstMUD = dyn_cast<MemoryUseOrDef>(KV.first)) { auto *SecondMUD = dyn_cast<MemoryUseOrDef>(KV.second); - if (FirstMUD && SecondMUD) { - auto *FirstInst = FirstMUD->getMemoryInst(); - auto *SecondInst = SecondMUD->getMemoryInst(); + if (FirstMUD && SecondMUD) assert( - ValueToClass.lookup(FirstInst) == ValueToClass.lookup(SecondInst) && + ValueToClass.lookup(FirstMUD->getMemoryInst()) == + ValueToClass.lookup(SecondMUD->getMemoryInst()) && "The instructions for these memory operations should have been in " "the same congruence class"); - } } else if (auto *FirstMP = dyn_cast<MemoryPhi>(KV.first)) { // We can only sanely verify that MemoryDefs in the operand list all have @@ -1538,9 +1529,11 @@ bool NewGVN::runGVN(Function &F, DominatorTree *_DT, AssumptionCache *_AC, initializeCongruenceClasses(F); + unsigned int Iterations = 0; // We start out in the entry block. BasicBlock *LastBlock = &F.getEntryBlock(); while (TouchedInstructions.any()) { + ++Iterations; // Walk through all the instructions in all the blocks in RPO. for (int InstrNum = TouchedInstructions.find_first(); InstrNum != -1; InstrNum = TouchedInstructions.find_next(InstrNum)) { @@ -1587,8 +1580,7 @@ bool NewGVN::runGVN(Function &F, DominatorTree *_DT, AssumptionCache *_AC, TouchedInstructions.reset(InstrNum); } } - -// FIXME: Move this to expensive checks when we are satisfied with NewGVN + NumGVNMaxIterations = std::max(NumGVNMaxIterations.getValue(), Iterations); #ifndef NDEBUG verifyMemoryCongruency(); #endif @@ -2070,7 +2062,7 @@ bool NewGVN::eliminateInstructions(Function &F) { // Cleanup the congruence class. SmallPtrSet<Value *, 4> MembersLeft; - for (Value * Member : CC->Members) { + for (Value *Member : CC->Members) { if (Member->getType()->isVoidTy()) { MembersLeft.insert(Member); continue; diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index a2ceded106b4..a40079ca8e76 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -760,7 +760,7 @@ static void PropagateParallelLoopAccessMetadata(CallSite CS, /// When inlining a function that contains noalias scope metadata, /// this metadata needs to be cloned so that the inlined blocks -/// have different "unqiue scopes" at every call site. Were this not done, then +/// have different "unique scopes" at every call site. Were this not done, then /// aliasing scopes from a function inlined into a caller multiple times could /// not be differentiated (and this would lead to miscompiles because the /// non-aliasing property communicated by the metadata could have diff --git a/lib/Transforms/Utils/LoopUnrollPeel.cpp b/lib/Transforms/Utils/LoopUnrollPeel.cpp index dc526a20c903..842cf31f2e3d 100644 --- a/lib/Transforms/Utils/LoopUnrollPeel.cpp +++ b/lib/Transforms/Utils/LoopUnrollPeel.cpp @@ -335,10 +335,12 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1); uint64_t TrueWeight, FalseWeight; - uint64_t ExitWeight = 0, BackEdgeWeight = 0; + uint64_t ExitWeight = 0, CurHeaderWeight = 0; if (LatchBR->extractProfMetadata(TrueWeight, FalseWeight)) { ExitWeight = HeaderIdx ? TrueWeight : FalseWeight; - BackEdgeWeight = HeaderIdx ? FalseWeight : TrueWeight; + // The # of times the loop body executes is the sum of the exit block + // weight and the # of times the backedges are taken. + CurHeaderWeight = TrueWeight + FalseWeight; } // For each peeled-off iteration, make a copy of the loop. @@ -346,15 +348,14 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, SmallVector<BasicBlock *, 8> NewBlocks; ValueToValueMapTy VMap; - // The exit weight of the previous iteration is the header entry weight - // of the current iteration. So this is exactly how many dynamic iterations - // the current peeled-off static iteration uses up. + // Subtract the exit weight from the current header weight -- the exit + // weight is exactly the weight of the previous iteration's header. // FIXME: due to the way the distribution is constructed, we need a // guard here to make sure we don't end up with non-positive weights. - if (ExitWeight < BackEdgeWeight) - BackEdgeWeight -= ExitWeight; + if (ExitWeight < CurHeaderWeight) + CurHeaderWeight -= ExitWeight; else - BackEdgeWeight = 1; + CurHeaderWeight = 1; cloneLoopBlocks(L, Iter, InsertTop, InsertBot, Exit, NewBlocks, LoopBlocks, VMap, LVMap, LI); @@ -388,6 +389,14 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, // Adjust the branch weights on the loop exit. if (ExitWeight) { + // The backedge count is the difference of current header weight and + // current loop exit weight. If the current header weight is smaller than + // the current loop exit weight, we mark the loop backedge weight as 1. + uint64_t BackEdgeWeight = 0; + if (ExitWeight < CurHeaderWeight) + BackEdgeWeight = CurHeaderWeight - ExitWeight; + else + BackEdgeWeight = 1; MDBuilder MDB(LatchBR->getContext()); MDNode *WeightNode = HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight) diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 3846b21c502e..54390e77bb1f 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1574,12 +1574,20 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) { I0->getOperandUse(O).set(NewOperands[O]); I0->moveBefore(&*BBEnd->getFirstInsertionPt()); - // Update metadata and IR flags. + // The debug location for the "common" instruction is the merged locations of + // all the commoned instructions. We start with the original location of the + // "common" instruction and iteratively merge each location in the loop below. + DILocation *Loc = I0->getDebugLoc(); + + // Update metadata and IR flags, and merge debug locations. for (auto *I : Insts) if (I != I0) { + Loc = DILocation::getMergedLocation(Loc, I->getDebugLoc()); combineMetadataForCSE(I0, I); I0->andIRFlags(I); } + if (!isa<CallInst>(I0)) + I0->setDebugLoc(Loc); if (!isa<StoreInst>(I0)) { // canSinkLastInstruction checked that all instructions were used by |