Diffstat (limited to 'contrib/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp | 417
1 file changed, 294 insertions(+), 123 deletions(-)
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp
index b16f8591b5a4..c6b6d75aefe8 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -26,8 +26,8 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AssumeBundleQueries.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumeBundleQueries.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/DomTreeUpdater.h"
@@ -36,6 +36,8 @@
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/PHITransAddr.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
@@ -46,7 +48,6 @@
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
@@ -98,21 +99,33 @@
 STATISTIC(NumGVNSimpl,  "Number of instructions simplified");
 STATISTIC(NumGVNEqProp, "Number of equalities propagated");
 STATISTIC(NumPRELoad,   "Number of loads PRE'd");
 
+STATISTIC(IsValueFullyAvailableInBlockNumSpeculationsMax,
+          "Number of blocks speculated as available in "
+          "IsValueFullyAvailableInBlock(), max");
+STATISTIC(MaxBBSpeculationCutoffReachedTimes,
+          "Number of times we reached gvn-max-block-speculations cut-off "
+          "preventing further exploration");
+
 static cl::opt<bool> GVNEnablePRE("enable-pre", cl::init(true), cl::Hidden);
 static cl::opt<bool> GVNEnableLoadPRE("enable-load-pre", cl::init(true));
 static cl::opt<bool> GVNEnableLoadInLoopPRE("enable-load-in-loop-pre",
                                             cl::init(true));
+static cl::opt<bool>
+GVNEnableSplitBackedgeInLoadPRE("enable-split-backedge-in-load-pre",
+                                cl::init(true));
 static cl::opt<bool> GVNEnableMemDep("enable-gvn-memdep", cl::init(true));
 
-// Maximum allowed recursion depth.
-static cl::opt<uint32_t>
-MaxRecurseDepth("gvn-max-recurse-depth", cl::Hidden, cl::init(1000), cl::ZeroOrMore,
-                cl::desc("Max recurse depth in GVN (default = 1000)"));
-
 static cl::opt<uint32_t> MaxNumDeps(
     "gvn-max-num-deps", cl::Hidden, cl::init(100), cl::ZeroOrMore,
     cl::desc("Max number of dependences to attempt Load PRE (default = 100)"));
 
+// This is based on IsValueFullyAvailableInBlockNumSpeculationsMax stat.
+static cl::opt<uint32_t> MaxBBSpeculations(
+    "gvn-max-block-speculations", cl::Hidden, cl::init(600), cl::ZeroOrMore,
+    cl::desc("Max number of blocks we're willing to speculate on (and recurse "
+             "into) when deducing if a value is fully available or not in GVN "
+             "(default = 600)"));
+
 struct llvm::GVN::Expression {
   uint32_t opcode;
   bool commutative = false;
@@ -282,9 +295,9 @@ GVN::Expression GVN::ValueTable::createExpr(Instruction *I) {
   if (I->isCommutative()) {
     // Ensure that commutative instructions that only differ by a permutation
     // of their operands get the same value number by sorting the operand value
-    // numbers.  Since all commutative instructions have two operands it is more
+    // numbers.  Since commutative operands are the 1st two operands it is more
     // efficient to sort by hand rather than using, say, std::sort.
-    assert(I->getNumOperands() == 2 && "Unsupported commutative instruction!");
+    assert(I->getNumOperands() >= 2 && "Unsupported commutative instruction!");
     if (e.varargs[0] > e.varargs[1])
       std::swap(e.varargs[0], e.varargs[1]);
     e.commutative = true;
@@ -353,9 +366,7 @@ GVN::Expression GVN::ValueTable::createExtractvalueExpr(ExtractValueInst *EI) {
        OI != OE; ++OI)
     e.varargs.push_back(lookupOrAdd(*OI));
 
-  for (ExtractValueInst::idx_iterator II = EI->idx_begin(), IE = EI->idx_end();
-       II != IE; ++II)
-    e.varargs.push_back(*II);
+  append_range(e.varargs, EI->indices());
 
   return e;
 }
@@ -399,9 +410,12 @@ uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) {
     }
 
     if (local_dep.isDef()) {
-      CallInst* local_cdep = cast<CallInst>(local_dep.getInst());
+      // For masked load/store intrinsics, the local_dep may actually be
+      // a normal load or store instruction.
+      CallInst *local_cdep = dyn_cast<CallInst>(local_dep.getInst());
 
-      if (local_cdep->getNumArgOperands() != C->getNumArgOperands()) {
+      if (!local_cdep ||
+          local_cdep->getNumArgOperands() != C->getNumArgOperands()) {
         valueNumbering[C] = nextValueNumber;
         return nextValueNumber++;
       }
@@ -626,6 +640,11 @@ bool GVN::isLoadInLoopPREEnabled() const {
   return Options.AllowLoadInLoopPRE.getValueOr(GVNEnableLoadInLoopPRE);
 }
 
+bool GVN::isLoadPRESplitBackedgeEnabled() const {
+  return Options.AllowLoadPRESplitBackedge.getValueOr(
+      GVNEnableSplitBackedgeInLoadPRE);
+}
+
 bool GVN::isMemDepEnabled() const {
   return Options.AllowMemDep.getValueOr(GVNEnableMemDep);
 }
@@ -642,14 +661,18 @@ PreservedAnalyses GVN::run(Function &F, FunctionAnalysisManager &AM) {
   auto *MemDep =
       isMemDepEnabled() ? &AM.getResult<MemoryDependenceAnalysis>(F) : nullptr;
   auto *LI = AM.getCachedResult<LoopAnalysis>(F);
+  auto *MSSA = AM.getCachedResult<MemorySSAAnalysis>(F);
   auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
-  bool Changed = runImpl(F, AC, DT, TLI, AA, MemDep, LI, &ORE);
+  bool Changed = runImpl(F, AC, DT, TLI, AA, MemDep, LI, &ORE,
+                         MSSA ? &MSSA->getMSSA() : nullptr);
   if (!Changed)
     return PreservedAnalyses::all();
   PreservedAnalyses PA;
   PA.preserve<DominatorTreeAnalysis>();
   PA.preserve<GlobalsAA>();
   PA.preserve<TargetLibraryAnalysis>();
+  if (MSSA)
+    PA.preserve<MemorySSAAnalysis>();
   if (LI)
     PA.preserve<LoopAnalysis>();
   return PA;
@@ -667,6 +690,18 @@ LLVM_DUMP_METHOD void GVN::dump(DenseMap<uint32_t, Value*>& d) const {
 }
 #endif
 
+enum class AvailabilityState : char {
+  /// We know the block *is not* fully available. This is a fixpoint.
+  Unavailable = 0,
+  /// We know the block *is* fully available. This is a fixpoint.
+  Available = 1,
+  /// We do not know whether the block is fully available or not,
+  /// but we are currently speculating that it will be.
+  /// If it would have turned out that the block was, in fact, not fully
+  /// available, this would have been cleaned up into an Unavailable.
+  SpeculativelyAvailable = 2,
+};
+
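The GVN::run() change above consumes MemorySSA only opportunistically: getCachedResult() never computes an analysis, so GVN updates MemorySSA when some earlier pass already built it, and declares it preserved only in that case. A minimal sketch of that pattern under the new pass manager (ToyPass and the elided transform are invented for illustration):

    #include "llvm/Analysis/MemorySSA.h"
    #include "llvm/IR/PassManager.h"

    using namespace llvm;

    struct ToyPass : PassInfoMixin<ToyPass> {
      PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) {
        // Returns null unless MemorySSA is already cached for F.
        auto *CachedMSSA = AM.getCachedResult<MemorySSAAnalysis>(F);
        MemorySSA *MSSA = CachedMSSA ? &CachedMSSA->getMSSA() : nullptr;

        bool Changed = false; // transform here, updating MSSA if non-null

        if (!Changed)
          return PreservedAnalyses::all();
        PreservedAnalyses PA;
        if (MSSA)
          PA.preserve<MemorySSAAnalysis>(); // we kept it up to date
        return PA;
      }
    };

The design choice is that GVN never pays to build MemorySSA itself; it only promises not to break it for later consumers.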
@@ -675,76 +710,118 @@ LLVM_DUMP_METHOD void GVN::dump(DenseMap<uint32_t, Value*>& d) const {
 /// Return true if we can prove that the value
 /// we're analyzing is fully available in the specified block.  As we go, keep
 /// track of which blocks we know are fully alive in FullyAvailableBlocks.  This
 /// map is actually a tri-state map with the following values:
 ///   1) we know the block *is* fully available.
 ///   2) we do not know whether the block is fully available or not, but we are
 ///      currently speculating that it will be.
-///   3) we are speculating for this block and have used that to speculate for
-///      other blocks.
-static bool IsValueFullyAvailableInBlock(BasicBlock *BB,
-                            DenseMap<BasicBlock*, char> &FullyAvailableBlocks,
-                            uint32_t RecurseDepth) {
-  if (RecurseDepth > MaxRecurseDepth)
-    return false;
-
-  // Optimistically assume that the block is fully available and check to see
-  // if we already know about this block in one lookup.
-  std::pair<DenseMap<BasicBlock*, char>::iterator, bool> IV =
-      FullyAvailableBlocks.insert(std::make_pair(BB, 2));
-
-  // If the entry already existed for this block, return the precomputed value.
-  if (!IV.second) {
-    // If this is a speculative "available" value, mark it as being used for
-    // speculation of other blocks.
-    if (IV.first->second == 2)
-      IV.first->second = 3;
-    return IV.first->second != 0;
-  }
-
-  // Otherwise, see if it is fully available in all predecessors.
-  pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
-
-  // If this block has no predecessors, it isn't live-in here.
-  if (PI == PE)
-    goto SpeculationFailure;
+static bool IsValueFullyAvailableInBlock(
+    BasicBlock *BB,
+    DenseMap<BasicBlock *, AvailabilityState> &FullyAvailableBlocks) {
+  SmallVector<BasicBlock *, 32> Worklist;
+  Optional<BasicBlock *> UnavailableBB;
+
+  // The number of times we didn't find an entry for a block in a map and
+  // optimistically inserted an entry marking block as speculatively available.
+  unsigned NumNewNewSpeculativelyAvailableBBs = 0;
+
+#ifndef NDEBUG
+  SmallSet<BasicBlock *, 32> NewSpeculativelyAvailableBBs;
+  SmallVector<BasicBlock *, 32> AvailableBBs;
+#endif
 
-  for (; PI != PE; ++PI)
-    // If the value isn't fully available in one of our predecessors, then it
-    // isn't fully available in this block either.  Undo our previous
-    // optimistic assumption and bail out.
-    if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks,RecurseDepth+1))
-      goto SpeculationFailure;
+  Worklist.emplace_back(BB);
+  while (!Worklist.empty()) {
+    BasicBlock *CurrBB = Worklist.pop_back_val(); // LIFO - depth-first!
+    // Optimistically assume that the block is Speculatively Available and
+    // check to see if we already know about this block in one lookup.
+    std::pair<DenseMap<BasicBlock *, AvailabilityState>::iterator, bool> IV =
+        FullyAvailableBlocks.try_emplace(
+            CurrBB, AvailabilityState::SpeculativelyAvailable);
+    AvailabilityState &State = IV.first->second;
+
+    // Did the entry already exist for this block?
+    if (!IV.second) {
+      if (State == AvailabilityState::Unavailable) {
+        UnavailableBB = CurrBB;
+        break; // Backpropagate unavailability info.
+      }
 
-  return true;
+#ifndef NDEBUG
+      AvailableBBs.emplace_back(CurrBB);
+#endif
+      continue; // Don't recurse further, but continue processing worklist.
+    }
 
-// If we get here, we found out that this is not, after
-// all, a fully-available block.  We have a problem if we speculated on this and
-// used the speculation to mark other blocks as available.
-SpeculationFailure:
-  char &BBVal = FullyAvailableBlocks[BB];
+    // No entry found for block.
+    ++NumNewNewSpeculativelyAvailableBBs;
+    bool OutOfBudget = NumNewNewSpeculativelyAvailableBBs > MaxBBSpeculations;
+
+    // If we have exhausted our budget, mark this block as unavailable.
+    // Also, if this block has no predecessors, the value isn't live-in here.
+    if (OutOfBudget || pred_empty(CurrBB)) {
+      MaxBBSpeculationCutoffReachedTimes += (int)OutOfBudget;
+      State = AvailabilityState::Unavailable;
+      UnavailableBB = CurrBB;
+      break; // Backpropagate unavailability info.
+    }
 
-  // If we didn't speculate on this, just return with it set to false.
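The rewrite above replaces bounded recursion with an explicit worklist: each block starts as SpeculativelyAvailable on first visit, predecessors are explored depth-first, and hitting an Unavailable block (or the entry block, or the speculation budget) ends the walk; leftover speculation is then demoted by the forward sweep in the next hunk. A self-contained model of the core walk (names invented; the budget and the demotion sweep are elided):

    #include <map>
    #include <vector>

    enum class Avail { Unavailable, Available, Speculative };

    // Block -> predecessor blocks; an empty list models the entry block.
    using PredMap = std::map<int, std::vector<int>>;

    bool fullyAvailable(int BB, const PredMap &Preds,
                        std::map<int, Avail> &State) {
      std::vector<int> Worklist{BB};
      while (!Worklist.empty()) {
        int Cur = Worklist.back();
        Worklist.pop_back(); // LIFO: depth-first, like the real code
        auto Ins = State.emplace(Cur, Avail::Speculative);
        if (!Ins.second) {
          if (Ins.first->second == Avail::Unavailable)
            return false; // reached a known-bad block
          continue;       // Available or already speculated: done here
        }
        auto P = Preds.find(Cur);
        if (P == Preds.end() || P->second.empty()) {
          Ins.first->second = Avail::Unavailable; // entry: not live-in
          return false;
        }
        Worklist.insert(Worklist.end(), P->second.begin(), P->second.end());
      }
      return true; // every path ended at an Available block
    }

On failure the real implementation additionally walks successors of the unavailable block to turn stale Speculative entries into Unavailable, which this sketch omits.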
-  if (BBVal == 2) {
-    BBVal = 0;
-    return false;
+    // Tentatively consider this block as speculatively available.
+#ifndef NDEBUG
+    NewSpeculativelyAvailableBBs.insert(CurrBB);
+#endif
+    // And further recurse into block's predecessors, in depth-first order!
+    Worklist.append(pred_begin(CurrBB), pred_end(CurrBB));
   }
 
-  // If we did speculate on this value, we could have blocks set to 1 that are
-  // incorrect.  Walk the (transitive) successors of this block and mark them as
-  // 0 if set to one.
-  SmallVector<BasicBlock*, 32> BBWorklist;
-  BBWorklist.push_back(BB);
-
-  do {
-    BasicBlock *Entry = BBWorklist.pop_back_val();
-    // Note that this sets blocks to 0 (unavailable) if they happen to not
-    // already be in FullyAvailableBlocks.  This is safe.
-    char &EntryVal = FullyAvailableBlocks[Entry];
-    if (EntryVal == 0) continue;  // Already unavailable.
-
-    // Mark as unavailable.
-    EntryVal = 0;
+#if LLVM_ENABLE_STATS
+  IsValueFullyAvailableInBlockNumSpeculationsMax.updateMax(
+      NumNewNewSpeculativelyAvailableBBs);
+#endif
 
-    BBWorklist.append(succ_begin(Entry), succ_end(Entry));
-  } while (!BBWorklist.empty());
+  // If the block isn't marked as a fixpoint yet (the Unavailable and
+  // Available states are fixpoints), set it to the given fixpoint state and
+  // queue its successors.
+  auto MarkAsFixpointAndEnqueueSuccessors =
+      [&](BasicBlock *BB, AvailabilityState FixpointState) {
+        auto It = FullyAvailableBlocks.find(BB);
+        if (It == FullyAvailableBlocks.end())
+          return; // Never queried this block, leave as-is.
+        switch (AvailabilityState &State = It->second) {
+        case AvailabilityState::Unavailable:
+        case AvailabilityState::Available:
+          return; // Don't backpropagate further, continue processing worklist.
+        case AvailabilityState::SpeculativelyAvailable: // Fix it!
+          State = FixpointState;
+#ifndef NDEBUG
+          assert(NewSpeculativelyAvailableBBs.erase(BB) &&
+                 "Found a speculatively available successor leftover?");
+#endif
+          // Queue successors for further processing.
+          Worklist.append(succ_begin(BB), succ_end(BB));
+          return;
+        }
+      };
+
+  if (UnavailableBB) {
+    // Okay, we have encountered an unavailable block.
+    // Mark speculatively available blocks reachable from UnavailableBB as
+    // unavailable as well. Paths are terminated when they reach blocks not in
+    // FullyAvailableBlocks or they are not marked as speculatively available.
+    Worklist.clear();
+    Worklist.append(succ_begin(*UnavailableBB), succ_end(*UnavailableBB));
+    while (!Worklist.empty())
+      MarkAsFixpointAndEnqueueSuccessors(Worklist.pop_back_val(),
+                                         AvailabilityState::Unavailable);
+  }
+
+#ifndef NDEBUG
+  Worklist.clear();
+  for (BasicBlock *AvailableBB : AvailableBBs)
+    Worklist.append(succ_begin(AvailableBB), succ_end(AvailableBB));
+  while (!Worklist.empty())
+    MarkAsFixpointAndEnqueueSuccessors(Worklist.pop_back_val(),
+                                       AvailabilityState::Available);
+
+  assert(NewSpeculativelyAvailableBBs.empty() &&
+         "Must have fixed all the new speculatively available blocks.");
+#endif
 
-  return false;
+  return !UnavailableBB;
 }
 
 /// Given a set of loads specified by ValuesPerBlock,
@@ -963,7 +1040,7 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
 
   if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
     // Reject loads and stores that are to the same address but are of
-    // different types if we have to. If the stored value is larger or equal to
+    // different types if we have to. If the stored value is convertible to
     // the loaded value, we can reuse it.
     if (!canCoerceMustAliasedValueToLoad(S->getValueOperand(), LI->getType(),
                                          DL))
       return false;
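The reworded comment refers to the test done by canCoerceMustAliasedValueToLoad, which lives in VNCoercion.cpp and is not part of this diff. Roughly, a store can satisfy a load of a different type when the stored bits cover the loaded bits; a simplified model of that size test follows (the real helper also handles pointers, scalable vectors, and other special cases, so treat this as an approximation):

    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Type.h"

    using namespace llvm;

    // Simplified stand-in for the real check: forwarding is only plausible
    // when the store is at least as wide as the load.
    static bool storeMayCoverLoad(Type *StoredTy, Type *LoadTy,
                                  const DataLayout &DL) {
      // Pointer/non-pointer mixes need extra casts; the real helper treats
      // them specially, so conservatively reject them here.
      if (StoredTy->isPointerTy() != LoadTy->isPointerTy())
        return false;
      return DL.getTypeSizeInBits(StoredTy).getFixedSize() >=
             DL.getTypeSizeInBits(LoadTy).getFixedSize();
    }

For example, an i32 store can feed a later i8 load from the same address (GVN materializes the narrow value with a shift/trunc), but an i8 store can never satisfy an i32 load by itself.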
@@ -1062,7 +1139,6 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
   // backwards through predecessors if needed.
   BasicBlock *LoadBB = LI->getParent();
   BasicBlock *TmpBB = LoadBB;
-  bool IsSafeToSpeculativelyExecute = isSafeToSpeculativelyExecute(LI);
 
   // Check that there are no implicit control flow instructions above our load
   // in its block. If there is an instruction that doesn't always pass the
@@ -1079,8 +1155,9 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
   // because if the index is out of bounds we should deoptimize rather than
   // access the array.
   // Check that there is no guard in this block above our instruction.
-  if (!IsSafeToSpeculativelyExecute && ICF->isDominatedByICFIFromSameBlock(LI))
-    return false;
+  bool MustEnsureSafetyOfSpeculativeExecution =
+      ICF->isDominatedByICFIFromSameBlock(LI);
+
   while (TmpBB->getSinglePredecessor()) {
     TmpBB = TmpBB->getSinglePredecessor();
     if (TmpBB == LoadBB) // Infinite (unreachable) loop.
@@ -1097,8 +1174,8 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
       return false;
 
     // Check that there is no implicit control flow in a block above.
-    if (!IsSafeToSpeculativelyExecute && ICF->hasICF(TmpBB))
-      return false;
+    MustEnsureSafetyOfSpeculativeExecution =
+        MustEnsureSafetyOfSpeculativeExecution || ICF->hasICF(TmpBB);
   }
 
   assert(TmpBB);
@@ -1107,11 +1184,11 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
   // Check to see how many predecessors have the loaded value fully
   // available.
   MapVector<BasicBlock *, Value *> PredLoads;
-  DenseMap<BasicBlock*, char> FullyAvailableBlocks;
+  DenseMap<BasicBlock *, AvailabilityState> FullyAvailableBlocks;
   for (const AvailableValueInBlock &AV : ValuesPerBlock)
-    FullyAvailableBlocks[AV.BB] = true;
+    FullyAvailableBlocks[AV.BB] = AvailabilityState::Available;
   for (BasicBlock *UnavailableBB : UnavailableBlocks)
-    FullyAvailableBlocks[UnavailableBB] = false;
+    FullyAvailableBlocks[UnavailableBB] = AvailabilityState::Unavailable;
 
   SmallVector<BasicBlock *, 4> CriticalEdgePred;
   for (BasicBlock *Pred : predecessors(LoadBB)) {
@@ -1124,7 +1201,7 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
       return false;
     }
 
-    if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks, 0)) {
+    if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks)) {
      continue;
     }
 
@@ -1151,6 +1228,16 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
        return false;
      }
 
+      // Do not split backedge as it will break the canonical loop form.
+      if (!isLoadPRESplitBackedgeEnabled())
+        if (DT->dominates(LoadBB, Pred)) {
+          LLVM_DEBUG(
+              dbgs()
+              << "COULD NOT PRE LOAD BECAUSE OF A BACKEDGE CRITICAL EDGE '"
+              << Pred->getName() << "': " << *LI << '\n');
+          return false;
+        }
+
      CriticalEdgePred.push_back(Pred);
    } else {
      // Only add the predecessors that will not be split for now.
@@ -1170,6 +1257,17 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
   if (NumUnavailablePreds != 1)
       return false;
 
+  // Now we know where we will insert the load. We must ensure that it is
+  // safe to speculatively execute the load at that point.
+  if (MustEnsureSafetyOfSpeculativeExecution) {
+    if (CriticalEdgePred.size())
+      if (!isSafeToSpeculativelyExecute(LI, LoadBB->getFirstNonPHI(), DT))
+        return false;
+    for (auto &PL : PredLoads)
+      if (!isSafeToSpeculativelyExecute(LI, PL.first->getTerminator(), DT))
+        return false;
+  }
+
   // Split critical edges, and update the unavailable predecessors accordingly.
   for (BasicBlock *OrigPred : CriticalEdgePred) {
     BasicBlock *NewPred = splitCriticalEdges(OrigPred, LoadBB);
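The hunks above replace an up-front isSafeToSpeculativelyExecute(LI) bailout with a deferred, point-specific check: the question is no longer whether the load is hoistable in the abstract, but whether it is safe at each concrete insertion point, which ValueTracking can answer given a context instruction and the dominator tree. A small sketch of that query (the wrapper name is invented):

    #include "llvm/Analysis/ValueTracking.h"
    #include "llvm/IR/Dominators.h"
    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    // Would it be safe to execute LI unconditionally just before InsertPt?
    // With a context instruction and a dominator tree, facts established by
    // dominating conditions (e.g. dereferenceability) can be used.
    static bool safeToHoistLoadTo(LoadInst *LI, Instruction *InsertPt,
                                  const DominatorTree &DT) {
      return isSafeToSpeculativelyExecute(LI, InsertPt, &DT);
    }

This is why the patch can drop the early return: blocks with implicit control flow no longer disqualify PRE outright, they merely require this stronger per-insertion-point proof.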
@@ -1251,8 +1349,7 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
       // Instructions that have been inserted in predecessor(s) to materialize
       // the load address do not retain their original debug locations. Doing
       // so could lead to confusing (but correct) source attributions.
-      if (const DebugLoc &DL = I->getDebugLoc())
-        I->setDebugLoc(DebugLoc::get(0, 0, DL.getScope(), DL.getInlinedAt()));
+      I->updateLocationAfterHoist();
 
       // FIXME: We really _ought_ to insert these value numbers into their
       // parent's availability map.  However, in doing so, we risk getting into
@@ -1270,6 +1367,22 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
                                   LI->getAlign(), LI->getOrdering(),
                                   LI->getSyncScopeID(),
                                   UnavailablePred->getTerminator());
   NewLoad->setDebugLoc(LI->getDebugLoc());
+  if (MSSAU) {
+    auto *MSSA = MSSAU->getMemorySSA();
+    // Get the defining access of the original load or use the load if it is a
+    // MemoryDef (e.g. because it is volatile). The inserted loads are
+    // guaranteed to load from the same definition.
+    auto *LIAcc = MSSA->getMemoryAccess(LI);
+    auto *DefiningAcc =
+        isa<MemoryDef>(LIAcc) ? LIAcc : LIAcc->getDefiningAccess();
+    auto *NewAccess = MSSAU->createMemoryAccessInBB(
+        NewLoad, DefiningAcc, NewLoad->getParent(),
+        MemorySSA::BeforeTerminator);
+    if (auto *NewDef = dyn_cast<MemoryDef>(NewAccess))
+      MSSAU->insertDef(NewDef, /*RenameUses=*/true);
+    else
+      MSSAU->insertUse(cast<MemoryUse>(NewAccess), /*RenameUses=*/true);
+  }
 
   // Transfer the old load's AA tags to the new load.
   AAMDNodes Tags;
@@ -1357,13 +1470,14 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
     return false;
   }
 
+  bool Changed = false;
   // If this load follows a GEP, see if we can PRE the indices before analyzing.
   if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0))) {
     for (GetElementPtrInst::op_iterator OI = GEP->idx_begin(),
                                         OE = GEP->idx_end();
          OI != OE; ++OI)
       if (Instruction *I = dyn_cast<Instruction>(OI->get()))
-        performScalarPRE(I);
+        Changed |= performScalarPRE(I);
   }
 
   // Step 2: Analyze the availability of the load
@@ -1374,7 +1488,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
   // If we have no predecessors that produce a known value for this load, exit
   // early.
   if (ValuesPerBlock.empty())
-    return false;
+    return Changed;
 
   // Step 3: Eliminate full redundancy.
   //
@@ -1406,12 +1520,12 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
 
   // Step 4: Eliminate partial redundancy.
   if (!isPREEnabled() || !isLoadPREEnabled())
-    return false;
+    return Changed;
   if (!isLoadInLoopPREEnabled() && this->LI &&
       this->LI->getLoopFor(LI->getParent()))
-    return false;
+    return Changed;
 
-  return PerformLoadPRE(LI, ValuesPerBlock, UnavailableBlocks);
+  return Changed || PerformLoadPRE(LI, ValuesPerBlock, UnavailableBlocks);
 }
 
 static bool impliesEquivalanceIfTrue(CmpInst* Cmp) {
@@ -1486,9 +1600,40 @@ bool GVN::processAssumeIntrinsic(IntrinsicInst *IntrinsicI) {
     // Insert a new store to null instruction before the load to indicate that
     // this code is not reachable.  FIXME: We could insert unreachable
     // instruction directly because we can modify the CFG.
-    new StoreInst(UndefValue::get(Int8Ty),
-                  Constant::getNullValue(Int8Ty->getPointerTo()),
-                  IntrinsicI);
+    auto *NewS = new StoreInst(UndefValue::get(Int8Ty),
+                               Constant::getNullValue(Int8Ty->getPointerTo()),
+                               IntrinsicI);
+    if (MSSAU) {
+      const MemoryUseOrDef *FirstNonDom = nullptr;
+      const auto *AL =
+          MSSAU->getMemorySSA()->getBlockAccesses(IntrinsicI->getParent());
+
+      // If there are accesses in the current basic block, find the first one
+      // that does not come before NewS. The new memory access is inserted
+      // after the found access or before the terminator if no such access is
+      // found.
+      if (AL) {
+        for (auto &Acc : *AL) {
+          if (auto *Current = dyn_cast<MemoryUseOrDef>(&Acc))
+            if (!Current->getMemoryInst()->comesBefore(NewS)) {
+              FirstNonDom = Current;
+              break;
+            }
+        }
+      }
+
+      // This added store is to null, so it will never be executed and we can
+      // just use the LiveOnEntry def as the defining access.
+      auto *NewDef =
+          FirstNonDom ? MSSAU->createMemoryAccessBefore(
+                            NewS, MSSAU->getMemorySSA()->getLiveOnEntryDef(),
+                            const_cast<MemoryUseOrDef *>(FirstNonDom))
+                      : MSSAU->createMemoryAccessInBB(
+                            NewS, MSSAU->getMemorySSA()->getLiveOnEntryDef(),
+                            NewS->getParent(), MemorySSA::BeforeTerminator);
+
+      MSSAU->insertDef(cast<MemoryDef>(NewDef), /*RenameUses=*/false);
+    }
   }
   if (isAssumeWithEmptyBundle(*IntrinsicI))
     markInstructionForDeletion(IntrinsicI);
@@ -1516,6 +1661,11 @@ bool GVN::processAssumeIntrinsic(IntrinsicInst *IntrinsicI) {
   // br i1 %cmp, label %bb1, label %bb2 ; will change %cmp to true
   ReplaceOperandsWithMap[V] = True;
 
+  // Similarly, after assume(!NotV) we know that NotV == false.
+  Value *NotV;
+  if (match(V, m_Not(m_Value(NotV))))
+    ReplaceOperandsWithMap[NotV] = ConstantInt::getFalse(V->getContext());
+
   // If we find an equality fact, canonicalize all dominated uses in this block
   // to one of the two values. We heuristically choose the "oldest" of the
   // two where age is determined by value number. (Note that propagateEquality
@@ -1622,6 +1772,8 @@ bool GVN::processLoad(LoadInst *L) {
     // Replace the load!
     patchAndReplaceAllUsesWith(L, AvailableValue);
     markInstructionForDeletion(L);
+    if (MSSAU)
+      MSSAU->removeMemoryAccess(L);
     ++NumGVNLoad;
     reportLoadElim(L, AvailableValue, ORE);
     // Tell MDA to re-examine the reused pointer since we might have more
@@ -1743,7 +1895,7 @@ uint32_t GVN::ValueTable::phiTranslateImpl(const BasicBlock *Pred,
   }
 
   if (Exp.commutative) {
-    assert(Exp.varargs.size() == 2 && "Unsupported commutative expression!");
+    assert(Exp.varargs.size() >= 2 && "Unsupported commutative instruction!");
     if (Exp.varargs[0] > Exp.varargs[1]) {
       std::swap(Exp.varargs[0], Exp.varargs[1]);
       uint32_t Opcode = Exp.opcode >> 8;
@@ -1766,11 +1918,8 @@ uint32_t GVN::ValueTable::phiTranslate(const BasicBlock *Pred,
 /// again.
 void GVN::ValueTable::eraseTranslateCacheEntry(uint32_t Num,
                                                const BasicBlock &CurrBlock) {
-  for (const BasicBlock *Pred : predecessors(&CurrBlock)) {
-    auto FindRes = PhiTranslateTable.find({Num, Pred});
-    if (FindRes != PhiTranslateTable.end())
-      PhiTranslateTable.erase(FindRes);
-  }
+  for (const BasicBlock *Pred : predecessors(&CurrBlock))
+    PhiTranslateTable.erase({Num, Pred});
 }
 
 // In order to find a leader for a given value number at a
@@ -1934,8 +2083,8 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root,
     // If "A && B" is known true then both A and B are known true.  If "A || B"
     // is known false then both A and B are known false.
     Value *A, *B;
-    if ((isKnownTrue && match(LHS, m_And(m_Value(A), m_Value(B)))) ||
-        (isKnownFalse && match(LHS, m_Or(m_Value(A), m_Value(B))))) {
+    if ((isKnownTrue && match(LHS, m_LogicalAnd(m_Value(A), m_Value(B)))) ||
+        (isKnownFalse && match(LHS, m_LogicalOr(m_Value(A), m_Value(B))))) {
       Worklist.push_back(std::make_pair(A, RHS));
       Worklist.push_back(std::make_pair(B, RHS));
       continue;
@@ -2137,7 +2286,7 @@ bool GVN::processInstruction(Instruction *I) {
 bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
                   const TargetLibraryInfo &RunTLI, AAResults &RunAA,
                   MemoryDependenceResults *RunMD, LoopInfo *LI,
-                  OptimizationRemarkEmitter *RunORE) {
+                  OptimizationRemarkEmitter *RunORE, MemorySSA *MSSA) {
   AC = &RunAC;
   DT = &RunDT;
   VN.setDomTree(DT);
@@ -2150,6 +2299,8 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
   VN.setMemDep(MD);
   ORE = RunORE;
   InvalidBlockRPONumbers = true;
+  MemorySSAUpdater Updater(MSSA);
+  MSSAU = MSSA ? &Updater : nullptr;
 
   bool Changed = false;
   bool ShouldContinue = true;
@@ -2160,7 +2311,7 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
   for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
     BasicBlock *BB = &*FI++;
 
-    bool removedBlock = MergeBlockIntoPredecessor(BB, &DTU, LI, nullptr, MD);
+    bool removedBlock = MergeBlockIntoPredecessor(BB, &DTU, LI, MSSAU, MD);
     if (removedBlock)
       ++NumGVNBlocks;
 
@@ -2196,6 +2347,9 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
   // iteration.
   DeadBlocks.clear();
 
+  if (MSSA && VerifyMemorySSA)
+    MSSA->verifyMemorySSA();
+
   return Changed;
 }
 
@@ -2236,6 +2390,8 @@ bool GVN::processBlock(BasicBlock *BB) {
     salvageKnowledge(I, AC);
     salvageDebugInfo(*I);
     if (MD) MD->removeInstruction(I);
+    if (MSSAU)
+      MSSAU->removeMemoryAccess(I);
     LLVM_DEBUG(verifyRemoved(I));
     ICF->removeInstruction(I);
     I->eraseFromParent();
@@ -2323,10 +2479,14 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
   if (isa<GetElementPtrInst>(CurInst))
     return false;
 
-  // We don't currently value number ANY inline asm calls.
-  if (auto *CallB = dyn_cast<CallBase>(CurInst))
+  if (auto *CallB = dyn_cast<CallBase>(CurInst)) {
+    // We don't currently value number ANY inline asm calls.
     if (CallB->isInlineAsm())
       return false;
+    // Don't do PRE on convergent calls.
+    if (CallB->isConvergent())
+      return false;
+  }
 
   uint32_t ValNo = VN.lookup(CurInst);
@@ -2466,6 +2626,8 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
   LLVM_DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
   if (MD)
     MD->removeInstruction(CurInst);
+  if (MSSAU)
+    MSSAU->removeMemoryAccess(CurInst);
   LLVM_DEBUG(verifyRemoved(CurInst));
   // FIXME: Intended to be markInstructionForDeletion(CurInst), but it causes
   // some assertion failures.
@@ -2510,10 +2672,12 @@ BasicBlock *GVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
   // possible.
   BasicBlock *BB = SplitCriticalEdge(
       Pred, Succ,
-      CriticalEdgeSplittingOptions(DT, LI).unsetPreserveLoopSimplify());
-  if (MD)
-    MD->invalidateCachedPredecessors();
-  InvalidBlockRPONumbers = true;
+      CriticalEdgeSplittingOptions(DT, LI, MSSAU).unsetPreserveLoopSimplify());
+  if (BB) {
+    if (MD)
+      MD->invalidateCachedPredecessors();
+    InvalidBlockRPONumbers = true;
+  }
   return BB;
 }
 
@@ -2522,14 +2686,20 @@ BasicBlock *GVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
 bool GVN::splitCriticalEdges() {
   if (toSplit.empty())
     return false;
+
+  bool Changed = false;
   do {
     std::pair<Instruction *, unsigned> Edge = toSplit.pop_back_val();
-    SplitCriticalEdge(Edge.first, Edge.second,
-                      CriticalEdgeSplittingOptions(DT, LI));
+    Changed |= SplitCriticalEdge(Edge.first, Edge.second,
+                                 CriticalEdgeSplittingOptions(DT, LI, MSSAU)) !=
+               nullptr;
   } while (!toSplit.empty());
-  if (MD) MD->invalidateCachedPredecessors();
-  InvalidBlockRPONumbers = true;
-  return true;
+  if (Changed) {
+    if (MD)
+      MD->invalidateCachedPredecessors();
+    InvalidBlockRPONumbers = true;
+  }
+  return Changed;
 }
 
 /// Executes one iteration of GVN
@@ -2633,13 +2803,12 @@ void GVN::addDeadBlock(BasicBlock *BB) {
 
     // First, split the critical edges. This might also create additional blocks
     // to preserve LoopSimplify form and adjust edges accordingly.
-    SmallVector<BasicBlock *, 4> Preds(pred_begin(B), pred_end(B));
+    SmallVector<BasicBlock *, 4> Preds(predecessors(B));
     for (BasicBlock *P : Preds) {
       if (!DeadBlocks.count(P))
         continue;
 
-      if (llvm::any_of(successors(P),
-                       [B](BasicBlock *Succ) { return Succ == B; }) &&
+      if (llvm::is_contained(successors(P), B) &&
          isCriticalEdge(P->getTerminator(), B)) {
        if (BasicBlock *S = splitCriticalEdges(P, B))
          DeadBlocks.insert(P = S);
@@ -2724,6 +2893,7 @@ public:
 
     auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
 
+    auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>();
     return Impl.runImpl(
         F, getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
         getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
@@ -2733,7 +2903,8 @@ public:
             ? &getAnalysis<MemoryDependenceWrapperPass>().getMemDep()
             : nullptr,
         LIWP ? &LIWP->getLoopInfo() : nullptr,
-        &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE());
+        &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(),
+        MSSAWP ? &MSSAWP->getMSSA() : nullptr);
   }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -2744,12 +2915,12 @@ public:
     if (Impl.isMemDepEnabled())
       AU.addRequired<MemoryDependenceWrapperPass>();
     AU.addRequired<AAResultsWrapperPass>();
-
     AU.addPreserved<DominatorTreeWrapperPass>();
     AU.addPreserved<GlobalsAAWrapperPass>();
     AU.addPreserved<TargetLibraryInfoWrapperPass>();
     AU.addPreserved<LoopInfoWrapperPass>();
     AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
+    AU.addPreserved<MemorySSAWrapperPass>();
   }
 
 private:
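Taken together, the MemorySSA-related hunks follow one discipline: every instruction GVN inserts or erases must be mirrored in MemorySSA through MemorySSAUpdater, otherwise the analysis this patch now claims to preserve would go stale. A condensed sketch of the two sides of that bookkeeping, using the same updater calls the patch uses (the wrapper functions are invented):

    #include "llvm/Analysis/MemorySSA.h"
    #include "llvm/Analysis/MemorySSAUpdater.h"
    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    // Insertion side: register a newly created load with MemorySSA, given
    // the access it is known to read from.
    static void mirrorNewLoad(LoadInst *NewLoad, MemoryAccess *DefiningAcc,
                              MemorySSAUpdater *MSSAU) {
      if (!MSSAU)
        return; // MemorySSA not available; nothing to keep in sync.
      MemoryAccess *NewAccess = MSSAU->createMemoryAccessInBB(
          NewLoad, DefiningAcc, NewLoad->getParent(),
          MemorySSA::BeforeTerminator);
      if (auto *NewDef = dyn_cast<MemoryDef>(NewAccess))
        MSSAU->insertDef(NewDef, /*RenameUses=*/true);
      else
        MSSAU->insertUse(cast<MemoryUse>(NewAccess), /*RenameUses=*/true);
    }

    // Deletion side: drop the memory access before erasing the instruction,
    // as processBlock() and performScalarPRE() now do.
    static void mirrorErasedInst(Instruction *I, MemorySSAUpdater *MSSAU) {
      if (MSSAU)
        MSSAU->removeMemoryAccess(I);
    }

The runImpl() hunk closes the loop: when expensive checks are enabled (VerifyMemorySSA), the updated MemorySSA is re-verified at the end of the pass, which is what catches any missed mirror call.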