author     Dimitry Andric <dim@FreeBSD.org>   2022-02-05 20:07:43 +0000
committer  Dimitry Andric <dim@FreeBSD.org>   2022-05-14 11:44:47 +0000
commit     1fd87a682ad7442327078e1eeb63edc4258f9815 (patch)
tree       83b42223e987ef7df2e1036937bc1bb627fa2779 /contrib/llvm-project/llvm/lib/Transforms
parent     04eeddc0aa8e0a417a16eaf9d7d095207f4a8623 (diff)
parent     ecbca9f5fb7d7613d2b94982c4825eb0d33d6842 (diff)
Merge llvm-project main llvmorg-14-init-18294-gdb01b123d012
This updates llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and
openmp to llvmorg-14-init-18294-gdb01b123d012, the last commit before
the upstream release/14.x branch was created.
PR: 261742
MFC after: 2 weeks
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Transforms')
56 files changed, 1977 insertions, 806 deletions
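For reference, a diffstat like the one above can be regenerated from the hashes in the header. A minimal sketch, assuming a local clone of the FreeBSD src repository (both commit ids and the pathspec are taken from this page):

    # Diff the merge commit against its first parent, restricted to the
    # Transforms subtree; --stat prints the same kind of summary shown above.
    git diff --stat 04eeddc0aa8e0a417a16eaf9d7d095207f4a8623 \
        1fd87a682ad7442327078e1eeb63edc4258f9815 \
        -- contrib/llvm-project/llvm/lib/Transforms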
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index 92acfb93057a..9c16d3750998 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -23,6 +23,7 @@
 #include "llvm/Config/llvm-config.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstIterator.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index ce3c5153bde2..e6a542385662 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -46,6 +46,7 @@
 #include "llvm/Analysis/LazyCallGraph.h"
 #include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/Argument.h"
@@ -365,26 +366,25 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
   // Loop over the argument list, transferring uses of the old arguments over to
   // the new arguments, also transferring over the names as well.
-  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(),
-                              I2 = NF->arg_begin();
-       I != E; ++I) {
-    if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
+  Function::arg_iterator I2 = NF->arg_begin();
+  for (Argument &Arg : F->args()) {
+    if (!ArgsToPromote.count(&Arg) && !ByValArgsToTransform.count(&Arg)) {
       // If this is an unmodified argument, move the name and users over to the
       // new version.
-      I->replaceAllUsesWith(&*I2);
-      I2->takeName(&*I);
+      Arg.replaceAllUsesWith(&*I2);
+      I2->takeName(&Arg);
       ++I2;
       continue;
     }
-    if (ByValArgsToTransform.count(&*I)) {
+    if (ByValArgsToTransform.count(&Arg)) {
       // In the callee, we create an alloca, and store each of the new incoming
       // arguments into the alloca.
       Instruction *InsertPt = &NF->begin()->front();
       // Just add all the struct element types.
-      Type *AgTy = I->getParamByValType();
-      Align StructAlign = *I->getParamAlign();
+      Type *AgTy = Arg.getParamByValType();
+      Align StructAlign = *Arg.getParamAlign();
       Value *TheAlloca = new AllocaInst(AgTy, DL.getAllocaAddrSpace(), nullptr,
                                         StructAlign, "", InsertPt);
       StructType *STy = cast<StructType>(AgTy);
@@ -397,41 +397,41 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
         Value *Idx = GetElementPtrInst::Create(
             AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i),
             InsertPt);
-        I2->setName(I->getName() + "." + Twine(i));
+        I2->setName(Arg.getName() + "." + Twine(i));
         Align Alignment = commonAlignment(StructAlign, SL->getElementOffset(i));
         new StoreInst(&*I2++, Idx, false, Alignment, InsertPt);
       }
       // Anything that used the arg should now use the alloca.
-      I->replaceAllUsesWith(TheAlloca);
-      TheAlloca->takeName(&*I);
+      Arg.replaceAllUsesWith(TheAlloca);
+      TheAlloca->takeName(&Arg);
       continue;
     }
     // There potentially are metadata uses for things like llvm.dbg.value.
     // Replace them with undef, after handling the other regular uses.
     auto RauwUndefMetadata = make_scope_exit(
-        [&]() { I->replaceAllUsesWith(UndefValue::get(I->getType())); });
+        [&]() { Arg.replaceAllUsesWith(UndefValue::get(Arg.getType())); });
-    if (I->use_empty())
+    if (Arg.use_empty())
       continue;
     // Otherwise, if we promoted this argument, then all users are load
     // instructions (or GEPs with only load users), and all loads should be
     // using the new argument that we added.
-    ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
+    ScalarizeTable &ArgIndices = ScalarizedElements[&Arg];
-    while (!I->use_empty()) {
-      if (LoadInst *LI = dyn_cast<LoadInst>(I->user_back())) {
+    while (!Arg.use_empty()) {
+      if (LoadInst *LI = dyn_cast<LoadInst>(Arg.user_back())) {
         assert(ArgIndices.begin()->second.empty() &&
                "Load element should sort to front!");
-        I2->setName(I->getName() + ".val");
+        I2->setName(Arg.getName() + ".val");
         LI->replaceAllUsesWith(&*I2);
         LI->eraseFromParent();
-        LLVM_DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName()
+        LLVM_DEBUG(dbgs() << "*** Promoted load of argument '" << Arg.getName()
                           << "' in function '" << F->getName() << "'\n");
       } else {
-        GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->user_back());
+        GetElementPtrInst *GEP = cast<GetElementPtrInst>(Arg.user_back());
         assert(!GEP->use_empty() &&
                "GEPs without uses should be cleaned up already");
         IndicesVector Operands;
@@ -449,7 +449,7 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
         assert(It != ArgIndices.end() && "GEP not handled??");
       }
-      TheArg->setName(formatv("{0}.{1:$[.]}.val", I->getName(),
+      TheArg->setName(formatv("{0}.{1:$[.]}.val", Arg.getName(),
                               make_range(Operands.begin(), Operands.end())));
       LLVM_DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName()
@@ -610,12 +610,12 @@ static bool isSafeToPromoteArgument(Argument *Arg, Type *ByValTy, AAResults &AAR
     return true;
   };
-  // First, iterate the entry block and mark loads of (geps of) arguments as
-  // safe.
+  // First, iterate functions that are guaranteed to execution on function
+  // entry and mark loads of (geps of) arguments as safe.
   BasicBlock &EntryBlock = Arg->getParent()->front();
   // Declare this here so we can reuse it
   IndicesVector Indices;
-  for (Instruction &I : EntryBlock)
+  for (Instruction &I : EntryBlock) {
     if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
       Value *V = LI->getPointerOperand();
       if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
@@ -649,6 +649,10 @@ static bool isSafeToPromoteArgument(Argument *Arg, Type *ByValTy, AAResults &AAR
       }
     }
+    if (!isGuaranteedToTransferExecutionToSuccessor(&I))
+      break;
+  }
+
   // Now, iterate all uses of the argument to see if there are any uses that are
   // not (GEP+)loads, or any (GEP+)loads that are not safe to promote.
   SmallVector<LoadInst *, 16> Loads;
@@ -830,7 +834,10 @@ static bool canPaddingBeAccessed(Argument *arg) {
   return false;
 }
-bool ArgumentPromotionPass::areFunctionArgsABICompatible(
+/// Check if callers and the callee \p F agree how promoted arguments would be
+/// passed. The ones that they do not agree on are eliminated from the sets but
+/// the return value has to be observed as well.
+static bool areFunctionArgsABICompatible(
     const Function &F, const TargetTransformInfo &TTI,
     SmallPtrSetImpl<Argument *> &ArgsToPromote,
     SmallPtrSetImpl<Argument *> &ByValArgsToTransform) {
@@ -1003,7 +1010,7 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
   if (ArgsToPromote.empty() && ByValArgsToTransform.empty())
     return nullptr;
-  if (!ArgumentPromotionPass::areFunctionArgsABICompatible(
+  if (!areFunctionArgsABICompatible(
           *F, TTI, ArgsToPromote, ByValArgsToTransform))
     return nullptr;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp
index 12b8a0ef9d00..d66140a726f6 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -183,6 +183,31 @@ ChangeStatus &llvm::operator&=(ChangeStatus &L, ChangeStatus R) {
 }
 ///}
+bool AA::isNoSyncInst(Attributor &A, const Instruction &I,
+                      const AbstractAttribute &QueryingAA) {
+  // We are looking for volatile instructions or non-relaxed atomics.
+  if (const auto *CB = dyn_cast<CallBase>(&I)) {
+    if (CB->hasFnAttr(Attribute::NoSync))
+      return true;
+
+    // Non-convergent and readnone imply nosync.
+    if (!CB->isConvergent() && !CB->mayReadOrWriteMemory())
+      return true;
+
+    if (AANoSync::isNoSyncIntrinsic(&I))
+      return true;
+
+    const auto &NoSyncAA = A.getAAFor<AANoSync>(
+        QueryingAA, IRPosition::callsite_function(*CB), DepClassTy::OPTIONAL);
+    return NoSyncAA.isAssumedNoSync();
+  }
+
+  if (!I.mayReadOrWriteMemory())
+    return true;
+
+  return !I.isVolatile() && !AANoSync::isNonRelaxedAtomic(&I);
+}
+
 bool AA::isDynamicallyUnique(Attributor &A, const AbstractAttribute &QueryingAA,
                              const Value &V) {
   if (auto *C = dyn_cast<Constant>(&V))
@@ -370,6 +395,162 @@ bool AA::getPotentialCopiesOfStoredValue(
   return true;
 }
+static bool isAssumedReadOnlyOrReadNone(Attributor &A, const IRPosition &IRP,
+                                        const AbstractAttribute &QueryingAA,
+                                        bool RequireReadNone, bool &IsKnown) {
+
+  IRPosition::Kind Kind = IRP.getPositionKind();
+  if (Kind == IRPosition::IRP_FUNCTION || Kind == IRPosition::IRP_CALL_SITE) {
+    const auto &MemLocAA =
+        A.getAAFor<AAMemoryLocation>(QueryingAA, IRP, DepClassTy::NONE);
+    if (MemLocAA.isAssumedReadNone()) {
+      IsKnown = MemLocAA.isKnownReadNone();
+      if (!IsKnown)
+        A.recordDependence(MemLocAA, QueryingAA, DepClassTy::OPTIONAL);
+      return true;
+    }
+  }
+
+  const auto &MemBehaviorAA =
+      A.getAAFor<AAMemoryBehavior>(QueryingAA, IRP, DepClassTy::NONE);
+  if (MemBehaviorAA.isAssumedReadNone() ||
+      (!RequireReadNone && MemBehaviorAA.isAssumedReadOnly())) {
+    IsKnown = RequireReadNone ? MemBehaviorAA.isKnownReadNone()
+                              : MemBehaviorAA.isKnownReadOnly();
+    if (!IsKnown)
+      A.recordDependence(MemBehaviorAA, QueryingAA, DepClassTy::OPTIONAL);
+    return true;
+  }
+
+  return false;
+}
+
+bool AA::isAssumedReadOnly(Attributor &A, const IRPosition &IRP,
+                           const AbstractAttribute &QueryingAA, bool &IsKnown) {
+  return isAssumedReadOnlyOrReadNone(A, IRP, QueryingAA,
+                                     /* RequireReadNone */ false, IsKnown);
+}
+bool AA::isAssumedReadNone(Attributor &A, const IRPosition &IRP,
+                           const AbstractAttribute &QueryingAA, bool &IsKnown) {
+  return isAssumedReadOnlyOrReadNone(A, IRP, QueryingAA,
+                                     /* RequireReadNone */ true, IsKnown);
+}
+
+static bool
+isPotentiallyReachable(Attributor &A, const Instruction &FromI,
+                       const Instruction *ToI, const Function &ToFn,
+                       const AbstractAttribute &QueryingAA,
+                       std::function<bool(const Function &F)> GoBackwardsCB) {
+  LLVM_DEBUG(dbgs() << "[AA] isPotentiallyReachable @" << ToFn.getName()
+                    << " from " << FromI << " [GBCB: " << bool(GoBackwardsCB)
+                    << "]\n");
+
+  SmallPtrSet<const Instruction *, 8> Visited;
+  SmallVector<const Instruction *> Worklist;
+  Worklist.push_back(&FromI);
+
+  while (!Worklist.empty()) {
+    const Instruction *CurFromI = Worklist.pop_back_val();
+    if (!Visited.insert(CurFromI).second)
+      continue;
+
+    const Function *FromFn = CurFromI->getFunction();
+    if (FromFn == &ToFn) {
+      if (!ToI)
+        return true;
+      LLVM_DEBUG(dbgs() << "[AA] check " << *ToI << " from " << *CurFromI
+                        << " intraprocedurally\n");
+      const auto &ReachabilityAA = A.getAAFor<AAReachability>(
+          QueryingAA, IRPosition::function(ToFn), DepClassTy::OPTIONAL);
+      bool Result = ReachabilityAA.isAssumedReachable(A, *CurFromI, *ToI);
+      LLVM_DEBUG(dbgs() << "[AA] " << *CurFromI << " "
+                        << (Result ? "can potentially " : "cannot ") << "reach "
+                        << *ToI << " [Intra]\n");
+      if (Result)
+        return true;
+      continue;
+    }
+
+    // TODO: If we can go arbitrarily backwards we will eventually reach an
+    // entry point that can reach ToI. Only once this takes a set of blocks
+    // through which we cannot go, or once we track internal functions not
+    // accessible from the outside, it makes sense to perform backwards analysis
+    // in the absence of a GoBackwardsCB.
+    if (!GoBackwardsCB) {
+      LLVM_DEBUG(dbgs() << "[AA] check @" << ToFn.getName() << " from "
+                        << *CurFromI << " is not checked backwards, abort\n");
+      return true;
+    }
+
+    // Check if the current instruction is already known to reach the ToFn.
+    const auto &FnReachabilityAA = A.getAAFor<AAFunctionReachability>(
+        QueryingAA, IRPosition::function(*FromFn), DepClassTy::OPTIONAL);
+    bool Result = FnReachabilityAA.instructionCanReach(
+        A, *CurFromI, ToFn, /* UseBackwards */ false);
+    LLVM_DEBUG(dbgs() << "[AA] " << *CurFromI << " in @" << FromFn->getName()
+                      << " " << (Result ? "can potentially " : "cannot ")
+                      << "reach @" << ToFn.getName() << " [FromFn]\n");
+    if (Result)
+      return true;
+
+    // If we do not go backwards from the FromFn we are done here and so far we
+    // could not find a way to reach ToFn/ToI.
+    if (!GoBackwardsCB(*FromFn))
+      continue;
+
+    LLVM_DEBUG(dbgs() << "Stepping backwards to the call sites of @"
+                      << FromFn->getName() << "\n");
+
+    auto CheckCallSite = [&](AbstractCallSite ACS) {
+      CallBase *CB = ACS.getInstruction();
+      if (!CB)
+        return false;
+
+      if (isa<InvokeInst>(CB))
+        return false;
+
+      Instruction *Inst = CB->getNextNonDebugInstruction();
+      Worklist.push_back(Inst);
+      return true;
+    };
+
+    bool AllCallSitesKnown;
+    Result = !A.checkForAllCallSites(CheckCallSite, *FromFn,
+                                     /* RequireAllCallSites */ true,
+                                     &QueryingAA, AllCallSitesKnown);
+    if (Result) {
+      LLVM_DEBUG(dbgs() << "[AA] stepping back to call sites from " << *CurFromI
+                        << " in @" << FromFn->getName()
+                        << " failed, give up\n");
+      return true;
+    }
+
+    LLVM_DEBUG(dbgs() << "[AA] stepped back to call sites from " << *CurFromI
+                      << " in @" << FromFn->getName()
+                      << " worklist size is: " << Worklist.size() << "\n");
+  }
+  return false;
+}
+
+bool AA::isPotentiallyReachable(
+    Attributor &A, const Instruction &FromI, const Instruction &ToI,
+    const AbstractAttribute &QueryingAA,
+    std::function<bool(const Function &F)> GoBackwardsCB) {
+  LLVM_DEBUG(dbgs() << "[AA] isPotentiallyReachable " << ToI << " from "
+                    << FromI << " [GBCB: " << bool(GoBackwardsCB) << "]\n");
+  const Function *ToFn = ToI.getFunction();
+  return ::isPotentiallyReachable(A, FromI, &ToI, *ToFn, QueryingAA,
+                                  GoBackwardsCB);
+}
+
+bool AA::isPotentiallyReachable(
+    Attributor &A, const Instruction &FromI, const Function &ToFn,
+    const AbstractAttribute &QueryingAA,
+    std::function<bool(const Function &F)> GoBackwardsCB) {
+  return ::isPotentiallyReachable(A, FromI, /* ToI */ nullptr, ToFn, QueryingAA,
+                                  GoBackwardsCB);
+}
+
 /// Return true if \p New is equal or worse than \p Old.
 static bool isEqualOrWorse(const Attribute &New, const Attribute &Old) {
   if (!Old.isIntAttribute())
@@ -704,9 +885,8 @@ void IRPosition::verify() {
            "Expected a nullptr for an invalid position!");
     return;
   case IRP_FLOAT:
-    assert((!isa<CallBase>(&getAssociatedValue()) &&
-            !isa<Argument>(&getAssociatedValue())) &&
-           "Expected specialized kind for call base and argument values!");
+    assert((!isa<Argument>(&getAssociatedValue())) &&
+           "Expected specialized kind for argument values!");
     return;
   case IRP_RETURNED:
     assert(isa<Function>(getAsValuePtr()) &&
@@ -900,7 +1080,7 @@ bool Attributor::isAssumedDead(const Use &U,
                          UsedAssumedInformation, CheckBBLivenessOnly, DepClass);
   }
-  return isAssumedDead(IRPosition::value(*UserI), QueryingAA, FnLivenessAA,
+  return isAssumedDead(IRPosition::inst(*UserI), QueryingAA, FnLivenessAA,
                        UsedAssumedInformation, CheckBBLivenessOnly, DepClass);
 }
@@ -923,7 +1103,8 @@ bool Attributor::isAssumedDead(const Instruction &I,
   // If we have a context instruction and a liveness AA we use it.
   if (FnLivenessAA &&
       FnLivenessAA->getIRPosition().getAnchorScope() == I.getFunction() &&
-      FnLivenessAA->isAssumedDead(&I)) {
+      (CheckBBLivenessOnly ? FnLivenessAA->isAssumedDead(I.getParent())
+                           : FnLivenessAA->isAssumedDead(&I))) {
     if (QueryingAA)
       recordDependence(*FnLivenessAA, *QueryingAA, DepClass);
     if (!FnLivenessAA->isKnownDead(&I))
@@ -934,8 +1115,9 @@ bool Attributor::isAssumedDead(const Instruction &I,
   if (CheckBBLivenessOnly)
     return false;
-  const AAIsDead &IsDeadAA = getOrCreateAAFor<AAIsDead>(
-      IRPosition::value(I, CBCtx), QueryingAA, DepClassTy::NONE);
+  const IRPosition IRP = IRPosition::inst(I, CBCtx);
+  const AAIsDead &IsDeadAA =
+      getOrCreateAAFor<AAIsDead>(IRP, QueryingAA, DepClassTy::NONE);
   // Don't check liveness for AAIsDead.
   if (QueryingAA == &IsDeadAA)
     return false;
@@ -1035,8 +1217,14 @@ bool Attributor::checkForAllUses(
     const Use *U = Worklist.pop_back_val();
     if (isa<PHINode>(U->getUser()) && !Visited.insert(U).second)
       continue;
-    LLVM_DEBUG(dbgs() << "[Attributor] Check use: " << **U << " in "
-                      << *U->getUser() << "\n");
+    LLVM_DEBUG({
+      if (auto *Fn = dyn_cast<Function>(U->getUser()))
+        dbgs() << "[Attributor] Check use: " << **U << " in " << Fn->getName()
+               << "\n";
+      else
+        dbgs() << "[Attributor] Check use: " << **U << " in " << *U->getUser()
+               << "\n";
+    });
     bool UsedAssumedInformation = false;
     if (isAssumedDead(*U, &QueryingAA, LivenessAA, UsedAssumedInformation,
                       CheckBBLivenessOnly, LivenessDepClass)) {
@@ -1126,8 +1314,14 @@ bool Attributor::checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred,
   SmallVector<const Use *, 8> Uses(make_pointer_range(Fn.uses()));
   for (unsigned u = 0; u < Uses.size(); ++u) {
     const Use &U = *Uses[u];
-    LLVM_DEBUG(dbgs() << "[Attributor] Check use: " << *U << " in "
-                      << *U.getUser() << "\n");
+    LLVM_DEBUG({
+      if (auto *Fn = dyn_cast<Function>(U))
+        dbgs() << "[Attributor] Check use: " << Fn->getName() << " in "
+               << *U.getUser() << "\n";
+      else
+        dbgs() << "[Attributor] Check use: " << *U << " in " << *U.getUser()
+               << "\n";
+    });
     bool UsedAssumedInformation = false;
     if (isAssumedDead(U, QueryingAA, nullptr, UsedAssumedInformation,
                       /* CheckBBLivenessOnly */ true)) {
@@ -1268,9 +1462,12 @@ static bool checkForAllInstructionsImpl(
     for (Instruction *I : *Insts) {
       // Skip dead instructions.
       if (A && !CheckPotentiallyDead &&
-          A->isAssumedDead(IRPosition::value(*I), QueryingAA, LivenessAA,
-                           UsedAssumedInformation, CheckBBLivenessOnly))
+          A->isAssumedDead(IRPosition::inst(*I), QueryingAA, LivenessAA,
+                           UsedAssumedInformation, CheckBBLivenessOnly)) {
+        LLVM_DEBUG(dbgs() << "[Attributor] Instruction " << *I
+                          << " is potentially dead, skip!\n";);
         continue;
+      }
       if (!Pred(*I))
         return false;
@@ -1329,7 +1526,7 @@ bool Attributor::checkForAllReadWriteInstructions(
   for (Instruction *I :
        InfoCache.getReadOrWriteInstsForFunction(*AssociatedFunction)) {
     // Skip dead instructions.
-    if (isAssumedDead(IRPosition::value(*I), &QueryingAA, &LivenessAA,
+    if (isAssumedDead(IRPosition::inst(*I), &QueryingAA, &LivenessAA,
                       UsedAssumedInformation))
       continue;
@@ -1381,9 +1578,11 @@ void Attributor::runTillFixpoint() {
         InvalidAA->Deps.pop_back();
         AbstractAttribute *DepAA = cast<AbstractAttribute>(Dep.getPointer());
         if (Dep.getInt() == unsigned(DepClassTy::OPTIONAL)) {
+          LLVM_DEBUG(dbgs() << " - recompute: " << *DepAA);
           Worklist.insert(DepAA);
           continue;
         }
+        LLVM_DEBUG(dbgs() << " - invalidate: " << *DepAA);
         DepAA->getState().indicatePessimisticFixpoint();
         assert(DepAA->getState().isAtFixpoint() && "Expected fixpoint state!");
         if (!DepAA->getState().isValidState())
@@ -1433,6 +1632,9 @@ void Attributor::runTillFixpoint() {
     // Note that dependent ones are added above.
     Worklist.clear();
     Worklist.insert(ChangedAAs.begin(), ChangedAAs.end());
+    Worklist.insert(QueryAAsAwaitingUpdate.begin(),
+                    QueryAAsAwaitingUpdate.end());
+    QueryAAsAwaitingUpdate.clear();
   } while (!Worklist.empty() && (IterationCounter++ < MaxFixedPointIterations ||
                                  VerifyMaxFixpointIterations));
@@ -1492,6 +1694,12 @@ void Attributor::runTillFixpoint() {
   }
 }
+void Attributor::registerForUpdate(AbstractAttribute &AA) {
+  assert(AA.isQueryAA() &&
+         "Non-query AAs should not be required to register for updates!");
+  QueryAAsAwaitingUpdate.insert(&AA);
+}
+
 ChangeStatus Attributor::manifestAttributes() {
   TimeTraceScope TimeScope("Attributor::manifestAttributes");
   size_t NumFinalAAs = DG.SyntheticRoot.Deps.size();
@@ -1792,7 +2000,7 @@ ChangeStatus Attributor::cleanupIR() {
     // Actually we do not delete the blocks but squash them into a single
     // unreachable but untangling branches that jump here is something we need
     // to do in a more generic way.
-    DetatchDeadBlocks(ToBeDeletedBBs, nullptr);
+    detachDeadBlocks(ToBeDeletedBBs, nullptr);
   }
   identifyDeadInternalFunctions();
@@ -1897,7 +2105,7 @@ ChangeStatus Attributor::updateAA(AbstractAttribute &AA) {
                     /* CheckBBLivenessOnly */ true))
     CS = AA.update(*this);
-  if (DV.empty()) {
+  if (!AA.isQueryAA() && DV.empty()) {
     // If the attribute did not query any non-fix information, the state
     // will not change and we can indicate that right away.
     AAState.indicateOptimisticFixpoint();
@@ -2601,12 +2809,12 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
   auto CallSitePred = [&](Instruction &I) -> bool {
     auto &CB = cast<CallBase>(I);
-    IRPosition CBRetPos = IRPosition::callsite_returned(CB);
+    IRPosition CBInstPos = IRPosition::inst(CB);
     IRPosition CBFnPos = IRPosition::callsite_function(CB);
     // Call sites might be dead if they do not have side effects and no live
     // users. The return value might be dead if there are no live users.
-    getOrCreateAAFor<AAIsDead>(CBRetPos);
+    getOrCreateAAFor<AAIsDead>(CBInstPos);
     Function *Callee = CB.getCalledFunction();
     // TODO: Even if the callee is not known now we might be able to simplify
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 76420783b2d1..2d88e329e093 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -15,6 +15,7 @@
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetOperations.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
@@ -68,6 +69,12 @@ static cl::opt<unsigned, true> MaxPotentialValues(
     cl::location(llvm::PotentialConstantIntValuesState::MaxPotentialValues),
     cl::init(7));
+static cl::opt<unsigned>
+    MaxInterferingWrites("attributor-max-interfering-writes", cl::Hidden,
+                         cl::desc("Maximum number of interfering writes to "
+                                  "check before assuming all might interfere."),
+                         cl::init(6));
+
 STATISTIC(NumAAs, "Number of abstract attributes created");
 // Some helper macros to deal with statistics tracking.
@@ -244,6 +251,8 @@ static Value *constructPointer(Type *ResTy, Type *PtrElemTy, Value *Ptr,
 /// once. Note that the value used for the callback may still be the value
 /// associated with \p IRP (due to PHIs). To limit how much effort is invested,
 /// we will never visit more values than specified by \p MaxValues.
+/// If \p Intraprocedural is set to true only values valid in the scope of
+/// \p CtxI will be visited and simplification into other scopes is prevented.
 template <typename StateTy>
 static bool genericValueTraversal(
     Attributor &A, IRPosition IRP, const AbstractAttribute &QueryingAA,
@@ -251,7 +260,8 @@ static bool genericValueTraversal(
     function_ref<bool(Value &, const Instruction *, StateTy &, bool)>
         VisitValueCB,
     const Instruction *CtxI, bool UseValueSimplify = true, int MaxValues = 16,
-    function_ref<Value *(Value *)> StripCB = nullptr) {
+    function_ref<Value *(Value *)> StripCB = nullptr,
+    bool Intraprocedural = false) {
   const AAIsDead *LivenessAA = nullptr;
   if (IRP.getAnchorScope())
@@ -281,8 +291,11 @@ static bool genericValueTraversal(
       continue;
     // Make sure we limit the compile time for complex expressions.
-    if (Iteration++ >= MaxValues)
+    if (Iteration++ >= MaxValues) {
+      LLVM_DEBUG(dbgs() << "Generic value traversal reached iteration limit: "
+                        << Iteration << "!\n");
       return false;
+    }
     // Explicitly look through calls with a "returned" attribute if we do
     // not have a pointer as stripPointerCasts only works on them.
@@ -331,10 +344,7 @@ static bool genericValueTraversal(
              "Expected liveness in the presence of instructions!");
       for (unsigned u = 0, e = PHI->getNumIncomingValues(); u < e; u++) {
         BasicBlock *IncomingBB = PHI->getIncomingBlock(u);
-        bool UsedAssumedInformation = false;
-        if (A.isAssumedDead(*IncomingBB->getTerminator(), &QueryingAA,
-                            LivenessAA, UsedAssumedInformation,
-                            /* CheckBBLivenessOnly */ true)) {
+        if (LivenessAA->isEdgeDead(IncomingBB, PHI->getParent())) {
           AnyDead = true;
           continue;
         }
@@ -344,24 +354,49 @@ static bool genericValueTraversal(
       continue;
     }
+    if (auto *Arg = dyn_cast<Argument>(V)) {
+      if (!Intraprocedural && !Arg->hasPassPointeeByValueCopyAttr()) {
+        SmallVector<Item> CallSiteValues;
+        bool AllCallSitesKnown = true;
+        if (A.checkForAllCallSites(
+                [&](AbstractCallSite ACS) {
+                  // Callbacks might not have a corresponding call site operand,
+                  // stick with the argument in that case.
+                  Value *CSOp = ACS.getCallArgOperand(*Arg);
+                  if (!CSOp)
+                    return false;
+                  CallSiteValues.push_back({CSOp, ACS.getInstruction()});
+                  return true;
+                },
+                *Arg->getParent(), true, &QueryingAA, AllCallSitesKnown)) {
+          Worklist.append(CallSiteValues);
+          continue;
+        }
+      }
+    }
+
     if (UseValueSimplify && !isa<Constant>(V)) {
       bool UsedAssumedInformation = false;
       Optional<Value *> SimpleV =
           A.getAssumedSimplified(*V, QueryingAA, UsedAssumedInformation);
       if (!SimpleV.hasValue())
         continue;
-      if (!SimpleV.getValue())
-        return false;
       Value *NewV = SimpleV.getValue();
-      if (NewV != V) {
-        Worklist.push_back({NewV, CtxI});
-        continue;
+      if (NewV && NewV != V) {
+        if (!Intraprocedural || !CtxI ||
+            AA::isValidInScope(*NewV, CtxI->getFunction())) {
+          Worklist.push_back({NewV, CtxI});
+          continue;
+        }
       }
     }
     // Once a leaf is reached we inform the user through the callback.
-    if (!VisitValueCB(*V, CtxI, State, Iteration > 1))
+    if (!VisitValueCB(*V, CtxI, State, Iteration > 1)) {
+      LLVM_DEBUG(dbgs() << "Generic value traversal visit callback failed for: "
+                        << *V << "!\n");
       return false;
+    }
   } while (!Worklist.empty());
   // If we actually used liveness information so we have to record a dependence.
@@ -375,7 +410,8 @@ static bool genericValueTraversal(
 bool AA::getAssumedUnderlyingObjects(Attributor &A, const Value &Ptr,
                                      SmallVectorImpl<Value *> &Objects,
                                      const AbstractAttribute &QueryingAA,
-                                     const Instruction *CtxI) {
+                                     const Instruction *CtxI,
+                                     bool Intraprocedural) {
   auto StripCB = [&](Value *V) { return getUnderlyingObject(V); };
   SmallPtrSet<Value *, 8> SeenObjects;
   auto VisitValueCB = [&SeenObjects](Value &Val, const Instruction *,
@@ -387,7 +423,7 @@ bool AA::getAssumedUnderlyingObjects(Attributor &A, const Value &Ptr,
   };
   if (!genericValueTraversal<decltype(Objects)>(
           A, IRPosition::value(Ptr), QueryingAA, Objects, VisitValueCB, CtxI,
-          true, 32, StripCB))
+          true, 32, StripCB, Intraprocedural))
     return false;
   return true;
 }
@@ -620,7 +656,7 @@ struct AACallSiteReturnedFromReturned : public BaseType {
     if (!AssociatedFunction)
       return S.indicatePessimisticFixpoint();
-    CallBase &CBContext = static_cast<CallBase &>(this->getAnchorValue());
+    CallBase &CBContext = cast<CallBase>(this->getAnchorValue());
     if (IntroduceCallBaseContext)
       LLVM_DEBUG(dbgs() << "[Attributor] Introducing call base context:"
                         << CBContext << "\n");
@@ -1026,7 +1062,6 @@
 private:
   BooleanState BS;
 };
-namespace {
 struct AAPointerInfoImpl
     : public StateWrapper<AA::PointerInfo::State, AAPointerInfo> {
   using BaseTy = StateWrapper<AA::PointerInfo::State, AAPointerInfo>;
@@ -1058,6 +1093,165 @@ struct AAPointerInfoImpl
       const override {
     return State::forallInterferingAccesses(SI, CB);
   }
+
+  bool forallInterferingWrites(
+      Attributor &A, const AbstractAttribute &QueryingAA, LoadInst &LI,
+      function_ref<bool(const Access &, bool)> UserCB) const override {
+    SmallPtrSet<const Access *, 8> DominatingWrites;
+    SmallVector<std::pair<const Access *, bool>, 8> InterferingWrites;
+
+    Function &Scope = *LI.getFunction();
+    const auto &NoSyncAA = A.getAAFor<AANoSync>(
+        QueryingAA, IRPosition::function(Scope), DepClassTy::OPTIONAL);
+    const auto *ExecDomainAA = A.lookupAAFor<AAExecutionDomain>(
+        IRPosition::function(Scope), &QueryingAA, DepClassTy::OPTIONAL);
+    const bool NoSync = NoSyncAA.isAssumedNoSync();
+
+    // Helper to determine if we need to consider threading, which we cannot
+    // right now. However, if the function is (assumed) nosync or the thread
+    // executing all instructions is the main thread only we can ignore
+    // threading.
+    auto CanIgnoreThreading = [&](const Instruction &I) -> bool {
+      if (NoSync)
+        return true;
+      if (ExecDomainAA && ExecDomainAA->isExecutedByInitialThreadOnly(I))
+        return true;
+      return false;
+    };
+
+    // Helper to determine if the access is executed by the same thread as the
+    // load, for now it is sufficient to avoid any potential threading effects
+    // as we cannot deal with them anyway.
+    auto IsSameThreadAsLoad = [&](const Access &Acc) -> bool {
+      return CanIgnoreThreading(*Acc.getLocalInst());
+    };
+
+    // TODO: Use inter-procedural reachability and dominance.
+    const auto &NoRecurseAA = A.getAAFor<AANoRecurse>(
+        QueryingAA, IRPosition::function(*LI.getFunction()),
+        DepClassTy::OPTIONAL);
+
+    const bool CanUseCFGResoning = CanIgnoreThreading(LI);
+    InformationCache &InfoCache = A.getInfoCache();
+    const DominatorTree *DT =
+        NoRecurseAA.isKnownNoRecurse()
+            ? InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(
+                  Scope)
+            : nullptr;
+
+    enum GPUAddressSpace : unsigned {
+      Generic = 0,
+      Global = 1,
+      Shared = 3,
+      Constant = 4,
+      Local = 5,
+    };
+
+    // Helper to check if a value has "kernel lifetime", that is it will not
+    // outlive a GPU kernel. This is true for shared, constant, and local
+    // globals on AMD and NVIDIA GPUs.
+    auto HasKernelLifetime = [&](Value *V, Module &M) {
+      Triple T(M.getTargetTriple());
+      if (!(T.isAMDGPU() || T.isNVPTX()))
+        return false;
+      switch (V->getType()->getPointerAddressSpace()) {
+      case GPUAddressSpace::Shared:
+      case GPUAddressSpace::Constant:
+      case GPUAddressSpace::Local:
+        return true;
+      default:
+        return false;
+      };
+    };
+
+    // The IsLiveInCalleeCB will be used by the AA::isPotentiallyReachable query
+    // to determine if we should look at reachability from the callee. For
+    // certain pointers we know the lifetime and we do not have to step into the
+    // callee to determine reachability as the pointer would be dead in the
+    // callee. See the conditional initialization below.
+    std::function<bool(const Function &)> IsLiveInCalleeCB;
+
+    if (auto *AI = dyn_cast<AllocaInst>(&getAssociatedValue())) {
+      // If the alloca containing function is not recursive the alloca
+      // must be dead in the callee.
+      const Function *AIFn = AI->getFunction();
+      const auto &NoRecurseAA = A.getAAFor<AANoRecurse>(
+          *this, IRPosition::function(*AIFn), DepClassTy::OPTIONAL);
+      if (NoRecurseAA.isAssumedNoRecurse()) {
+        IsLiveInCalleeCB = [AIFn](const Function &Fn) { return AIFn != &Fn; };
+      }
+    } else if (auto *GV = dyn_cast<GlobalValue>(&getAssociatedValue())) {
+      // If the global has kernel lifetime we can stop if we reach a kernel
+      // as it is "dead" in the (unknown) callees.
+      if (HasKernelLifetime(GV, *GV->getParent()))
+        IsLiveInCalleeCB = [](const Function &Fn) {
+          return !Fn.hasFnAttribute("kernel");
+        };
+    }
+
+    auto AccessCB = [&](const Access &Acc, bool Exact) {
+      if (!Acc.isWrite())
+        return true;
+
+      // For now we only filter accesses based on CFG reasoning which does not
+      // work yet if we have threading effects, or the access is complicated.
+      if (CanUseCFGResoning) {
+        if (!AA::isPotentiallyReachable(A, *Acc.getLocalInst(), LI, QueryingAA,
+                                        IsLiveInCalleeCB))
+          return true;
+        if (DT && Exact &&
+            (Acc.getLocalInst()->getFunction() == LI.getFunction()) &&
+            IsSameThreadAsLoad(Acc)) {
+          if (DT->dominates(Acc.getLocalInst(), &LI))
+            DominatingWrites.insert(&Acc);
+        }
+      }
+
+      InterferingWrites.push_back({&Acc, Exact});
+      return true;
+    };
+    if (!State::forallInterferingAccesses(LI, AccessCB))
+      return false;
+
+    // If we cannot use CFG reasoning we only filter the non-write accesses
+    // and are done here.
+    if (!CanUseCFGResoning) {
+      for (auto &It : InterferingWrites)
+        if (!UserCB(*It.first, It.second))
+          return false;
+      return true;
+    }
+
+    // Helper to determine if we can skip a specific write access. This is in
+    // the worst case quadratic as we are looking for another write that will
+    // hide the effect of this one.
+    auto CanSkipAccess = [&](const Access &Acc, bool Exact) {
+      if (!IsSameThreadAsLoad(Acc))
+        return false;
+      if (!DominatingWrites.count(&Acc))
+        return false;
+      for (const Access *DomAcc : DominatingWrites) {
+        assert(Acc.getLocalInst()->getFunction() ==
+                   DomAcc->getLocalInst()->getFunction() &&
+               "Expected dominating writes to be in the same function!");
+
+        if (DomAcc != &Acc &&
+            DT->dominates(Acc.getLocalInst(), DomAcc->getLocalInst())) {
+          return true;
+        }
+      }
+      return false;
+    };
+
+    // Run the user callback on all writes we cannot skip and return if that
+    // succeeded for all or not.
+    unsigned NumInterferingWrites = InterferingWrites.size();
+    for (auto &It : InterferingWrites)
+      if (!DT || NumInterferingWrites > MaxInterferingWrites ||
+          !CanSkipAccess(*It.first, It.second))
+        if (!UserCB(*It.first, It.second))
+          return false;
+    return true;
+  }
   ChangeStatus translateAndAddCalleeState(Attributor &A,
                                           const AAPointerInfo &CalleeAA,
@@ -1200,9 +1394,8 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
                           << " : " << *Idx << "\n");
         return false;
       }
-      UsrOI.Offset = PtrOI.Offset +
-                     DL.getIndexedOffsetInType(
-                         GEP->getSourceElementType(), Indices);
+      UsrOI.Offset = PtrOI.Offset + DL.getIndexedOffsetInType(
+                                        GEP->getSourceElementType(), Indices);
       Follow = true;
       return true;
     }
@@ -1693,17 +1886,9 @@ ChangeStatus AAReturnedValuesImpl::updateImpl(Attributor &A) {
   auto ReturnValueCB = [&](Value &V, const Instruction *CtxI, ReturnInst &Ret,
                            bool) -> bool {
-    bool UsedAssumedInformation = false;
-    Optional<Value *> SimpleRetVal =
-        A.getAssumedSimplified(V, *this, UsedAssumedInformation);
-    if (!SimpleRetVal.hasValue())
-      return true;
-    if (!SimpleRetVal.getValue())
-      return false;
-    Value *RetVal = *SimpleRetVal;
-    assert(AA::isValidInScope(*RetVal, Ret.getFunction()) &&
+    assert(AA::isValidInScope(V, Ret.getFunction()) &&
            "Assumed returned value should be valid in function scope!");
-    if (ReturnedValues[RetVal].insert(&Ret))
+    if (ReturnedValues[&V].insert(&Ret))
       Changed = ChangeStatus::CHANGED;
     return true;
   };
@@ -1712,7 +1897,8 @@ ChangeStatus AAReturnedValuesImpl::updateImpl(Attributor &A) {
     ReturnInst &Ret = cast<ReturnInst>(I);
     return genericValueTraversal<ReturnInst>(
         A, IRPosition::value(*Ret.getReturnValue()), *this, Ret, ReturnValueCB,
-        &I);
+        &I, /* UseValueSimplify */ true, /* MaxValues */ 16,
+        /* StripCB */ nullptr, /* Intraprocedural */ true);
   };
   // Discover returned values from all live returned instructions in the
@@ -1767,24 +1953,16 @@ struct AANoSyncImpl : AANoSync {
   /// See AbstractAttribute::updateImpl(...).
   ChangeStatus updateImpl(Attributor &A) override;
-
-  /// Helper function used to determine whether an instruction is non-relaxed
-  /// atomic. In other words, if an atomic instruction does not have unordered
-  /// or monotonic ordering
-  static bool isNonRelaxedAtomic(Instruction *I);
-
-  /// Helper function specific for intrinsics which are potentially volatile
-  static bool isNoSyncIntrinsic(Instruction *I);
 };
-bool AANoSyncImpl::isNonRelaxedAtomic(Instruction *I) {
+bool AANoSync::isNonRelaxedAtomic(const Instruction *I) {
   if (!I->isAtomic())
     return false;
   if (auto *FI = dyn_cast<FenceInst>(I))
     // All legal orderings for fence are stronger than monotonic.
     return FI->getSyncScopeID() != SyncScope::SingleThread;
-  else if (auto *AI = dyn_cast<AtomicCmpXchgInst>(I)) {
+  if (auto *AI = dyn_cast<AtomicCmpXchgInst>(I)) {
    // Unordered is not a legal ordering for cmpxchg.
    return (AI->getSuccessOrdering() != AtomicOrdering::Monotonic ||
            AI->getFailureOrdering() != AtomicOrdering::Monotonic);
@@ -1813,7 +1991,7 @@ bool AANoSyncImpl::isNonRelaxedAtomic(Instruction *I) {
 /// Return true if this intrinsic is nosync. This is only used for intrinsics
 /// which would be nosync except that they have a volatile flag. All other
 /// intrinsics are simply annotated with the nosync attribute in Intrinsics.td.
-bool AANoSyncImpl::isNoSyncIntrinsic(Instruction *I) {
+bool AANoSync::isNoSyncIntrinsic(const Instruction *I) {
   if (auto *MI = dyn_cast<MemIntrinsic>(I))
     return !MI->isVolatile();
   return false;
 }
@@ -1822,24 +2000,7 @@ ChangeStatus AANoSyncImpl::updateImpl(Attributor &A) {
   auto CheckRWInstForNoSync = [&](Instruction &I) {
-    /// We are looking for volatile instructions or Non-Relaxed atomics.
-
-    if (const auto *CB = dyn_cast<CallBase>(&I)) {
-      if (CB->hasFnAttr(Attribute::NoSync))
-        return true;
-
-      if (isNoSyncIntrinsic(&I))
-        return true;
-
-      const auto &NoSyncAA = A.getAAFor<AANoSync>(
-          *this, IRPosition::callsite_function(*CB), DepClassTy::REQUIRED);
-      return NoSyncAA.isAssumedNoSync();
-    }
-
-    if (!I.isVolatile() && !isNonRelaxedAtomic(&I))
-      return true;
-
-    return false;
+    return AA::isNoSyncInst(A, I, *this);
   };
   auto CheckForNoSync = [&](Instruction &I) {
@@ -2327,16 +2488,6 @@ struct AANoRecurseFunction final : AANoRecurseImpl {
   AANoRecurseFunction(const IRPosition &IRP, Attributor &A)
       : AANoRecurseImpl(IRP, A) {}
-  /// See AbstractAttribute::initialize(...).
-  void initialize(Attributor &A) override {
-    AANoRecurseImpl::initialize(A);
-    // TODO: We should build a call graph ourselves to enable this in the module
-    // pass as well.
-    if (const Function *F = getAnchorScope())
-      if (A.getInfoCache().getSccSize(*F) != 1)
-        indicatePessimisticFixpoint();
-  }
-
   /// See AbstractAttribute::updateImpl(...).
   ChangeStatus updateImpl(Attributor &A) override {
@@ -2359,27 +2510,10 @@ struct AANoRecurseFunction final : AANoRecurseImpl {
       return ChangeStatus::UNCHANGED;
     }
-    // If the above check does not hold anymore we look at the calls.
-    auto CheckForNoRecurse = [&](Instruction &I) {
-      const auto &CB = cast<CallBase>(I);
-      if (CB.hasFnAttr(Attribute::NoRecurse))
-        return true;
-
-      const auto &NoRecurseAA = A.getAAFor<AANoRecurse>(
-          *this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED);
-      if (!NoRecurseAA.isAssumedNoRecurse())
-        return false;
-
-      // Recursion to the same function
-      if (CB.getCalledFunction() == getAnchorScope())
-        return false;
-
-      return true;
-    };
-
-    bool UsedAssumedInformation = false;
-    if (!A.checkForAllCallLikeInstructions(CheckForNoRecurse, *this,
-                                           UsedAssumedInformation))
+    const AAFunctionReachability &EdgeReachability =
+        A.getAAFor<AAFunctionReachability>(*this, getIRPosition(),
+                                           DepClassTy::REQUIRED);
+    if (EdgeReachability.canReach(A, *getAnchorScope()))
       return indicatePessimisticFixpoint();
     return ChangeStatus::UNCHANGED;
   }
@@ -2798,16 +2932,10 @@ struct AAWillReturnImpl : public AAWillReturn {
         (!getAssociatedFunction() || !getAssociatedFunction()->mustProgress()))
       return false;
-    const auto &MemAA =
-        A.getAAFor<AAMemoryBehavior>(*this, getIRPosition(), DepClassTy::NONE);
-    if (!MemAA.isAssumedReadOnly())
-      return false;
-    if (KnownOnly && !MemAA.isKnownReadOnly())
-      return false;
-    if (!MemAA.isKnownReadOnly())
-      A.recordDependence(MemAA, *this, DepClassTy::OPTIONAL);
-
-    return true;
+    bool IsKnown;
+    if (AA::isAssumedReadOnly(A, getIRPosition(), *this, IsKnown))
+      return IsKnown || !KnownOnly;
+    return false;
   }
   /// See AbstractAttribute::updateImpl(...).
@@ -2904,6 +3032,10 @@ struct AAReachabilityImpl : AAReachability {
   /// See AbstractAttribute::updateImpl(...).
   ChangeStatus updateImpl(Attributor &A) override {
+    const auto &NoRecurseAA = A.getAAFor<AANoRecurse>(
+        *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
+    if (!NoRecurseAA.isAssumedNoRecurse())
+      return indicatePessimisticFixpoint();
     return ChangeStatus::UNCHANGED;
   }
 };
@@ -3008,9 +3140,8 @@ struct AANoAliasArgument final
       return Base::updateImpl(A);
     // If the argument is read-only, no-alias cannot break synchronization.
-    const auto &MemBehaviorAA = A.getAAFor<AAMemoryBehavior>(
-        *this, getIRPosition(), DepClassTy::OPTIONAL);
-    if (MemBehaviorAA.isAssumedReadOnly())
+    bool IsKnown;
+    if (AA::isAssumedReadOnly(A, getIRPosition(), *this, IsKnown))
       return Base::updateImpl(A);
     // If the argument is never passed through callbacks, no-alias cannot break
@@ -3366,14 +3497,8 @@ struct AAIsDeadValueImpl : public AAIsDead {
     if (!NoUnwindAA.isKnownNoUnwind())
       A.recordDependence(NoUnwindAA, *this, DepClassTy::OPTIONAL);
-    const auto &MemBehaviorAA =
-        A.getAndUpdateAAFor<AAMemoryBehavior>(*this, CallIRP, DepClassTy::NONE);
-    if (MemBehaviorAA.isAssumedReadOnly()) {
-      if (!MemBehaviorAA.isKnownReadOnly())
-        A.recordDependence(MemBehaviorAA, *this, DepClassTy::OPTIONAL);
-      return true;
-    }
-    return false;
+    bool IsKnown;
+    return AA::isAssumedReadOnly(A, CallIRP, *this, IsKnown);
   }
 };
@@ -3699,6 +3824,7 @@ struct AAIsDeadFunction : public AAIsDead {
       if (!AssumedLiveBlocks.count(&BB)) {
         A.deleteAfterManifest(BB);
         ++BUILD_STAT_NAME(AAIsDead, BasicBlock);
+        HasChanged = ChangeStatus::CHANGED;
       }
     return HasChanged;
@@ -3708,7 +3834,7 @@ struct AAIsDeadFunction : public AAIsDead {
   ChangeStatus updateImpl(Attributor &A) override;
   bool isEdgeDead(const BasicBlock *From, const BasicBlock *To) const override {
-    return !AssumedLiveEdges.count(std::make_pair(From, To));
+    return isValidState() && !AssumedLiveEdges.count(std::make_pair(From, To));
   }
   /// See AbstractAttribute::trackStatistics()
@@ -4921,14 +5047,11 @@ ChangeStatus AANoCaptureImpl::updateImpl(Attributor &A) {
   AANoCapture::StateType T;
   // Readonly means we cannot capture through memory.
-  const auto &FnMemAA =
-      A.getAAFor<AAMemoryBehavior>(*this, FnPos, DepClassTy::NONE);
-  if (FnMemAA.isAssumedReadOnly()) {
+  bool IsKnown;
+  if (AA::isAssumedReadOnly(A, FnPos, *this, IsKnown)) {
     T.addKnownBits(NOT_CAPTURED_IN_MEM);
-    if (FnMemAA.isKnownReadOnly())
+    if (IsKnown)
       addKnownBits(NOT_CAPTURED_IN_MEM);
-    else
-      A.recordDependence(FnMemAA, *this, DepClassTy::OPTIONAL);
   }
   // Make sure all returned values are different than the underlying value.
@@ -5085,7 +5208,6 @@ struct AANoCaptureCallSiteReturned final : AANoCaptureImpl {
     STATS_DECLTRACK_CSRET_ATTR(nocapture)
   }
 };
-} // namespace
 /// ------------------ Value Simplify Attribute ----------------------------
@@ -5106,7 +5228,6 @@ bool ValueSimplifyStateType::unionAssumed(Optional<Value *> Other) {
   return true;
 }
-namespace {
 struct AAValueSimplifyImpl : AAValueSimplify {
   AAValueSimplifyImpl(const IRPosition &IRP, Attributor &A)
       : AAValueSimplify(IRP, A) {}
@@ -5266,8 +5387,6 @@ struct AAValueSimplifyImpl : AAValueSimplify {
     auto CheckAccess = [&](const AAPointerInfo::Access &Acc, bool IsExact) {
       LLVM_DEBUG(dbgs() << " - visit access " << Acc << "\n");
-      if (!Acc.isWrite())
-        return true;
       if (Acc.isWrittenValueYetUndetermined())
         return true;
       Value *Content = Acc.getWrittenValue();
@@ -5287,7 +5406,7 @@ struct AAValueSimplifyImpl : AAValueSimplify {
       auto &PI = A.getAAFor<AAPointerInfo>(AA, IRPosition::value(*Obj),
                                            DepClassTy::REQUIRED);
-      if (!PI.forallInterferingAccesses(L, CheckAccess))
+      if (!PI.forallInterferingWrites(A, AA, L, CheckAccess))
         return false;
     }
     return true;
@@ -5325,9 +5444,8 @@ struct AAValueSimplifyArgument final : AAValueSimplifyImpl {
     if (Arg->hasByValAttr()) {
       // TODO: We probably need to verify synchronization is not an issue, e.g.,
       // there is no race by not copying a constant byval.
-      const auto &MemAA = A.getAAFor<AAMemoryBehavior>(*this, getIRPosition(),
-                                                       DepClassTy::REQUIRED);
-      if (!MemAA.isAssumedReadOnly())
+      bool IsKnown;
+      if (!AA::isAssumedReadOnly(A, getIRPosition(), *this, IsKnown))
        return indicatePessimisticFixpoint();
     }
@@ -6827,9 +6945,8 @@ struct AAPrivatizablePtrCallSiteArgument final
       return indicatePessimisticFixpoint();
     }
-    const auto &MemBehaviorAA =
-        A.getAAFor<AAMemoryBehavior>(*this, IRP, DepClassTy::REQUIRED);
-    if (!MemBehaviorAA.isAssumedReadOnly()) {
+    bool IsKnown;
+    if (!AA::isAssumedReadOnly(A, IRP, *this, IsKnown)) {
       LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] pointer is written!\n");
       return indicatePessimisticFixpoint();
     }
@@ -7378,7 +7495,6 @@ void AAMemoryBehaviorFloating::analyzeUseIn(Attributor &A, const Use &U,
     if (UserI->mayWriteToMemory())
       removeAssumedBits(NO_WRITES);
 }
-} // namespace
 /// -------------------- Memory Locations Attributes ---------------------------
 /// Includes read-none, argmemonly, inaccessiblememonly,
@@ -7412,7 +7528,6 @@ std::string AAMemoryLocation::getMemoryLocationsAsStr(
   return S;
 }
-namespace {
 struct AAMemoryLocationImpl : public AAMemoryLocation {
   AAMemoryLocationImpl(const IRPosition &IRP, Attributor &A)
@@ -7657,7 +7772,8 @@ void AAMemoryLocationImpl::categorizePtrValue(
                     << getMemoryLocationsAsStr(State.getAssumed()) << "]\n");
   SmallVector<Value *, 8> Objects;
-  if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, *this, &I)) {
+  if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, *this, &I,
+                                       /* Intraprocedural */ true)) {
     LLVM_DEBUG(
         dbgs() << "[AAMemoryLocation] Pointer locations not categorized\n");
     updateStateAndAccessesMap(State, NO_UNKOWN_MEM, &I, nullptr, Changed,
@@ -9411,7 +9527,7 @@ struct AACallEdgesCallSite : public AACallEdgesImpl {
       }
     };
-    CallBase *CB = static_cast<CallBase *>(getCtxI());
+    CallBase *CB = cast<CallBase>(getCtxI());
     if (CB->isInlineAsm()) {
       setHasUnknownCallee(false, Change);
@@ -9450,7 +9566,7 @@ struct AACallEdgesFunction : public AACallEdgesImpl {
     ChangeStatus Change = ChangeStatus::UNCHANGED;
     auto ProcessCallInst = [&](Instruction &Inst) {
-      CallBase &CB = static_cast<CallBase &>(Inst);
+      CallBase &CB = cast<CallBase>(Inst);
       auto &CBEdges = A.getAAFor<AACallEdges>(
          *this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED);
@@ -9481,11 +9597,39 @@ struct AACallEdgesFunction : public AACallEdgesImpl {
 struct AAFunctionReachabilityFunction : public AAFunctionReachability {
 private:
   struct QuerySet {
-    void markReachable(Function *Fn) {
-      Reachable.insert(Fn);
-      Unreachable.erase(Fn);
+    void markReachable(const Function &Fn) {
+      Reachable.insert(&Fn);
+      Unreachable.erase(&Fn);
     }
+    /// If there is no information about the function None is returned.
+    Optional<bool> isCachedReachable(const Function &Fn) {
+      // Assume that we can reach the function.
+      // TODO: Be more specific with the unknown callee.
+      if (CanReachUnknownCallee)
+        return true;
+
+      if (Reachable.count(&Fn))
+        return true;
+
+      if (Unreachable.count(&Fn))
+        return false;
+
+      return llvm::None;
+    }
+
+    /// Set of functions that we know for sure is reachable.
+    DenseSet<const Function *> Reachable;
+
+    /// Set of functions that are unreachable, but might become reachable.
+    DenseSet<const Function *> Unreachable;
+
+    /// If we can reach a function with a call to a unknown function we assume
+    /// that we can reach any function.
+    bool CanReachUnknownCallee = false;
+  };
+
+  struct QueryResolver : public QuerySet {
     ChangeStatus update(Attributor &A, const AAFunctionReachability &AA,
                         ArrayRef<const AACallEdges *> AAEdgesList) {
       ChangeStatus Change = ChangeStatus::UNCHANGED;
@@ -9499,31 +9643,30 @@
         }
       }
-      for (Function *Fn : make_early_inc_range(Unreachable)) {
-        if (checkIfReachable(A, AA, AAEdgesList, Fn)) {
+      for (const Function *Fn : make_early_inc_range(Unreachable)) {
+        if (checkIfReachable(A, AA, AAEdgesList, *Fn)) {
           Change = ChangeStatus::CHANGED;
-          markReachable(Fn);
+          markReachable(*Fn);
         }
       }
       return Change;
     }
-    bool isReachable(Attributor &A, const AAFunctionReachability &AA,
-                     ArrayRef<const AACallEdges *> AAEdgesList, Function *Fn) {
-      // Assume that we can reach the function.
-      // TODO: Be more specific with the unknown callee.
-      if (CanReachUnknownCallee)
-        return true;
-
-      if (Reachable.count(Fn))
-        return true;
+    bool isReachable(Attributor &A, AAFunctionReachability &AA,
+                     ArrayRef<const AACallEdges *> AAEdgesList,
+                     const Function &Fn) {
+      Optional<bool> Cached = isCachedReachable(Fn);
+      if (Cached.hasValue())
+        return Cached.getValue();
-      if (Unreachable.count(Fn))
-        return false;
+      // The query was not cached, thus it is new. We need to request an update
+      // explicitly to make sure this the information is properly run to a
+      // fixpoint.
+      A.registerForUpdate(AA);
       // We need to assume that this function can't reach Fn to prevent
       // an infinite loop if this function is recursive.
-      Unreachable.insert(Fn);
+      Unreachable.insert(&Fn);
       bool Result = checkIfReachable(A, AA, AAEdgesList, Fn);
       if (Result)
@@ -9533,13 +9676,13 @@
     bool checkIfReachable(Attributor &A, const AAFunctionReachability &AA,
                           ArrayRef<const AACallEdges *> AAEdgesList,
-                          Function *Fn) const {
+                          const Function &Fn) const {
       // Handle the most trivial case first.
       for (auto *AAEdges : AAEdgesList) {
         const SetVector<Function *> &Edges = AAEdges->getOptimisticEdges();
-        if (Edges.count(Fn))
+        if (Edges.count(const_cast<Function *>(&Fn)))
          return true;
       }
@@ -9560,28 +9703,44 @@
       }
       // The result is false for now, set dependencies and leave.
-      for (auto Dep : Deps)
-        A.recordDependence(AA, *Dep, DepClassTy::REQUIRED);
+      for (auto *Dep : Deps)
+        A.recordDependence(*Dep, AA, DepClassTy::REQUIRED);
       return false;
     }
+  };
-    /// Set of functions that we know for sure is reachable.
-    DenseSet<Function *> Reachable;
+  /// Get call edges that can be reached by this instruction.
+  bool getReachableCallEdges(Attributor &A, const AAReachability &Reachability,
+                             const Instruction &Inst,
+                             SmallVector<const AACallEdges *> &Result) const {
+    // Determine call like instructions that we can reach from the inst.
+    auto CheckCallBase = [&](Instruction &CBInst) {
+      if (!Reachability.isAssumedReachable(A, Inst, CBInst))
+        return true;
-    /// Set of functions that are unreachable, but might become reachable.
-    DenseSet<Function *> Unreachable;
+      auto &CB = cast<CallBase>(CBInst);
+      const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
+          *this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED);
-    /// If we can reach a function with a call to a unknown function we assume
-    /// that we can reach any function.
-    bool CanReachUnknownCallee = false;
-  };
+      Result.push_back(&AAEdges);
+      return true;
+    };
+
+    bool UsedAssumedInformation = false;
+    return A.checkForAllCallLikeInstructions(CheckCallBase, *this,
+                                             UsedAssumedInformation,
+                                             /* CheckBBLivenessOnly */ true);
+  }
 public:
   AAFunctionReachabilityFunction(const IRPosition &IRP, Attributor &A)
       : AAFunctionReachability(IRP, A) {}
-  bool canReach(Attributor &A, Function *Fn) const override {
+  bool canReach(Attributor &A, const Function &Fn) const override {
+    if (!isValidState())
+      return true;
+
     const AACallEdges &AAEdges =
         A.getAAFor<AACallEdges>(*this, getIRPosition(), DepClassTy::REQUIRED);
@@ -9590,14 +9749,18 @@
     // a const_cast.
     // This is a hack for us to be able to cache queries.
     auto *NonConstThis = const_cast<AAFunctionReachabilityFunction *>(this);
-    bool Result =
-        NonConstThis->WholeFunction.isReachable(A, *this, {&AAEdges}, Fn);
+    bool Result = NonConstThis->WholeFunction.isReachable(A, *NonConstThis,
+                                                          {&AAEdges}, Fn);
     return Result;
   }
   /// Can \p CB reach \p Fn
-  bool canReach(Attributor &A, CallBase &CB, Function *Fn) const override {
+  bool canReach(Attributor &A, CallBase &CB,
+                const Function &Fn) const override {
+    if (!isValidState())
+      return true;
+
     const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
         *this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED);
@@ -9606,13 +9769,40 @@
     // a const_cast.
     // This is a hack for us to be able to cache queries.
     auto *NonConstThis = const_cast<AAFunctionReachabilityFunction *>(this);
-    QuerySet &CBQuery = NonConstThis->CBQueries[&CB];
+    QueryResolver &CBQuery = NonConstThis->CBQueries[&CB];
-    bool Result = CBQuery.isReachable(A, *this, {&AAEdges}, Fn);
+    bool Result = CBQuery.isReachable(A, *NonConstThis, {&AAEdges}, Fn);
     return Result;
   }
+  bool instructionCanReach(Attributor &A, const Instruction &Inst,
+                           const Function &Fn,
+                           bool UseBackwards) const override {
+    if (!isValidState())
+      return true;
+
+    if (UseBackwards)
+      return AA::isPotentiallyReachable(A, Inst, Fn, *this, nullptr);
+
+    const auto &Reachability = A.getAAFor<AAReachability>(
+        *this, IRPosition::function(*getAssociatedFunction()),
+        DepClassTy::REQUIRED);
+
+    SmallVector<const AACallEdges *> CallEdges;
+    bool AllKnown = getReachableCallEdges(A, Reachability, Inst, CallEdges);
+    // Attributor returns attributes as const, so this function has to be
+    // const for users of this attribute to use it without having to do
+    // a const_cast.
+    // This is a hack for us to be able to cache queries.
+    auto *NonConstThis = const_cast<AAFunctionReachabilityFunction *>(this);
+    QueryResolver &InstQSet = NonConstThis->InstQueries[&Inst];
+    if (!AllKnown)
+      InstQSet.CanReachUnknownCallee = true;
+
+    return InstQSet.isReachable(A, *NonConstThis, CallEdges, Fn);
+  }
+
   /// See AbstractAttribute::updateImpl(...).
   ChangeStatus updateImpl(Attributor &A) override {
     const AACallEdges &AAEdges =
@@ -9621,7 +9811,7 @@
     Change |= WholeFunction.update(A, *this, {&AAEdges});
-    for (auto CBPair : CBQueries) {
+    for (auto &CBPair : CBQueries) {
       const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
           *this, IRPosition::callsite_function(*CBPair.first),
           DepClassTy::REQUIRED);
       Change |= CBPair.second.update(A, *this, {&AAEdges});
     }
+    // Update the Instruction queries.
+    const AAReachability *Reachability;
+    if (!InstQueries.empty()) {
+      Reachability = &A.getAAFor<AAReachability>(
+          *this, IRPosition::function(*getAssociatedFunction()),
+          DepClassTy::REQUIRED);
+    }
+
+    // Check for local callbases first.
+    for (auto &InstPair : InstQueries) {
+      SmallVector<const AACallEdges *> CallEdges;
+      bool AllKnown =
+          getReachableCallEdges(A, *Reachability, *InstPair.first, CallEdges);
+      // Update will return change if we this effects any queries.
+      if (!AllKnown)
+        InstPair.second.CanReachUnknownCallee = true;
+      Change |= InstPair.second.update(A, *this, CallEdges);
+    }
+
     return Change;
   }
@@ -9649,11 +9858,14 @@
   }
   /// Used to answer if a the whole function can reacha a specific function.
-  QuerySet WholeFunction;
+  QueryResolver WholeFunction;
   /// Used to answer if a call base inside this function can reach a specific
   /// function.
-  DenseMap<CallBase *, QuerySet> CBQueries;
+  DenseMap<const CallBase *, QueryResolver> CBQueries;
+
+  /// This is for instruction queries than scan "forward".
+  DenseMap<const Instruction *, QueryResolver> InstQueries;
 };
 /// ---------------------- Assumption Propagation ------------------------------
@@ -9790,8 +10002,6 @@
   }
 };
-} // namespace
-
 AACallGraphNode *AACallEdgeIterator::operator*() const {
   return static_cast<AACallGraphNode *>(const_cast<AACallEdges *>(
       &A.getOrCreateAAFor<AACallEdges>(IRPosition::function(**I))));
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp
index 74f11fa30959..927dceec8865 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp
@@ -21,6 +21,7 @@
 #include "llvm/Analysis/ValueLatticeUtils.h"
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Transforms/IPO.h"
 using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index d3cac3efce86..1cb32e32c895 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -352,14 +352,10 @@ static bool collectSRATypes(DenseMap<uint64_t, Type *> &Types, GlobalValue *GV,
   while (!Worklist.empty()) {
     Use *U = Worklist.pop_back_val();
     User *V = U->getUser();
-    if (isa<BitCastOperator>(V) || isa<AddrSpaceCastOperator>(V)) {
-      AppendUses(V);
-      continue;
-    }
-    if (auto *GEP = dyn_cast<GEPOperator>(V)) {
-      if (!GEP->hasAllConstantIndices())
-        return false;
+    auto *GEP = dyn_cast<GEPOperator>(V);
+    if (isa<BitCastOperator>(V) || isa<AddrSpaceCastOperator>(V) ||
+        (GEP && GEP->hasAllConstantIndices())) {
       AppendUses(V);
       continue;
     }
@@ -2229,6 +2225,13 @@ OptimizeGlobalAliases(Module &M,
   for (GlobalValue *GV : Used.used())
     Used.compilerUsedErase(GV);
+  // Return whether GV is explicitly or implicitly dso_local and not replaceable
+  // by another definition in the current linkage unit.
+  auto IsModuleLocal = [](GlobalValue &GV) {
+    return !GlobalValue::isInterposableLinkage(GV.getLinkage()) &&
+           (GV.isDSOLocal() || GV.isImplicitDSOLocal());
+  };
+
   for (GlobalAlias &J : llvm::make_early_inc_range(M.aliases())) {
     // Aliases without names cannot be referenced outside this module.
     if (!J.hasName() && !J.isDeclaration() && !J.hasLocalLinkage())
@@ -2240,18 +2243,20 @@ OptimizeGlobalAliases(Module &M,
     }
     // If the alias can change at link time, nothing can be done - bail out.
-    if (J.isInterposable())
+    if (!IsModuleLocal(J))
       continue;
     Constant *Aliasee = J.getAliasee();
     GlobalValue *Target = dyn_cast<GlobalValue>(Aliasee->stripPointerCasts());
     // We can't trivially replace the alias with the aliasee if the aliasee is
     // non-trivial in some way. We also can't replace the alias with the aliasee
-    // if the aliasee is interposable because aliases point to the local
-    // definition.
+    // if the aliasee may be preemptible at runtime. On ELF, a non-preemptible
+    // alias can be used to access the definition as if preemption did not
+    // happen.
     // TODO: Try to handle non-zero GEPs of local aliasees.
-    if (!Target || Target->isInterposable())
+    if (!Target || !IsModuleLocal(*Target))
       continue;
+    Target->removeDeadConstantUsers();
     // Make all users of the alias use the aliasee instead.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/IROutliner.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/IROutliner.cpp index e064fbbef595..faf7cb7d566a 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/IPO/IROutliner.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/IROutliner.cpp @@ -42,6 +42,11 @@ extern cl::opt<bool> DisableBranches; // A command flag to be used for debugging to exclude indirect calls from similarity // matching and outlining. extern cl::opt<bool> DisableIndirectCalls; + +// A command flag to be used for debugging to exclude intrinsics from similarity +// matching and outlining. +extern cl::opt<bool> DisableIntrinsics; + } // namespace llvm // Set to true if the user wants the ir outliner to run on linkonceodr linkage @@ -2610,6 +2615,8 @@ unsigned IROutliner::doOutline(Module &M) { // Find the possible similarity sections. InstructionClassifier.EnableBranches = !DisableBranches; InstructionClassifier.EnableIndirectCalls = !DisableIndirectCalls; + InstructionClassifier.EnableIntrinsics = !DisableIntrinsics; + IRSimilarityIdentifier &Identifier = getIRSI(M); SimilarityGroupList &SimilarityCandidates = *Identifier.getSimilarity(); diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/LowerTypeTests.cpp index c0bb19e184d6..8e83d7bcb6c2 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/IPO/LowerTypeTests.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/LowerTypeTests.cpp @@ -42,6 +42,7 @@ #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index 68f33410c602..2d765fb6ce6d 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -26,19 +26,25 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CallGraphSCCPass.h" +#include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/IR/Assumptions.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/IntrinsicsNVPTX.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/Attributor.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -98,6 +104,11 @@ static cl::opt<bool> DisableOpenMPOptStateMachineRewrite( cl::desc("Disable OpenMP optimizations that replace the state machine."), cl::Hidden, cl::init(false)); +static cl::opt<bool> DisableOpenMPOptBarrierElimination( "openmp-opt-disable-barrier-elimination", cl::ZeroOrMore, cl::desc("Disable OpenMP optimizations that eliminate barriers."), cl::Hidden, cl::init(false)); + static cl::opt<bool> PrintModuleAfterOptimizations( "openmp-opt-print-module", cl::ZeroOrMore, cl::desc("Print
the current module after OpenMP optimizations."), @@ -147,6 +158,7 @@ STATISTIC(NumOpenMPParallelRegionsMerged, "Number of OpenMP parallel regions merged"); STATISTIC(NumBytesMovedToSharedMemory, "Amount of memory pushed to shared memory"); +STATISTIC(NumBarriersEliminated, "Number of redundant barriers eliminated"); #if !defined(NDEBUG) static constexpr auto TAG = "[" DEBUG_TYPE "]"; @@ -458,7 +470,6 @@ struct OMPInformationCache : public InformationCache { RTLFunctions.insert(F); \ if (declMatchesRTFTypes(F, OMPBuilder._ReturnType, ArgsTypes)) { \ RuntimeFunctionIDMap[F] = _Enum; \ - F->removeFnAttr(Attribute::NoInline); \ auto &RFI = RFIs[_Enum]; \ RFI.Kind = _Enum; \ RFI.Name = _Name; \ @@ -480,6 +491,15 @@ struct OMPInformationCache : public InformationCache { } #include "llvm/Frontend/OpenMP/OMPKinds.def" + // Remove the `noinline` attribute from `__kmpc`, `_OMP::` and `omp_` + // functions, except if `optnone` is present. + for (Function &F : M) { + for (StringRef Prefix : {"__kmpc", "_ZN4_OMP", "omp_"}) + if (F.getName().startswith(Prefix) && + !F.hasFnAttribute(Attribute::OptimizeNone)) + F.removeFnAttr(Attribute::NoInline); + } + // TODO: We should attach the attributes defined in OMPKinds.def. } @@ -787,6 +807,8 @@ struct OpenMPOpt { if (remarksEnabled()) analysisGlobalization(); + + Changed |= eliminateBarriers(); } else { if (PrintICVValues) printICVs(); @@ -809,6 +831,8 @@ struct OpenMPOpt { Changed = true; } } + + Changed |= eliminateBarriers(); } return Changed; @@ -1378,6 +1402,213 @@ private: return Changed; } + /// Eliminates redundant, aligned barriers in OpenMP offloaded kernels. + /// TODO: Make this an AA and expand it to work across blocks and functions. + bool eliminateBarriers() { + bool Changed = false; + + if (DisableOpenMPOptBarrierElimination) + return /*Changed=*/false; + + if (OMPInfoCache.Kernels.empty()) + return /*Changed=*/false; + + enum ImplicitBarrierType { IBT_ENTRY, IBT_EXIT }; + + class BarrierInfo { + Instruction *I; + enum ImplicitBarrierType Type; + + public: + BarrierInfo(enum ImplicitBarrierType Type) : I(nullptr), Type(Type) {} + BarrierInfo(Instruction &I) : I(&I) {} + + bool isImplicit() { return !I; } + + bool isImplicitEntry() { return isImplicit() && Type == IBT_ENTRY; } + + bool isImplicitExit() { return isImplicit() && Type == IBT_EXIT; } + + Instruction *getInstruction() { return I; } + }; + + for (Function *Kernel : OMPInfoCache.Kernels) { + for (BasicBlock &BB : *Kernel) { + SmallVector<BarrierInfo, 8> BarriersInBlock; + SmallPtrSet<Instruction *, 8> BarriersToBeDeleted; + + // Add the kernel entry implicit barrier. + if (&Kernel->getEntryBlock() == &BB) + BarriersInBlock.push_back(IBT_ENTRY); + + // Find implicit and explicit aligned barriers in the same basic block. + for (Instruction &I : BB) { + if (isa<ReturnInst>(I)) { + // Add the implicit barrier when exiting the kernel. + BarriersInBlock.push_back(IBT_EXIT); + continue; + } + CallBase *CB = dyn_cast<CallBase>(&I); + if (!CB) + continue; + + auto IsAlignBarrierCB = [&](CallBase &CB) { + switch (CB.getIntrinsicID()) { + case Intrinsic::nvvm_barrier0: + case Intrinsic::nvvm_barrier0_and: + case Intrinsic::nvvm_barrier0_or: + case Intrinsic::nvvm_barrier0_popc: + case Intrinsic::amdgcn_s_barrier: + return true; + default: + break; + } + return hasAssumption(CB, + KnownAssumptionString("ompx_aligned_barrier")); + }; + + if (IsAlignBarrierCB(*CB)) { + // Add an explicit aligned barrier. 
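+ // ("Aligned" means every thread of the kernel is known to arrive at the + // barrier together: either one of the hardware barrier intrinsics matched + // above or a call carrying the "ompx_aligned_barrier" assumption.)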
+ BarriersInBlock.push_back(I); + } + } + + if (BarriersInBlock.size() <= 1) + continue; + + // A barrier in a barrier pair is removable if all instructions + // between the barriers in the pair are side-effect free modulo the + // barrier operation. + auto IsBarrierRemoveable = [&Kernel](BarrierInfo *StartBI, + BarrierInfo *EndBI) { + assert( + !StartBI->isImplicitExit() && + "Expected start barrier to be other than a kernel exit barrier"); + assert( + !EndBI->isImplicitEntry() && + "Expected end barrier to be other than a kernel entry barrier"); + // If StartBI's instruction is null then this is the implicit + // kernel entry barrier, so iterate from the first instruction in the + // entry block. + Instruction *I = (StartBI->isImplicitEntry()) + ? &Kernel->getEntryBlock().front() + : StartBI->getInstruction()->getNextNode(); + assert(I && "Expected non-null start instruction"); + Instruction *E = (EndBI->isImplicitExit()) + ? I->getParent()->getTerminator() + : EndBI->getInstruction(); + assert(E && "Expected non-null end instruction"); + + for (; I != E; I = I->getNextNode()) { + if (!I->mayHaveSideEffects() && !I->mayReadFromMemory()) + continue; + + auto IsPotentiallyAffectedByBarrier = + [](Optional<MemoryLocation> Loc) { + const Value *Obj = (Loc && Loc->Ptr) + ? getUnderlyingObject(Loc->Ptr) + : nullptr; + if (!Obj) { + LLVM_DEBUG( + dbgs() + << "Access to unknown location requires barriers\n"); + return true; + } + if (isa<UndefValue>(Obj)) + return false; + if (isa<AllocaInst>(Obj)) + return false; + if (auto *GV = dyn_cast<GlobalVariable>(Obj)) { + if (GV->isConstant()) + return false; + if (GV->isThreadLocal()) + return false; + if (GV->getAddressSpace() == (int)AddressSpace::Local) + return false; + if (GV->getAddressSpace() == (int)AddressSpace::Constant) + return false; + } + LLVM_DEBUG(dbgs() << "Access to '" << *Obj + << "' requires barriers\n"); + return true; + }; + + if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) { + Optional<MemoryLocation> Loc = MemoryLocation::getForDest(MI); + if (IsPotentiallyAffectedByBarrier(Loc)) + return false; + if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) { + Optional<MemoryLocation> Loc = + MemoryLocation::getForSource(MTI); + if (IsPotentiallyAffectedByBarrier(Loc)) + return false; + } + continue; + } + + if (auto *LI = dyn_cast<LoadInst>(I)) + if (LI->hasMetadata(LLVMContext::MD_invariant_load)) + continue; + + Optional<MemoryLocation> Loc = MemoryLocation::getOrNone(I); + if (IsPotentiallyAffectedByBarrier(Loc)) + return false; + } + + return true; + }; + + // Iterate barrier pairs and remove an explicit barrier if analysis + // deems it removable. + for (auto *It = BarriersInBlock.begin(), + *End = BarriersInBlock.end() - 1; + It != End; ++It) { + + BarrierInfo *StartBI = It; + BarrierInfo *EndBI = (It + 1); + + // Cannot remove when both are implicit barriers, continue. + if (StartBI->isImplicit() && EndBI->isImplicit()) + continue; + + if (!IsBarrierRemoveable(StartBI, EndBI)) + continue; + + assert(!(StartBI->isImplicit() && EndBI->isImplicit()) && + "Expected at least one explicit barrier to remove."); + + // Remove an explicit barrier, check first, then second.
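+ // At least one barrier of the pair is explicit here, so prefer deleting + // the start barrier and fall back to the end barrier when the start is + // one of the implicit kernel barriers.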
+ if (!StartBI->isImplicit()) { + LLVM_DEBUG(dbgs() << "Remove start barrier " + << *StartBI->getInstruction() << "\n"); + BarriersToBeDeleted.insert(StartBI->getInstruction()); + } else { + LLVM_DEBUG(dbgs() << "Remove end barrier " + << *EndBI->getInstruction() << "\n"); + BarriersToBeDeleted.insert(EndBI->getInstruction()); + } + } + + if (BarriersToBeDeleted.empty()) + continue; + + Changed = true; + for (Instruction *I : BarriersToBeDeleted) { + ++NumBarriersEliminated; + auto Remark = [&](OptimizationRemark OR) { + return OR << "Redundant barrier eliminated."; + }; + + if (EnableVerboseRemarks) + emitRemark<OptimizationRemark>(I, "OMP190", Remark); + I->eraseFromParent(); + } + } + } + + return Changed; + } + void analysisGlobalization() { auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared]; diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp index 21395460bccb..e104ae00e916 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp @@ -23,6 +23,7 @@ #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/MDBuilder.h" #include "llvm/ProfileData/SampleProf.h" #include "llvm/Support/CRC.h" diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp index daaf6cbeb3fd..52708ff2f226 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp @@ -535,7 +535,7 @@ void writeThinLTOBitcode(raw_ostream &OS, raw_ostream *ThinLinkOS, // the information that is needed by thin link will be written in the // given OS. if (ThinLinkOS && Index) - WriteThinLinkBitcodeToFile(M, *ThinLinkOS, *Index, ModHash); + writeThinLinkBitcodeToFile(M, *ThinLinkOS, *Index, ModHash); } class WriteThinLTOBitcode : public ModulePass { diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp index 6acace1d9fd4..8b30f0e989a1 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -970,7 +970,7 @@ bool DevirtModule::runForTesting( if (StringRef(ClWriteSummary).endswith(".bc")) { raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_None); ExitOnErr(errorCodeToError(EC)); - WriteIndexToFile(*Summary, OS); + writeIndexToFile(*Summary, OS); } else { raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_TextWithCRLF); ExitOnErr(errorCodeToError(EC)); diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 1fb46af46bee..05b28328afbf 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -2468,10 +2468,28 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { // Fence instruction simplification Instruction *InstCombinerImpl::visitFenceInst(FenceInst &FI) { - // Remove identical consecutive fences. 
- Instruction *Next = FI.getNextNonDebugInstruction(); - if (auto *NFI = dyn_cast<FenceInst>(Next)) - if (FI.isIdenticalTo(NFI)) + auto *NFI = dyn_cast<FenceInst>(FI.getNextNonDebugInstruction()); + // This check is solely here to handle arbitrary target-dependent syncscopes. + // TODO: Can be removed if it does not matter in practice. + if (NFI && FI.isIdenticalTo(NFI)) + return eraseInstFromFunction(FI); + + // Returns true if FI1 is an identical or stronger fence than FI2. + auto isIdenticalOrStrongerFence = [](FenceInst *FI1, FenceInst *FI2) { + auto FI1SyncScope = FI1->getSyncScopeID(); + // Consider same scope, where scope is global or single-thread. + if (FI1SyncScope != FI2->getSyncScopeID() || + (FI1SyncScope != SyncScope::System && + FI1SyncScope != SyncScope::SingleThread)) + return false; + + return isAtLeastOrStrongerThan(FI1->getOrdering(), FI2->getOrdering()); + }; + if (NFI && isIdenticalOrStrongerFence(NFI, &FI)) + return eraseInstFromFunction(FI); + + if (auto *PFI = dyn_cast_or_null<FenceInst>(FI.getPrevNonDebugInstruction())) + if (isIdenticalOrStrongerFence(PFI, &FI)) + return eraseInstFromFunction(FI); return nullptr; } diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index fd58a44504b3..e45be5745fcc 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -5882,6 +5882,55 @@ static Instruction *foldICmpInvariantGroup(ICmpInst &I) { return nullptr; } +/// This function folds patterns produced by lowering of reduce idioms, such as +/// llvm.vector.reduce.and which are lowered into instruction chains. This code +/// attempts to generate fewer scalar comparisons instead of vector +/// comparisons when possible. +static Instruction *foldReductionIdiom(ICmpInst &I, + InstCombiner::BuilderTy &Builder, + const DataLayout &DL) { + if (I.getType()->isVectorTy()) + return nullptr; + ICmpInst::Predicate OuterPred, InnerPred; + Value *LHS, *RHS; + + // Match lowering of @llvm.vector.reduce.and. Turn + /// %vec_ne = icmp ne <8 x i8> %lhs, %rhs + /// %scalar_ne = bitcast <8 x i1> %vec_ne to i8 + /// %res = icmp <pred> i8 %scalar_ne, 0 + /// + /// into + /// + /// %lhs.scalar = bitcast <8 x i8> %lhs to i64 + /// %rhs.scalar = bitcast <8 x i8> %rhs to i64 + /// %res = icmp <pred> i64 %lhs.scalar, %rhs.scalar + /// + /// for <pred> in {ne, eq}. + if (!match(&I, m_ICmp(OuterPred, + m_OneUse(m_BitCast(m_OneUse( + m_ICmp(InnerPred, m_Value(LHS), m_Value(RHS))))), + m_Zero()))) + return nullptr; + auto *LHSTy = dyn_cast<FixedVectorType>(LHS->getType()); + if (!LHSTy || !LHSTy->getElementType()->isIntegerTy()) + return nullptr; + unsigned NumBits = + LHSTy->getNumElements() * LHSTy->getElementType()->getIntegerBitWidth(); + // TODO: Relax this to "not wider than max legal integer type"?
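+ // Example: for <8 x i8> operands, NumBits is 8 * 8 = 64, so the fold only + // fires when the target's DataLayout declares i64 a legal integer type.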
+ if (!DL.isLegalInteger(NumBits)) + return nullptr; + + if (ICmpInst::isEquality(OuterPred) && InnerPred == ICmpInst::ICMP_NE) { + auto *ScalarTy = Builder.getIntNTy(NumBits); + LHS = Builder.CreateBitCast(LHS, ScalarTy, LHS->getName() + ".scalar"); + RHS = Builder.CreateBitCast(RHS, ScalarTy, RHS->getName() + ".scalar"); + return ICmpInst::Create(Instruction::ICmp, OuterPred, LHS, RHS, + I.getName()); + } + + return nullptr; +} + Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) { bool Changed = false; const SimplifyQuery Q = SQ.getWithInstruction(&I); @@ -6124,6 +6173,9 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) { if (Instruction *Res = foldICmpInvariantGroup(I)) return Res; + if (Instruction *Res = foldReductionIdiom(I, Builder, DL)) + return Res; + return Changed ? &I : nullptr; } diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp index 30f6aab2114b..09694d50468f 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -46,8 +46,8 @@ void InstCombinerImpl::PHIArgMergedDebugLoc(Instruction *Inst, PHINode &PN) { // will be inefficient. assert(!isa<CallInst>(Inst)); - for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { - auto *I = cast<Instruction>(PN.getIncomingValue(i)); + for (Value *V : drop_begin(PN.incoming_values())) { + auto *I = cast<Instruction>(V); Inst->applyMergedLocation(Inst->getDebugLoc(), I->getDebugLoc()); } } @@ -138,8 +138,9 @@ Instruction *InstCombinerImpl::foldIntegerTypedPHI(PHINode &PN) { return nullptr; SmallVector<Value *, 4> AvailablePtrVals; - for (unsigned i = 0; i != PN.getNumIncomingValues(); ++i) { - Value *Arg = PN.getIncomingValue(i); + for (auto Incoming : zip(PN.blocks(), PN.incoming_values())) { + BasicBlock *BB = std::get<0>(Incoming); + Value *Arg = std::get<1>(Incoming); // First look backward: if (auto *PI = dyn_cast<PtrToIntInst>(Arg)) { @@ -151,8 +152,8 @@ Instruction *InstCombinerImpl::foldIntegerTypedPHI(PHINode &PN) { Value *ArgIntToPtr = nullptr; for (User *U : Arg->users()) { if (isa<IntToPtrInst>(U) && U->getType() == IntToPtr->getType() && - (DT.dominates(cast<Instruction>(U), PN.getIncomingBlock(i)) || - cast<Instruction>(U)->getParent() == PN.getIncomingBlock(i))) { + (DT.dominates(cast<Instruction>(U), BB) || + cast<Instruction>(U)->getParent() == BB)) { ArgIntToPtr = U; break; } @@ -190,26 +191,21 @@ Instruction *InstCombinerImpl::foldIntegerTypedPHI(PHINode &PN) { "Not enough available ptr typed incoming values"); PHINode *MatchingPtrPHI = nullptr; unsigned NumPhis = 0; - for (auto II = BB->begin(); II != BB->end(); II++, NumPhis++) { + for (PHINode &PtrPHI : BB->phis()) { // FIXME: consider handling this in AggressiveInstCombine - PHINode *PtrPHI = dyn_cast<PHINode>(II); - if (!PtrPHI) - break; - if (NumPhis > MaxNumPhis) + if (NumPhis++ > MaxNumPhis) return nullptr; - if (PtrPHI == &PN || PtrPHI->getType() != IntToPtr->getType()) + if (&PtrPHI == &PN || PtrPHI.getType() != IntToPtr->getType()) continue; - MatchingPtrPHI = PtrPHI; - for (unsigned i = 0; i != PtrPHI->getNumIncomingValues(); ++i) { - if (AvailablePtrVals[i] != - PtrPHI->getIncomingValueForBlock(PN.getIncomingBlock(i))) { - MatchingPtrPHI = nullptr; - break; - } - } - - if (MatchingPtrPHI) - break; + if (any_of(zip(PN.blocks(), AvailablePtrVals), + [&](const auto &BlockAndValue) { + BasicBlock *BB = 
std::get<0>(BlockAndValue); + Value *V = std::get<1>(BlockAndValue); + return PtrPHI.getIncomingValueForBlock(BB) != V; + })) + continue; + MatchingPtrPHI = &PtrPHI; + break; } if (MatchingPtrPHI) { @@ -250,9 +246,9 @@ Instruction *InstCombinerImpl::foldIntegerTypedPHI(PHINode &PN) { InsertNewInstBefore(NewPtrPHI, PN); SmallDenseMap<Value *, Instruction *> Casts; - for (unsigned i = 0; i != PN.getNumIncomingValues(); ++i) { - auto *IncomingBB = PN.getIncomingBlock(i); - auto *IncomingVal = AvailablePtrVals[i]; + for (auto Incoming : zip(PN.blocks(), AvailablePtrVals)) { + auto *IncomingBB = std::get<0>(Incoming); + auto *IncomingVal = std::get<1>(Incoming); if (IncomingVal->getType() == IntToPtr->getType()) { NewPtrPHI->addIncoming(IncomingVal, IncomingBB); @@ -330,8 +326,8 @@ InstCombinerImpl::foldPHIArgInsertValueInstructionIntoPHI(PHINode &PN) { // Scan to see if all operands are `insertvalue`'s with the same indices, // and all have a single use. - for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { - auto *I = dyn_cast<InsertValueInst>(PN.getIncomingValue(i)); + for (Value *V : drop_begin(PN.incoming_values())) { + auto *I = dyn_cast<InsertValueInst>(V); if (!I || !I->hasOneUser() || I->getIndices() != FirstIVI->getIndices()) return nullptr; } @@ -370,8 +366,8 @@ InstCombinerImpl::foldPHIArgExtractValueInstructionIntoPHI(PHINode &PN) { // Scan to see if all operands are `extractvalue`'s with the same indices, // and all have a single use. - for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { - auto *I = dyn_cast<ExtractValueInst>(PN.getIncomingValue(i)); + for (Value *V : drop_begin(PN.incoming_values())) { + auto *I = dyn_cast<ExtractValueInst>(V); if (!I || !I->hasOneUser() || I->getIndices() != FirstEVI->getIndices() || I->getAggregateOperand()->getType() != FirstEVI->getAggregateOperand()->getType()) return nullptr; } @@ -412,8 +408,8 @@ Instruction *InstCombinerImpl::foldPHIArgBinOpIntoPHI(PHINode &PN) { Type *RHSType = RHSVal->getType(); // Scan to see if all operands are the same opcode, and all have one user. - for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { - Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i)); + for (Value *V : drop_begin(PN.incoming_values())) { + Instruction *I = dyn_cast<Instruction>(V); if (!I || I->getOpcode() != Opc || !I->hasOneUser() || // Verify type of the LHS matches so we don't fold cmp's of different // types. @@ -461,15 +457,17 @@ Instruction *InstCombinerImpl::foldPHIArgBinOpIntoPHI(PHINode &PN) { // Add all operands to the new PHIs.
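// (NewLHS and NewRHS were created earlier, outside this excerpt, only for // an operand that actually differs between the incoming binops; an operand // that is identical in every predecessor needs no new PHI.)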
if (NewLHS || NewRHS) { - for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { - Instruction *InInst = cast<Instruction>(PN.getIncomingValue(i)); + for (auto Incoming : drop_begin(zip(PN.blocks(), PN.incoming_values()))) { + BasicBlock *InBB = std::get<0>(Incoming); + Value *InVal = std::get<1>(Incoming); + Instruction *InInst = cast<Instruction>(InVal); if (NewLHS) { Value *NewInLHS = InInst->getOperand(0); - NewLHS->addIncoming(NewInLHS, PN.getIncomingBlock(i)); + NewLHS->addIncoming(NewInLHS, InBB); } if (NewRHS) { Value *NewInRHS = InInst->getOperand(1); - NewRHS->addIncoming(NewInRHS, PN.getIncomingBlock(i)); + NewRHS->addIncoming(NewInRHS, InBB); } } } @@ -487,8 +485,8 @@ Instruction *InstCombinerImpl::foldPHIArgBinOpIntoPHI(PHINode &PN) { NewBinOp->copyIRFlags(PN.getIncomingValue(0)); - for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) - NewBinOp->andIRFlags(PN.getIncomingValue(i)); + for (Value *V : drop_begin(PN.incoming_values())) + NewBinOp->andIRFlags(V); PHIArgMergedDebugLoc(NewBinOp, PN); return NewBinOp; @@ -511,9 +509,8 @@ Instruction *InstCombinerImpl::foldPHIArgGEPIntoPHI(PHINode &PN) { bool AllInBounds = true; // Scan to see if all operands are the same opcode, and all have one user. - for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { - GetElementPtrInst *GEP = - dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i)); + for (Value *V : drop_begin(PN.incoming_values())) { + GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V); if (!GEP || !GEP->hasOneUser() || GEP->getType() != FirstInst->getType() || GEP->getNumOperands() != FirstInst->getNumOperands()) return nullptr; @@ -527,8 +524,8 @@ Instruction *InstCombinerImpl::foldPHIArgGEPIntoPHI(PHINode &PN) { AllBasePointersAreAllocas = false; // Compare the operand lists. - for (unsigned op = 0, e = FirstInst->getNumOperands(); op != e; ++op) { - if (FirstInst->getOperand(op) == GEP->getOperand(op)) + for (unsigned Op = 0, E = FirstInst->getNumOperands(); Op != E; ++Op) { + if (FirstInst->getOperand(Op) == GEP->getOperand(Op)) continue; // Don't merge two GEPs when two operands differ (introducing phi nodes) @@ -536,11 +533,12 @@ Instruction *InstCombinerImpl::foldPHIArgGEPIntoPHI(PHINode &PN) { // substantially cheaper to compute for the constants, so making it a // variable index could pessimize the path. This also handles the case // for struct indices, which must always be constant. - if (isa<ConstantInt>(FirstInst->getOperand(op)) || - isa<ConstantInt>(GEP->getOperand(op))) + if (isa<ConstantInt>(FirstInst->getOperand(Op)) || + isa<ConstantInt>(GEP->getOperand(Op))) return nullptr; - if (FirstInst->getOperand(op)->getType() !=GEP->getOperand(op)->getType()) + if (FirstInst->getOperand(Op)->getType() != + GEP->getOperand(Op)->getType()) return nullptr; // If we already needed a PHI for an earlier operand, and another operand @@ -550,7 +548,7 @@ Instruction *InstCombinerImpl::foldPHIArgGEPIntoPHI(PHINode &PN) { if (NeededPhi) return nullptr; - FixedOperands[op] = nullptr; // Needs a PHI. + FixedOperands[Op] = nullptr; // Needs a PHI. NeededPhi = true; } } @@ -569,29 +567,30 @@ Instruction *InstCombinerImpl::foldPHIArgGEPIntoPHI(PHINode &PN) { SmallVector<PHINode*, 16> OperandPhis(FixedOperands.size()); bool HasAnyPHIs = false; - for (unsigned i = 0, e = FixedOperands.size(); i != e; ++i) { - if (FixedOperands[i]) continue; // operand doesn't need a phi. 
- Value *FirstOp = FirstInst->getOperand(i); - PHINode *NewPN = PHINode::Create(FirstOp->getType(), e, - FirstOp->getName()+".pn"); + for (unsigned I = 0, E = FixedOperands.size(); I != E; ++I) { + if (FixedOperands[I]) + continue; // operand doesn't need a phi. + Value *FirstOp = FirstInst->getOperand(I); + PHINode *NewPN = + PHINode::Create(FirstOp->getType(), E, FirstOp->getName() + ".pn"); InsertNewInstBefore(NewPN, PN); NewPN->addIncoming(FirstOp, PN.getIncomingBlock(0)); - OperandPhis[i] = NewPN; - FixedOperands[i] = NewPN; + OperandPhis[I] = NewPN; + FixedOperands[I] = NewPN; HasAnyPHIs = true; } - // Add all operands to the new PHIs. if (HasAnyPHIs) { - for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { - GetElementPtrInst *InGEP =cast<GetElementPtrInst>(PN.getIncomingValue(i)); - BasicBlock *InBB = PN.getIncomingBlock(i); - - for (unsigned op = 0, e = OperandPhis.size(); op != e; ++op) - if (PHINode *OpPhi = OperandPhis[op]) - OpPhi->addIncoming(InGEP->getOperand(op), InBB); + for (auto Incoming : drop_begin(zip(PN.blocks(), PN.incoming_values()))) { + BasicBlock *InBB = std::get<0>(Incoming); + Value *InVal = std::get<1>(Incoming); + GetElementPtrInst *InGEP = cast<GetElementPtrInst>(InVal); + + for (unsigned Op = 0, E = OperandPhis.size(); Op != E; ++Op) + if (PHINode *OpPhi = OperandPhis[Op]) + OpPhi->addIncoming(InGEP->getOperand(Op), InBB); } } @@ -627,18 +626,18 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) { // Check for non-address taken alloca. If not address-taken already, it isn't // profitable to do this xform. if (AllocaInst *AI = dyn_cast<AllocaInst>(L->getOperand(0))) { - bool isAddressTaken = false; + bool IsAddressTaken = false; for (User *U : AI->users()) { if (isa<LoadInst>(U)) continue; if (StoreInst *SI = dyn_cast<StoreInst>(U)) { // If storing TO the alloca, then the address isn't taken. if (SI->getOperand(1) == AI) continue; } - isAddressTaken = true; + IsAddressTaken = true; break; } - if (!isAddressTaken && AI->isStaticAlloca()) + if (!IsAddressTaken && AI->isStaticAlloca()) return false; } @@ -665,9 +664,9 @@ Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) { // When processing loads, we need to propagate two bits of information to the // sunk load: whether it is volatile, and what its alignment is. - bool isVolatile = FirstLI->isVolatile(); + bool IsVolatile = FirstLI->isVolatile(); Align LoadAlignment = FirstLI->getAlign(); - unsigned LoadAddrSpace = FirstLI->getPointerAddressSpace(); + const unsigned LoadAddrSpace = FirstLI->getPointerAddressSpace(); // We can't sink the load if the loaded value could be modified between the // load and the PHI. @@ -678,22 +677,25 @@ Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) { // If the PHI is of volatile loads and the load block has multiple // successors, sinking it would remove a load of the volatile value from // the path through the other successor. - if (isVolatile && + if (IsVolatile && FirstLI->getParent()->getTerminator()->getNumSuccessors() != 1) return nullptr; - // Check to see if all arguments are the same operation. 
- for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { - LoadInst *LI = dyn_cast<LoadInst>(PN.getIncomingValue(i)); - if (!LI || !LI->hasOneUser()) + for (auto Incoming : drop_begin(zip(PN.blocks(), PN.incoming_values()))) { + BasicBlock *InBB = std::get<0>(Incoming); + Value *InVal = std::get<1>(Incoming); + LoadInst *LI = dyn_cast<LoadInst>(InVal); + if (!LI || !LI->hasOneUser() || LI->isAtomic()) + return nullptr; + + // Make sure all arguments are the same type of operation. + if (LI->isVolatile() != IsVolatile || + LI->getPointerAddressSpace() != LoadAddrSpace) return nullptr; // We can't sink the load if the loaded value could be modified between // the load and the PHI. - if (LI->isVolatile() != isVolatile || - LI->getParent() != PN.getIncomingBlock(i) || - LI->getPointerAddressSpace() != LoadAddrSpace || - !isSafeAndProfitableToSinkLoad(LI)) + if (LI->getParent() != InBB || !isSafeAndProfitableToSinkLoad(LI)) return nullptr; LoadAlignment = std::min(LoadAlignment, LI->getAlign()); @@ -701,8 +703,7 @@ Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) { // If the PHI is of volatile loads and the load block has multiple // successors, sinking it would remove a load of the volatile value from // the path through the other successor. - if (isVolatile && - LI->getParent()->getTerminator()->getNumSuccessors() != 1) + if (IsVolatile && LI->getParent()->getTerminator()->getNumSuccessors() != 1) return nullptr; } @@ -715,7 +716,7 @@ Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) { Value *InVal = FirstLI->getOperand(0); NewPN->addIncoming(InVal, PN.getIncomingBlock(0)); LoadInst *NewLI = - new LoadInst(FirstLI->getType(), NewPN, "", isVolatile, LoadAlignment); + new LoadInst(FirstLI->getType(), NewPN, "", IsVolatile, LoadAlignment); unsigned KnownIDs[] = { LLVMContext::MD_tbaa, @@ -734,13 +735,15 @@ Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) { NewLI->setMetadata(ID, FirstLI->getMetadata(ID)); // Add all operands to the new PHI and combine TBAA metadata. - for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { - LoadInst *LI = cast<LoadInst>(PN.getIncomingValue(i)); + for (auto Incoming : drop_begin(zip(PN.blocks(), PN.incoming_values()))) { + BasicBlock *BB = std::get<0>(Incoming); + Value *V = std::get<1>(Incoming); + LoadInst *LI = cast<LoadInst>(V); combineMetadata(NewLI, LI, KnownIDs, true); Value *NewInVal = LI->getOperand(0); if (NewInVal != InVal) InVal = nullptr; - NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i)); + NewPN->addIncoming(NewInVal, BB); } if (InVal) { @@ -755,7 +758,7 @@ Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) { // If this was a volatile load that we are merging, make sure to loop through // and mark all the input loads as non-volatile. If we don't do this, we will // insert a new volatile load and the old ones will not be deletable. - if (isVolatile) + if (IsVolatile) for (Value *IncValue : PN.incoming_values()) cast<LoadInst>(IncValue)->setVolatile(false); @@ -830,8 +833,8 @@ Instruction *InstCombinerImpl::foldPHIArgZextsIntoPHI(PHINode &Phi) { // operands, and zext the result back to the original type. 
PHINode *NewPhi = PHINode::Create(NarrowType, NumIncomingValues, Phi.getName() + ".shrunk"); - for (unsigned i = 0; i != NumIncomingValues; ++i) - NewPhi->addIncoming(NewIncoming[i], Phi.getIncomingBlock(i)); + for (unsigned I = 0; I != NumIncomingValues; ++I) + NewPhi->addIncoming(NewIncoming[I], Phi.getIncomingBlock(I)); InsertNewInstBefore(NewPhi, Phi); return CastInst::CreateZExtOrBitCast(NewPhi, Phi.getType()); @@ -885,13 +888,13 @@ Instruction *InstCombinerImpl::foldPHIArgOpIntoPHI(PHINode &PN) { } // Check to see if all arguments are the same operation. - for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { - Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i)); + for (Value *V : drop_begin(PN.incoming_values())) { + Instruction *I = dyn_cast<Instruction>(V); if (!I || !I->hasOneUser() || !I->isSameOperationAs(FirstInst)) return nullptr; if (CastSrcTy) { if (I->getOperand(0)->getType() != CastSrcTy) - return nullptr; // Cast operation must match. + return nullptr; // Cast operation must match. } else if (I->getOperand(1) != ConstantOp) { return nullptr; } @@ -907,11 +910,13 @@ Instruction *InstCombinerImpl::foldPHIArgOpIntoPHI(PHINode &PN) { NewPN->addIncoming(InVal, PN.getIncomingBlock(0)); // Add all operands to the new PHI. - for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { - Value *NewInVal = cast<Instruction>(PN.getIncomingValue(i))->getOperand(0); + for (auto Incoming : drop_begin(zip(PN.blocks(), PN.incoming_values()))) { + BasicBlock *BB = std::get<0>(Incoming); + Value *V = std::get<1>(Incoming); + Value *NewInVal = cast<Instruction>(V)->getOperand(0); if (NewInVal != InVal) InVal = nullptr; - NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i)); + NewPN->addIncoming(NewInVal, BB); } Value *PhiVal; @@ -937,8 +942,8 @@ Instruction *InstCombinerImpl::foldPHIArgOpIntoPHI(PHINode &PN) { BinOp = BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp); BinOp->copyIRFlags(PN.getIncomingValue(0)); - for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) - BinOp->andIRFlags(PN.getIncomingValue(i)); + for (Value *V : drop_begin(PN.incoming_values())) + BinOp->andIRFlags(V); PHIArgMergedDebugLoc(BinOp, PN); return BinOp; @@ -952,8 +957,8 @@ Instruction *InstCombinerImpl::foldPHIArgOpIntoPHI(PHINode &PN) { } /// Return true if this PHI node is only used by a PHI node cycle that is dead. -static bool DeadPHICycle(PHINode *PN, - SmallPtrSetImpl<PHINode*> &PotentiallyDeadPHIs) { +static bool isDeadPHICycle(PHINode *PN, + SmallPtrSetImpl<PHINode *> &PotentiallyDeadPHIs) { if (PN->use_empty()) return true; if (!PN->hasOneUse()) return false; @@ -966,7 +971,7 @@ static bool DeadPHICycle(PHINode *PN, return false; if (PHINode *PU = dyn_cast<PHINode>(PN->user_back())) - return DeadPHICycle(PU, PotentiallyDeadPHIs); + return isDeadPHICycle(PU, PotentiallyDeadPHIs); return false; } @@ -999,7 +1004,7 @@ static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal, /// Return an existing non-zero constant if this phi node has one, otherwise /// return constant 1. -static ConstantInt *GetAnyNonZeroConstInt(PHINode &PN) { +static ConstantInt *getAnyNonZeroConstInt(PHINode &PN) { assert(isa<IntegerType>(PN.getType()) && "Expect only integer type phi"); for (Value *V : PN.operands()) if (auto *ConstVA = dyn_cast<ConstantInt>(V)) @@ -1014,8 +1019,8 @@ struct PHIUsageRecord { unsigned Shift; // The amount shifted. Instruction *Inst; // The trunc instruction. 
- PHIUsageRecord(unsigned pn, unsigned Sh, Instruction *User) - : PHIId(pn), Shift(Sh), Inst(User) {} + PHIUsageRecord(unsigned Pn, unsigned Sh, Instruction *User) + : PHIId(Pn), Shift(Sh), Inst(User) {} bool operator<(const PHIUsageRecord &RHS) const { if (PHIId < RHS.PHIId) return true; @@ -1032,12 +1037,11 @@ struct LoweredPHIRecord { unsigned Shift; // The amount shifted. unsigned Width; // The width extracted. - LoweredPHIRecord(PHINode *pn, unsigned Sh, Type *Ty) - : PN(pn), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {} + LoweredPHIRecord(PHINode *Phi, unsigned Sh, Type *Ty) + : PN(Phi), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {} // Ctor form used by DenseMap. - LoweredPHIRecord(PHINode *pn, unsigned Sh) - : PN(pn), Shift(Sh), Width(0) {} + LoweredPHIRecord(PHINode *Phi, unsigned Sh) : PN(Phi), Shift(Sh), Width(0) {} }; } // namespace @@ -1093,10 +1097,13 @@ Instruction *InstCombinerImpl::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { // input is defined in the predecessor, then we won't be able to split the critical // edge which is required to insert a truncate. Because of this, we have to // bail out. - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - InvokeInst *II = dyn_cast<InvokeInst>(PN->getIncomingValue(i)); - if (!II) continue; - if (II->getParent() != PN->getIncomingBlock(i)) + for (auto Incoming : zip(PN->blocks(), PN->incoming_values())) { + BasicBlock *BB = std::get<0>(Incoming); + Value *V = std::get<1>(Incoming); + InvokeInst *II = dyn_cast<InvokeInst>(V); + if (!II) + continue; + if (II->getParent() != BB) continue; // If we have a phi, and if it's directly in the predecessor, then we have @@ -1146,8 +1153,8 @@ Instruction *InstCombinerImpl::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { array_pod_sort(PHIUsers.begin(), PHIUsers.end()); LLVM_DEBUG(dbgs() << "SLICING UP PHI: " << FirstPhi << '\n'; - for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i) dbgs() - << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] << '\n';); + for (unsigned I = 1; I != PHIsToSlice.size(); ++I) dbgs() - << "AND USER PHI #" << I << ": " << *PHIsToSlice[I] << '\n'); // PredValues - This is a temporary used when rewriting PHI nodes. It is // hoisted out here to avoid construction/destruction thrashing. @@ -1175,8 +1182,9 @@ Instruction *InstCombinerImpl::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { assert(EltPHI->getType() != PN->getType() && "Truncate didn't shrink phi?"); - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - BasicBlock *Pred = PN->getIncomingBlock(i); + for (auto Incoming : zip(PN->blocks(), PN->incoming_values())) { + BasicBlock *Pred = std::get<0>(Incoming); + Value *InVal = std::get<1>(Incoming); Value *&PredVal = PredValues[Pred]; // If we already have a value for this predecessor, reuse it. @@ -1186,7 +1194,6 @@ Instruction *InstCombinerImpl::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { } // Handle the PHI self-reuse case.
- Value *InVal = PN->getIncomingValue(i); if (InVal == PN) { PredVal = EltPHI; EltPHI->addIncoming(PredVal, Pred); @@ -1207,8 +1214,8 @@ Instruction *InstCombinerImpl::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { Builder.SetInsertPoint(Pred->getTerminator()); Value *Res = InVal; if (Offset) - Res = Builder.CreateLShr(Res, ConstantInt::get(InVal->getType(), - Offset), "extract"); + Res = Builder.CreateLShr( + Res, ConstantInt::get(InVal->getType(), Offset), "extract"); Res = Builder.CreateTrunc(Res, Ty, "extract.t"); PredVal = Res; EltPHI->addIncoming(Res, Pred); @@ -1217,12 +1224,12 @@ Instruction *InstCombinerImpl::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { // rewriting, we will ultimately delete the code we inserted. This // means we need to revisit that PHI to make sure we extract out the // needed piece. - if (PHINode *OldInVal = dyn_cast<PHINode>(PN->getIncomingValue(i))) + if (PHINode *OldInVal = dyn_cast<PHINode>(InVal)) if (PHIsInspected.count(OldInVal)) { unsigned RefPHIId = find(PHIsToSlice, OldInVal) - PHIsToSlice.begin(); - PHIUsers.push_back(PHIUsageRecord(RefPHIId, Offset, - cast<Instruction>(Res))); + PHIUsers.push_back( + PHIUsageRecord(RefPHIId, Offset, cast<Instruction>(Res))); ++UserE; } } @@ -1240,12 +1247,12 @@ Instruction *InstCombinerImpl::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { // Replace all the remaining uses of the PHI nodes (self uses and the lshrs) // with poison. Value *Poison = PoisonValue::get(FirstPhi.getType()); - for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i) - replaceInstUsesWith(*PHIsToSlice[i], Poison); + for (PHINode *PHI : drop_begin(PHIsToSlice)) + replaceInstUsesWith(*PHI, Poison); return replaceInstUsesWith(FirstPhi, Poison); } -static Value *SimplifyUsingControlFlow(InstCombiner &Self, PHINode &PN, +static Value *simplifyUsingControlFlow(InstCombiner &Self, PHINode &PN, const DominatorTree &DT) { // Simplify the following patterns: // if (cond) @@ -1302,8 +1309,8 @@ static Value *SimplifyUsingControlFlow(InstCombiner &Self, PHINode &PN, DT.dominates(FalseOutEdge, FalseIncEdge)) // This Phi is actually equivalent to branching condition of IDom. return Cond; - else if (DT.dominates(TrueOutEdge, FalseIncEdge) && - DT.dominates(FalseOutEdge, TrueIncEdge)) { + if (DT.dominates(TrueOutEdge, FalseIncEdge) && + DT.dominates(FalseOutEdge, TrueIncEdge)) { // This Phi is actually opposite to branching condition of IDom. We invert // the condition that will potentially open up some opportunities for // sinking. 
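To make the pattern concrete, here is a hedged source-level illustration (editorial, not part of the patch) of what simplifyUsingControlFlow recognizes: a phi that merges true from the taken edge and false from the other edge is just the dominating branch condition, possibly inverted.

// Before: V reaches the join block as phi [true, TrueBB], [false, FalseBB].
bool before(bool Cond) {
  bool V;
  if (Cond)
    V = true;
  else
    V = false;
  return V;
}

// After the fold the phi collapses to the condition itself (an inverted
// phi would collapse to !Cond instead).
bool after(bool Cond) { return Cond; }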
@@ -1369,7 +1376,7 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) { if (PHINode *PU = dyn_cast<PHINode>(PHIUser)) { SmallPtrSet<PHINode*, 16> PotentiallyDeadPHIs; PotentiallyDeadPHIs.insert(&PN); - if (DeadPHICycle(PU, PotentiallyDeadPHIs)) + if (isDeadPHICycle(PU, PotentiallyDeadPHIs)) return replaceInstUsesWith(PN, PoisonValue::get(PN.getType())); } @@ -1398,15 +1405,15 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) { match(CmpInst->getOperand(1), m_Zero())) { ConstantInt *NonZeroConst = nullptr; bool MadeChange = false; - for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { - Instruction *CtxI = PN.getIncomingBlock(i)->getTerminator(); - Value *VA = PN.getIncomingValue(i); + for (unsigned I = 0, E = PN.getNumIncomingValues(); I != E; ++I) { + Instruction *CtxI = PN.getIncomingBlock(I)->getTerminator(); + Value *VA = PN.getIncomingValue(I); if (isKnownNonZero(VA, DL, 0, &AC, CtxI, &DT)) { if (!NonZeroConst) - NonZeroConst = GetAnyNonZeroConstInt(PN); + NonZeroConst = getAnyNonZeroConstInt(PN); if (NonZeroConst != VA) { - replaceOperand(PN, i, NonZeroConst); + replaceOperand(PN, I, NonZeroConst); MadeChange = true; } } @@ -1457,17 +1464,17 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) { // however. PHINode *FirstPN = cast<PHINode>(PN.getParent()->begin()); if (&PN != FirstPN) - for (unsigned i = 0, e = FirstPN->getNumIncomingValues(); i != e; ++i) { - BasicBlock *BBA = PN.getIncomingBlock(i); - BasicBlock *BBB = FirstPN->getIncomingBlock(i); + for (unsigned I = 0, E = FirstPN->getNumIncomingValues(); I != E; ++I) { + BasicBlock *BBA = PN.getIncomingBlock(I); + BasicBlock *BBB = FirstPN->getIncomingBlock(I); if (BBA != BBB) { - Value *VA = PN.getIncomingValue(i); - unsigned j = PN.getBasicBlockIndex(BBB); - Value *VB = PN.getIncomingValue(j); - PN.setIncomingBlock(i, BBB); - PN.setIncomingValue(i, VB); - PN.setIncomingBlock(j, BBA); - PN.setIncomingValue(j, VA); + Value *VA = PN.getIncomingValue(I); + unsigned J = PN.getBasicBlockIndex(BBB); + Value *VB = PN.getIncomingValue(J); + PN.setIncomingBlock(I, BBB); + PN.setIncomingValue(I, VB); + PN.setIncomingBlock(J, BBA); + PN.setIncomingValue(J, VA); // NOTE: Instcombine normally would want us to "return &PN" if we // modified any of the operands of an instruction. However, since we // aren't adding or removing uses (just rearranging them) we don't do @@ -1500,7 +1507,7 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) { return Res; // Ultimately, try to replace this Phi with a dominating condition. 
- if (auto *V = SimplifyUsingControlFlow(*this, PN, DT)) + if (auto *V = simplifyUsingControlFlow(*this, PN, DT)) return replaceInstUsesWith(PN, V); return nullptr; diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 71a5ae24eead..3f064cfda712 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -1219,7 +1219,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, for (auto I = gep_type_begin(GEP), E = gep_type_end(GEP); I != E; I++) if (I.isStruct()) - return true;; + return true; return false; }; if (mayIndexStructType(cast<GetElementPtrInst>(*I))) @@ -1228,10 +1228,11 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, // Conservatively track the demanded elements back through any vector // operands we may have. We know there must be at least one, or we // wouldn't have a vector result to get here. Note that we intentionally - // merge the undef bits here since gepping with either an undef base or - // index results in undef. + // merge the undef bits here since gepping with either a poison base or + // index results in poison. for (unsigned i = 0; i < I->getNumOperands(); i++) { - if (match(I->getOperand(i), m_Undef())) { + if (i == 0 ? match(I->getOperand(i), m_Undef()) + : match(I->getOperand(i), m_Poison())) { // If the entire vector is undefined, just return this info. UndefElts = EltMask; return nullptr; @@ -1239,7 +1240,11 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, if (I->getOperand(i)->getType()->isVectorTy()) { APInt UndefEltsOp(VWidth, 0); simplifyAndSetOp(I, i, DemandedElts, UndefEltsOp); + // gep(x, undef) is not undef, so skip considering idx ops here + // Note that we could propagate poison, but we can't distinguish between + // undef & poison bits ATM + if (i == 0) + UndefElts |= UndefEltsOp; } } diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 029be5257694..3091905ca534 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -68,6 +68,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 6e72255e51ae..8f94172a6402 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -1527,22 +1527,22 @@ void AddressSanitizer::getInterestingMemoryOperands( return; if (LoadInst *LI = dyn_cast<LoadInst>(I)) { - if (!ClInstrumentReads || ignoreAccess(LI, LI->getPointerOperand())) + if (!ClInstrumentReads || ignoreAccess(I, LI->getPointerOperand())) return; Interesting.emplace_back(I, LI->getPointerOperandIndex(), false, LI->getType(), LI->getAlign()); } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { - if (!ClInstrumentWrites ||
ignoreAccess(LI, SI->getPointerOperand())) + if (!ClInstrumentWrites || ignoreAccess(I, SI->getPointerOperand())) return; Interesting.emplace_back(I, SI->getPointerOperandIndex(), true, SI->getValueOperand()->getType(), SI->getAlign()); } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) { - if (!ClInstrumentAtomics || ignoreAccess(LI, RMW->getPointerOperand())) + if (!ClInstrumentAtomics || ignoreAccess(I, RMW->getPointerOperand())) return; Interesting.emplace_back(I, RMW->getPointerOperandIndex(), true, RMW->getValOperand()->getType(), None); } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) { - if (!ClInstrumentAtomics || ignoreAccess(LI, XCHG->getPointerOperand())) + if (!ClInstrumentAtomics || ignoreAccess(I, XCHG->getPointerOperand())) return; Interesting.emplace_back(I, XCHG->getPointerOperandIndex(), true, XCHG->getCompareOperand()->getType(), None); @@ -1556,7 +1556,7 @@ void AddressSanitizer::getInterestingMemoryOperands( return; auto BasePtr = CI->getOperand(OpOffset); - if (ignoreAccess(LI, BasePtr)) + if (ignoreAccess(I, BasePtr)) return; Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType(); MaybeAlign Alignment = Align(1); @@ -1568,7 +1568,7 @@ void AddressSanitizer::getInterestingMemoryOperands( } else { for (unsigned ArgNo = 0; ArgNo < CI->arg_size(); ArgNo++) { if (!ClInstrumentByval || !CI->isByValArgument(ArgNo) || - ignoreAccess(LI, CI->getArgOperand(ArgNo))) + ignoreAccess(I, CI->getArgOperand(ArgNo))) continue; Type *Ty = CI->getParamByValType(ArgNo); Interesting.emplace_back(I, ArgNo, false, Ty, Align(1)); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp index fb10a99d1338..7b3741d19a1b 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -304,6 +304,7 @@ public: static bool isStandardLifetime(const AllocaInfo &AllocaInfo, const DominatorTree &DT); bool instrumentStack( + bool ShouldDetectUseAfterScope, MapVector<AllocaInst *, AllocaInfo> &AllocasToInstrument, SmallVector<Instruction *, 4> &UnrecognizedLifetimes, DenseMap<AllocaInst *, std::vector<DbgVariableIntrinsic *>> &AllocaDbgMap, @@ -1359,6 +1360,7 @@ bool HWAddressSanitizer::isStandardLifetime(const AllocaInfo &AllocaInfo, } bool HWAddressSanitizer::instrumentStack( + bool ShouldDetectUseAfterScope, MapVector<AllocaInst *, AllocaInfo> &AllocasToInstrument, SmallVector<Instruction *, 4> &UnrecognizedLifetimes, DenseMap<AllocaInst *, std::vector<DbgVariableIntrinsic *>> &AllocaDbgMap, @@ -1410,7 +1412,7 @@ bool HWAddressSanitizer::instrumentStack( }; bool StandardLifetime = UnrecognizedLifetimes.empty() && isStandardLifetime(Info, GetDT()); - if (DetectUseAfterScope && StandardLifetime) { + if (ShouldDetectUseAfterScope && StandardLifetime) { IntrinsicInst *Start = Info.LifetimeStart[0]; IRB.SetInsertPoint(Start->getNextNode()); tagAlloca(IRB, AI, Tag, Size); @@ -1505,8 +1507,14 @@ bool HWAddressSanitizer::sanitizeFunction( SmallVector<Instruction *, 8> LandingPadVec; SmallVector<Instruction *, 4> UnrecognizedLifetimes; DenseMap<AllocaInst *, std::vector<DbgVariableIntrinsic *>> AllocaDbgMap; + bool CallsReturnTwice = false; for (auto &BB : F) { for (auto &Inst : BB) { + if (CallInst *CI = dyn_cast<CallInst>(&Inst)) { + if (CI->canReturnTwice()) { + CallsReturnTwice = true; + } + } if (InstrumentStack) { if (AllocaInst 
*AI = dyn_cast<AllocaInst>(&Inst)) { if (isInterestingAlloca(*AI)) @@ -1531,9 +1539,14 @@ bool HWAddressSanitizer::sanitizeFunction( } } - if (isa<ReturnInst>(Inst) || isa<ResumeInst>(Inst) || - isa<CleanupReturnInst>(Inst)) + if (isa<ReturnInst>(Inst)) { + if (CallInst *CI = Inst.getParent()->getTerminatingMustTailCall()) + RetVec.push_back(CI); + else + RetVec.push_back(&Inst); + } else if (isa<ResumeInst, CleanupReturnInst>(Inst)) { RetVec.push_back(&Inst); + } if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&Inst)) { for (Value *V : DVI->location_ops()) { @@ -1585,7 +1598,12 @@ bool HWAddressSanitizer::sanitizeFunction( if (!AllocasToInstrument.empty()) { Value *StackTag = ClGenerateTagsWithCalls ? nullptr : getStackBaseTag(EntryIRB); - instrumentStack(AllocasToInstrument, UnrecognizedLifetimes, AllocaDbgMap, + // Calls to functions that may return twice (e.g. setjmp) confuse the + // postdominator analysis, and will leave us to keep memory tagged after + // function return. Work around this by always untagging at every return + // statement if return_twice functions are called. + instrumentStack(DetectUseAfterScope && !CallsReturnTwice, + AllocasToInstrument, UnrecognizedLifetimes, AllocaDbgMap, RetVec, StackTag, GetDT, GetPDT); } // Pad and align each of the allocas that we instrumented to stop small diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index ab179b03dd29..6868408ef5f5 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -456,6 +456,9 @@ bool InstrProfiling::lowerIntrinsics(Function *F) { } else if (auto *IPI = dyn_cast<InstrProfIncrementInst>(&Instr)) { lowerIncrement(IPI); MadeChange = true; + } else if (auto *IPC = dyn_cast<InstrProfCoverInst>(&Instr)) { + lowerCover(IPC); + MadeChange = true; } else if (auto *IPVP = dyn_cast<InstrProfValueProfileInst>(&Instr)) { lowerValueProfileInst(IPVP); MadeChange = true; @@ -539,7 +542,8 @@ static bool containsProfilingIntrinsics(Module &M) { return !F->use_empty(); return false; }; - return containsIntrinsic(llvm::Intrinsic::instrprof_increment) || + return containsIntrinsic(llvm::Intrinsic::instrprof_cover) || + containsIntrinsic(llvm::Intrinsic::instrprof_increment) || containsIntrinsic(llvm::Intrinsic::instrprof_increment_step) || containsIntrinsic(llvm::Intrinsic::instrprof_value_profile); } @@ -689,47 +693,58 @@ void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) { Ind->eraseFromParent(); } -void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) { - GlobalVariable *Counters = getOrCreateRegionCounters(Inc); - - IRBuilder<> Builder(Inc); - uint64_t Index = Inc->getIndex()->getZExtValue(); - Value *Addr = Builder.CreateConstInBoundsGEP2_32(Counters->getValueType(), - Counters, 0, Index); - - if (isRuntimeCounterRelocationEnabled()) { - Type *Int64Ty = Type::getInt64Ty(M->getContext()); - Type *Int64PtrTy = Type::getInt64PtrTy(M->getContext()); - Function *Fn = Inc->getParent()->getParent(); - Instruction &I = Fn->getEntryBlock().front(); - LoadInst *LI = dyn_cast<LoadInst>(&I); - if (!LI) { - IRBuilder<> Builder(&I); - GlobalVariable *Bias = - M->getGlobalVariable(getInstrProfCounterBiasVarName()); - if (!Bias) { - // Compiler must define this variable when runtime counter relocation - // is being used. 
Runtime has a weak external reference that is used - // to check whether that's the case or not. - Bias = new GlobalVariable( - *M, Int64Ty, false, GlobalValue::LinkOnceODRLinkage, - Constant::getNullValue(Int64Ty), getInstrProfCounterBiasVarName()); - Bias->setVisibility(GlobalVariable::HiddenVisibility); - // A definition that's weak (linkonce_odr) without being in a COMDAT - // section wouldn't lead to link errors, but it would lead to a dead - // data word from every TU but one. Putting it in COMDAT ensures there - // will be exactly one data slot in the link. - if (TT.supportsCOMDAT()) - Bias->setComdat(M->getOrInsertComdat(Bias->getName())); - } - LI = Builder.CreateLoad(Int64Ty, Bias); +Value *InstrProfiling::getCounterAddress(InstrProfInstBase *I) { + auto *Counters = getOrCreateRegionCounters(I); + IRBuilder<> Builder(I); + + auto *Addr = Builder.CreateConstInBoundsGEP2_32( + Counters->getValueType(), Counters, 0, I->getIndex()->getZExtValue()); + + if (!isRuntimeCounterRelocationEnabled()) + return Addr; + + Type *Int64Ty = Type::getInt64Ty(M->getContext()); + Function *Fn = I->getParent()->getParent(); + Instruction &EntryI = Fn->getEntryBlock().front(); + LoadInst *LI = dyn_cast<LoadInst>(&EntryI); + if (!LI) { + IRBuilder<> EntryBuilder(&EntryI); + auto *Bias = M->getGlobalVariable(getInstrProfCounterBiasVarName()); + if (!Bias) { + // Compiler must define this variable when runtime counter relocation + // is being used. Runtime has a weak external reference that is used + // to check whether that's the case or not. + Bias = new GlobalVariable( + *M, Int64Ty, false, GlobalValue::LinkOnceODRLinkage, + Constant::getNullValue(Int64Ty), getInstrProfCounterBiasVarName()); + Bias->setVisibility(GlobalVariable::HiddenVisibility); + // A definition that's weak (linkonce_odr) without being in a COMDAT + // section wouldn't lead to link errors, but it would lead to a dead + // data word from every TU but one. Putting it in COMDAT ensures there + // will be exactly one data slot in the link. + if (TT.supportsCOMDAT()) + Bias->setComdat(M->getOrInsertComdat(Bias->getName())); } - auto *Add = Builder.CreateAdd(Builder.CreatePtrToInt(Addr, Int64Ty), LI); - Addr = Builder.CreateIntToPtr(Add, Int64PtrTy); + LI = EntryBuilder.CreateLoad(Int64Ty, Bias); } + auto *Add = Builder.CreateAdd(Builder.CreatePtrToInt(Addr, Int64Ty), LI); + return Builder.CreateIntToPtr(Add, Addr->getType()); +} + +void InstrProfiling::lowerCover(InstrProfCoverInst *CoverInstruction) { + auto *Addr = getCounterAddress(CoverInstruction); + IRBuilder<> Builder(CoverInstruction); + // We store zero to represent that this block is covered. 
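+ // (Cover counters are created in createRegionCounters below with every + // byte initialized to all ones, so a zero byte in the final profile means + // the block executed at least once.)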
+ Builder.CreateStore(Builder.getInt8(0), Addr); + CoverInstruction->eraseFromParent(); +} + +void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) { + auto *Addr = getCounterAddress(Inc); + IRBuilder<> Builder(Inc); if (Options.Atomic || AtomicCounterUpdateAll || - (Index == 0 && AtomicFirstCounter)) { + (Inc->getIndex()->isZeroValue() && AtomicFirstCounter)) { Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, Inc->getStep(), MaybeAlign(), AtomicOrdering::Monotonic); } else { @@ -849,6 +864,31 @@ static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT) { } GlobalVariable * +InstrProfiling::createRegionCounters(InstrProfInstBase *Inc, StringRef Name, + GlobalValue::LinkageTypes Linkage) { + uint64_t NumCounters = Inc->getNumCounters()->getZExtValue(); + auto &Ctx = M->getContext(); + GlobalVariable *GV; + if (isa<InstrProfCoverInst>(Inc)) { + auto *CounterTy = Type::getInt8Ty(Ctx); + auto *CounterArrTy = ArrayType::get(CounterTy, NumCounters); + // TODO: `Constant::getAllOnesValue()` does not yet accept an array type. + std::vector<Constant *> InitialValues(NumCounters, + Constant::getAllOnesValue(CounterTy)); + GV = new GlobalVariable(*M, CounterArrTy, false, Linkage, + ConstantArray::get(CounterArrTy, InitialValues), + Name); + GV->setAlignment(Align(1)); + } else { + auto *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters); + GV = new GlobalVariable(*M, CounterTy, false, Linkage, + Constant::getNullValue(CounterTy), Name); + GV->setAlignment(Align(8)); + } + return GV; +} + +GlobalVariable * InstrProfiling::getOrCreateRegionCounters(InstrProfInstBase *Inc) { GlobalVariable *NamePtr = Inc->getName(); auto &PD = ProfileDataMap[NamePtr]; @@ -914,16 +954,11 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfInstBase *Inc) { uint64_t NumCounters = Inc->getNumCounters()->getZExtValue(); LLVMContext &Ctx = M->getContext(); - ArrayType *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters); - // Create the counters variable. 
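// A minimal memory model of the single-byte coverage counters created by
// createRegionCounters() above, assuming four regions: each slot starts as
// all-ones, and lowerCover() overwrites it with zero, so a slot that is
// still 0xFF after the run means the region never executed.
#include <cstdint>
static uint8_t CoverageCounters[4] = {0xFF, 0xFF, 0xFF, 0xFF};
static inline void coverRegion(unsigned Idx) {
  CoverageCounters[Idx] = 0; // plain byte store; no atomic RMW is needed
}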
- auto *CounterPtr = - new GlobalVariable(*M, CounterTy, false, Linkage, - Constant::getNullValue(CounterTy), CntsVarName); + auto *CounterPtr = createRegionCounters(Inc, CntsVarName, Linkage); CounterPtr->setVisibility(Visibility); CounterPtr->setSection( getInstrProfSectionName(IPSK_cnts, TT.getObjectFormat())); - CounterPtr->setAlignment(Align(8)); MaybeSetComdat(CounterPtr); CounterPtr->setLinkage(Linkage); PD.RegionCounters = CounterPtr; diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp index 8fedefccf0e1..5e078f2c4212 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp @@ -26,6 +26,7 @@ #include "llvm/IR/GlobalValue.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index cfe993dedbc2..c51acdf52f14 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -182,6 +182,7 @@ #include "llvm/IR/ValueMap.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" +#include "llvm/Support/Alignment.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" @@ -1718,11 +1719,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { // Figure out maximal valid memcpy alignment. const Align ArgAlign = DL.getValueOrABITypeAlignment( MaybeAlign(FArg.getParamAlignment()), FArg.getParamByValType()); - Value *CpShadowPtr = + Value *CpShadowPtr, *CpOriginPtr; + std::tie(CpShadowPtr, CpOriginPtr) = getShadowOriginPtr(V, EntryIRB, EntryIRB.getInt8Ty(), ArgAlign, - /*isStore*/ true) - .first; - // TODO(glider): need to copy origins. + /*isStore*/ true); if (!PropagateShadow || Overflow) { // ParamTLS overflow. 
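// The origin copies introduced in this change round the copied size up to the
// origin granularity; a sketch of that rounding, assuming the 4-byte
// kMinOriginAlignment this file uses:
static unsigned originCopySize(unsigned Size) {
  const unsigned Granularity = 4; // kMinOriginAlignment, assumed
  return (Size + Granularity - 1) & ~(Granularity - 1); // alignTo(Size, 4)
}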
EntryIRB.CreateMemSet( @@ -1735,6 +1735,19 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { CopyAlign, Size); LLVM_DEBUG(dbgs() << " ByValCpy: " << *Cpy << "\n"); (void)Cpy; + + if (MS.TrackOrigins) { + Value *OriginPtr = + getOriginPtrForArgument(&FArg, EntryIRB, ArgOffset); + // FIXME: OriginSize should be: + // alignTo(V % kMinOriginAlignment + Size, kMinOriginAlignment) + unsigned OriginSize = alignTo(Size, kMinOriginAlignment); + EntryIRB.CreateMemCpy( + CpOriginPtr, + /* by getShadowOriginPtr */ kMinOriginAlignment, OriginPtr, + /* by origin_tls[ArgOffset] */ kMinOriginAlignment, + OriginSize); + } } } @@ -3701,7 +3714,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { insertShadowCheck(A, &CB); Size = DL.getTypeAllocSize(A->getType()); } else { - bool ArgIsInitialized = false; Value *Store = nullptr; // Compute the Shadow for arg even if it is ByVal, because // in that case getShadow() will copy the actual arg shadow to @@ -3722,10 +3734,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { MaybeAlign Alignment = llvm::None; if (ParamAlignment) Alignment = std::min(*ParamAlignment, kShadowTLSAlignment); - Value *AShadowPtr = + Value *AShadowPtr, *AOriginPtr; + std::tie(AShadowPtr, AOriginPtr) = getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), Alignment, - /*isStore*/ false) - .first; + /*isStore*/ false); if (!PropagateShadow) { Store = IRB.CreateMemSet(ArgShadowBase, Constant::getNullValue(IRB.getInt8Ty()), @@ -3733,6 +3745,17 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { } else { Store = IRB.CreateMemCpy(ArgShadowBase, Alignment, AShadowPtr, Alignment, Size); + if (MS.TrackOrigins) { + Value *ArgOriginBase = getOriginPtrForArgument(A, IRB, ArgOffset); + // FIXME: OriginSize should be: + // alignTo(A % kMinOriginAlignment + Size, kMinOriginAlignment) + unsigned OriginSize = alignTo(Size, kMinOriginAlignment); + IRB.CreateMemCpy( + ArgOriginBase, + /* by origin_tls[ArgOffset] */ kMinOriginAlignment, + AOriginPtr, + /* by getShadowOriginPtr */ kMinOriginAlignment, OriginSize); + } } } else { // Any other parameters mean we need bit-grained tracking of uninit @@ -3743,12 +3766,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { Store = IRB.CreateAlignedStore(ArgShadow, ArgShadowBase, kShadowTLSAlignment); Constant *Cst = dyn_cast<Constant>(ArgShadow); - if (Cst && Cst->isNullValue()) - ArgIsInitialized = true; + if (MS.TrackOrigins && !(Cst && Cst->isNullValue())) { + IRB.CreateStore(getOrigin(A), + getOriginPtrForArgument(A, IRB, ArgOffset)); + } } - if (MS.TrackOrigins && !ArgIsInitialized) - IRB.CreateStore(getOrigin(A), - getOriginPtrForArgument(A, IRB, ArgOffset)); (void)Store; assert(Store != nullptr); LLVM_DEBUG(dbgs() << " Param:" << *Store << "\n"); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index c46415e5b1f4..0902a94452e3 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -255,6 +255,11 @@ static cl::opt<bool> PGOInstrumentEntry( "pgo-instrument-entry", cl::init(false), cl::Hidden, cl::desc("Force to instrument function entry basicblock.")); +static cl::opt<bool> PGOFunctionEntryCoverage( + "pgo-function-entry-coverage", cl::init(false), cl::Hidden, cl::ZeroOrMore, + 
cl::desc( + "Use this option to enable function entry coverage instrumentation.")); + static cl::opt<bool> PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden, cl::desc("Fix function entry count in profile use.")); @@ -337,6 +342,33 @@ static const char *ValueProfKindDescr[] = { #include "llvm/ProfileData/InstrProfData.inc" }; +// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime +// aware this is an ir_level profile so it can set the version flag. +static GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS) { + const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)); + Type *IntTy64 = Type::getInt64Ty(M.getContext()); + uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF); + if (IsCS) + ProfileVersion |= VARIANT_MASK_CSIR_PROF; + if (PGOInstrumentEntry) + ProfileVersion |= VARIANT_MASK_INSTR_ENTRY; + if (DebugInfoCorrelate) + ProfileVersion |= VARIANT_MASK_DBG_CORRELATE; + if (PGOFunctionEntryCoverage) + ProfileVersion |= + VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY; + auto IRLevelVersionVariable = new GlobalVariable( + M, IntTy64, true, GlobalValue::WeakAnyLinkage, + Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName); + IRLevelVersionVariable->setVisibility(GlobalValue::DefaultVisibility); + Triple TT(M.getTargetTriple()); + if (TT.supportsCOMDAT()) { + IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage); + IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName)); + } + return IRLevelVersionVariable; +} + namespace { /// The select instruction visitor plays three roles specified @@ -469,9 +501,7 @@ private: createProfileFileNameVar(M, InstrProfileOutput); // The variable in a comdat may be discarded by LTO. Ensure the // declaration will be retained. 
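// A sketch of decoding the version word assembled by
// createIRLevelProfileFlagVar() above; the VARIANT_MASK_* names are the ones
// this file already uses, defined via InstrProfData.inc:
static inline bool usesByteCoverage(uint64_t Version) {
  return (Version & VARIANT_MASK_BYTE_COVERAGE) != 0;
}
static inline bool isFunctionEntryOnly(uint64_t Version) {
  return (Version & VARIANT_MASK_FUNCTION_ENTRY_ONLY) != 0;
}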
- appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true, - PGOInstrumentEntry, - DebugInfoCorrelate)); + appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true)); return false; } std::string InstrProfileOutput; @@ -914,22 +944,39 @@ static void instrumentOneFunc( FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo( F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry); + + Type *I8PtrTy = Type::getInt8PtrTy(M->getContext()); + auto Name = ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy); + auto CFGHash = ConstantInt::get(Type::getInt64Ty(M->getContext()), + FuncInfo.FunctionHash); + if (PGOFunctionEntryCoverage) { + assert(!IsCS && + "entry coverge does not support context-sensitive instrumentation"); + auto &EntryBB = F.getEntryBlock(); + IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt()); + // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>, + // i32 <index>) + Builder.CreateCall( + Intrinsic::getDeclaration(M, Intrinsic::instrprof_cover), + {Name, CFGHash, Builder.getInt32(1), Builder.getInt32(0)}); + return; + } + std::vector<BasicBlock *> InstrumentBBs; FuncInfo.getInstrumentBBs(InstrumentBBs); unsigned NumCounters = InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); uint32_t I = 0; - Type *I8PtrTy = Type::getInt8PtrTy(M->getContext()); for (auto *InstrBB : InstrumentBBs) { IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt()); assert(Builder.GetInsertPoint() != InstrBB->end() && "Cannot get the Instrumentation point"); + // llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>, + // i32 <index>) Builder.CreateCall( Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment), - {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), - Builder.getInt64(FuncInfo.FunctionHash), Builder.getInt32(NumCounters), - Builder.getInt32(I++)}); + {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I++)}); } // Now instrument select instructions: @@ -1502,6 +1549,8 @@ void PGOUseFunc::annotateIrrLoopHeaderWeights() { } void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) { + if (PGOFunctionEntryCoverage) + return; Module *M = F.getParent(); IRBuilder<> Builder(&SI); Type *Int64Ty = Builder.getInt64Ty(); @@ -1622,8 +1671,7 @@ static bool InstrumentAllFunctions( // For the context-sensitve instrumentation, we should have a separated pass // (before LTO/ThinLTO linking) to create these variables. if (!IsCS) - createIRLevelProfileFlagVar(M, /*IsCS=*/false, PGOInstrumentEntry, - DebugInfoCorrelate); + createIRLevelProfileFlagVar(M, /*IsCS=*/false); std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; collectComdatMembers(M, ComdatMembers); @@ -1645,9 +1693,7 @@ PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &AM) { createProfileFileNameVar(M, CSInstrName); // The variable in a comdat may be discarded by LTO. Ensure the declaration // will be retained. 
- appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true, - PGOInstrumentEntry, - DebugInfoCorrelate)); + appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true)); return PreservedAnalyses::all(); } @@ -1844,6 +1890,18 @@ static bool annotateAllFunctions( ProfileFileName.data(), "Not an IR level instrumentation profile")); return false; } + if (PGOReader->hasSingleByteCoverage()) { + Ctx.diagnose(DiagnosticInfoPGOProfile( + ProfileFileName.data(), + "Cannot use coverage profiles for optimization")); + return false; + } + if (PGOReader->functionEntryOnly()) { + Ctx.diagnose(DiagnosticInfoPGOProfile( + ProfileFileName.data(), + "Function entry profiles are not yet supported for optimization")); + return false; + } // Add the profile summary (read from the header of the indexed summary) here // so that we can use it below when reading counters (which checks if the diff --git a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp index 1ca6ddabac5b..126845bb3308 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp @@ -123,20 +123,9 @@ BundledRetainClaimRVs::~BundledRetainClaimRVs() { // can't be tail calls. if (auto *CI = dyn_cast<CallInst>(CB)) CI->setTailCallKind(CallInst::TCK_NoTail); - - if (UseMarker) { - // Remove the retainRV/claimRV function operand from the operand bundle - // to reflect the fact that the backend is responsible for emitting only - // the marker instruction, but not the retainRV/claimRV call. - OperandBundleDef OB("clang.arc.attachedcall", None); - auto *NewCB = CallBase::Create(CB, OB, CB); - CB->replaceAllUsesWith(NewCB); - CB->eraseFromParent(); - } } - if (!ContractPass || !UseMarker) - EraseInstruction(P.first); + EraseInstruction(P.first); } RVCalls.clear(); diff --git a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARC.h b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARC.h index 2b47bec7ffe8..62f88a8cc02b 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARC.h +++ b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARC.h @@ -105,8 +105,7 @@ CallInst *createCallInstWithColors( class BundledRetainClaimRVs { public: - BundledRetainClaimRVs(bool ContractPass, bool UseMarker) - : ContractPass(ContractPass), UseMarker(UseMarker) {} + BundledRetainClaimRVs(bool ContractPass) : ContractPass(ContractPass) {} ~BundledRetainClaimRVs(); /// Insert a retainRV/claimRV call to the normal destination blocks of invokes @@ -156,9 +155,6 @@ private: DenseMap<CallInst *, CallBase *> RVCalls; bool ContractPass; - - /// Indicates whether the target uses a special inline-asm marker. - bool UseMarker; }; } // end namespace objcarc diff --git a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp index 9e2832827686..2985ae004d3c 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp @@ -434,23 +434,20 @@ bool ObjCARCContract::tryToPeepholeInstruction( LLVM_FALLTHROUGH; case ARCInstKind::RetainRV: case ARCInstKind::UnsafeClaimRV: { - bool IsInstContainedInBundle = BundledInsts->contains(Inst); - - // Return now if the target doesn't need a special inline-asm marker. 
Return - // true if this is a bundled retainRV/claimRV call, which is going to be - // erased at the end of this pass, to avoid undoing objc-arc-expand and + // Return true if this is a bundled retainRV/claimRV call, which is always + // redundant with the attachedcall in the bundle, and is going to be erased + // at the end of this pass. This avoids undoing objc-arc-expand and // replacing uses of the retainRV/claimRV call's argument with its result. - if (!RVInstMarker) - return IsInstContainedInBundle; - - // The target needs a special inline-asm marker. + if (BundledInsts->contains(Inst)) + return true; - // We don't have to emit the marker if this is a bundled call since the - // backend is responsible for emitting it. Return false to undo - // objc-arc-expand. - if (IsInstContainedInBundle) + // If this isn't a bundled call, and the target doesn't need a special + // inline-asm marker, we're done: return now, and undo objc-arc-expand. + if (!RVInstMarker) return false; + // The target needs a special inline-asm marker. Insert it. + BasicBlock::iterator BBI = Inst->getIterator(); BasicBlock *InstParent = Inst->getParent(); @@ -548,7 +545,7 @@ bool ObjCARCContract::run(Function &F, AAResults *A, DominatorTree *D) { AA = A; DT = D; PA.setAA(A); - BundledRetainClaimRVs BRV(true, RVInstMarker); + BundledRetainClaimRVs BRV(/*ContractPass=*/true); BundledInsts = &BRV; std::pair<bool, bool> R = BundledInsts->insertAfterInvokes(F, DT); diff --git a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index b6dc97f1e43f..e1a000b31cf9 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -2459,7 +2459,7 @@ bool ObjCARCOpt::run(Function &F, AAResults &AA) { return false; Changed = CFGChanged = false; - BundledRetainClaimRVs BRV(false, objcarc::getRVInstMarker(*F.getParent())); + BundledRetainClaimRVs BRV(/*ContractPass=*/false); BundledInsts = &BRV; LLVM_DEBUG(dbgs() << "<<< ObjCARCOpt: Visiting Function: " << F.getName() diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp index dda1a2f08076..143a78f604fc 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp @@ -357,7 +357,7 @@ typedef DenseMap<BasicBlock *, CloneList> DuplicateBlockMap; // This map keeps track of all the new definitions for an instruction. This // information is needed when restoring SSA form after cloning blocks. -typedef DenseMap<Instruction *, std::vector<Instruction *>> DefMap; +typedef MapVector<Instruction *, std::vector<Instruction *>> DefMap; inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) { OS << "< "; @@ -1126,6 +1126,9 @@ private: /// Add new value mappings to the DefMap to keep track of all new definitions /// for a particular instruction. These will be used while updating SSA form. 
void updateDefMap(DefMap &NewDefs, ValueToValueMapTy &VMap) { + SmallVector<std::pair<Instruction *, Instruction *>> NewDefsVector; + NewDefsVector.reserve(VMap.size()); + for (auto Entry : VMap) { Instruction *Inst = dyn_cast<Instruction>(const_cast<Value *>(Entry.first)); @@ -1138,11 +1141,18 @@ private: if (!Cloned) continue; - if (NewDefs.find(Inst) == NewDefs.end()) - NewDefs[Inst] = {Cloned}; - else - NewDefs[Inst].push_back(Cloned); + NewDefsVector.push_back({Inst, Cloned}); } + + // Sort the defs to get deterministic insertion order into NewDefs. + sort(NewDefsVector, [](const auto &LHS, const auto &RHS) { + if (LHS.first == RHS.first) + return LHS.second->comesBefore(RHS.second); + return LHS.first->comesBefore(RHS.first); + }); + + for (const auto &KV : NewDefsVector) + NewDefs[KV.first].push_back(KV.second); } /// Update the last branch of a particular cloned path to point to the correct diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFuse.cpp index ca19913e37ee..bf4d275e04ba 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFuse.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFuse.cpp @@ -192,6 +192,7 @@ struct FusionCandidate { GuardBranch(L->getLoopGuardBranch()), PP(PP), AbleToPeel(canPeel(L)), Peeled(false), DT(DT), PDT(PDT), ORE(ORE) { + assert(DT && "Expected non-null DT!"); // Walk over all blocks in the loop and check for conditions that may // prevent fusion. For each block, walk over all instructions and collect // the memory reads and writes If any instructions that prevent fusion are @@ -767,7 +768,7 @@ private: LLVM_DEBUG(dbgs() << "Attempting to peel first " << PeelCount << " iterations of the first loop. \n"); - FC0.Peeled = peelLoop(FC0.L, PeelCount, &LI, &SE, &DT, &AC, true); + FC0.Peeled = peelLoop(FC0.L, PeelCount, &LI, &SE, DT, &AC, true); if (FC0.Peeled) { LLVM_DEBUG(dbgs() << "Done Peeling\n"); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 35ba4e2b4032..318c4c06f0f7 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -1172,8 +1172,15 @@ bool LoopIdiomRecognize::processLoopStridedStore( CallInst *NewCall; if (SplatValue) { - NewCall = Builder.CreateMemSet(BasePtr, SplatValue, NumBytes, - MaybeAlign(StoreAlignment)); + AAMDNodes AATags = TheStore->getAAMetadata(); + if (auto CI = dyn_cast<ConstantInt>(NumBytes)) + AATags = AATags.extendTo(CI->getZExtValue()); + else + AATags = AATags.extendTo(-1); + + NewCall = Builder.CreateMemSet( + BasePtr, SplatValue, NumBytes, MaybeAlign(StoreAlignment), + /*isVolatile=*/false, AATags.TBAA, AATags.Scope, AATags.NoAlias); } else { // Everything is emitted in default address space Type *Int8PtrTy = DestInt8PtrTy; @@ -1452,17 +1459,28 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad( Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator()); + AAMDNodes AATags = TheLoad->getAAMetadata(); + AAMDNodes StoreAATags = TheStore->getAAMetadata(); + AATags = AATags.merge(StoreAATags); + if (auto CI = dyn_cast<ConstantInt>(NumBytes)) + AATags = AATags.extendTo(CI->getZExtValue()); + else + AATags = AATags.extendTo(-1); + CallInst *NewCall = nullptr; // Check whether to generate an unordered atomic memcpy: // If the load or store are atomic, then they must 
necessarily be unordered // by previous checks. if (!TheStore->isAtomic() && !TheLoad->isAtomic()) { if (UseMemMove) - NewCall = Builder.CreateMemMove(StoreBasePtr, StoreAlign, LoadBasePtr, - LoadAlign, NumBytes); + NewCall = Builder.CreateMemMove( + StoreBasePtr, StoreAlign, LoadBasePtr, LoadAlign, NumBytes, + /*isVolatile=*/false, AATags.TBAA, AATags.Scope, AATags.NoAlias); else - NewCall = Builder.CreateMemCpy(StoreBasePtr, StoreAlign, LoadBasePtr, - LoadAlign, NumBytes); + NewCall = + Builder.CreateMemCpy(StoreBasePtr, StoreAlign, LoadBasePtr, LoadAlign, + NumBytes, /*isVolatile=*/false, AATags.TBAA, + AATags.TBAAStruct, AATags.Scope, AATags.NoAlias); } else { // For now don't support unordered atomic memmove. if (UseMemMove) @@ -1486,7 +1504,8 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad( // have an alignment but non-atomic loads/stores may not. NewCall = Builder.CreateElementUnorderedAtomicMemCpy( StoreBasePtr, StoreAlign.getValue(), LoadBasePtr, LoadAlign.getValue(), - NumBytes, StoreSize); + NumBytes, StoreSize, AATags.TBAA, AATags.TBAAStruct, AATags.Scope, + AATags.NoAlias); } NewCall->setDebugLoc(TheStore->getDebugLoc()); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp index 728d63fe2847..d3fcba10c275 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp @@ -468,7 +468,7 @@ private: LI.removeBlock(BB); } - DetatchDeadBlocks(DeadLoopBlocks, &DTUpdates, /*KeepOneInputPHIs*/true); + detachDeadBlocks(DeadLoopBlocks, &DTUpdates, /*KeepOneInputPHIs*/true); DTU.applyUpdates(DTUpdates); DTUpdates.clear(); for (auto *BB : DeadLoopBlocks) diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp index 022d9c7abc8c..9beb2281cf0f 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -1281,7 +1281,7 @@ static LoopUnrollResult tryToUnrollLoop( << " iterations"; }); - if (peelLoop(L, PP.PeelCount, LI, &SE, &DT, &AC, PreserveLCSSA)) { + if (peelLoop(L, PP.PeelCount, LI, &SE, DT, &AC, PreserveLCSSA)) { simplifyLoopAfterUnroll(L, true, LI, &SE, &DT, &AC, &TTI); // If the loop was peeled, we already "used up" the profile information // we had, so we don't want to unroll or peel again. diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp index 8f1d0181ee5b..296becb31e8f 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp @@ -1339,16 +1339,21 @@ public: // Copy load operand to new alloca. Builder.SetInsertPoint(Copy, Copy->begin()); - AllocaInst *NewLd = - Builder.CreateAlloca(Load->getType(), Load->getPointerAddressSpace()); - Builder.CreateMemCpy(NewLd, NewLd->getAlign(), - Load->getPointerOperand(), Load->getAlign(), - LoadLoc.Size.getValue()); + auto *VT = cast<FixedVectorType>(Load->getType()); + // Use an array type for the alloca, to avoid potentially huge alignment + // requirements for large vector types. 
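// Why the array type matters, in isolation: a wide vector type can carry a
// large preferred alignment, while an array of the same elements only needs
// the element alignment; the bitcast below then restores a vector-typed
// pointer for the memcpy. A sketch (exact values are target-dependent):
static void allocaAlignmentSketch(Module &M) {
  const DataLayout &DL = M.getDataLayout();
  Type *F64 = Type::getDoubleTy(M.getContext());
  Align VecAlign = DL.getPrefTypeAlign(FixedVectorType::get(F64, 16));
  Align ArrAlign = DL.getPrefTypeAlign(ArrayType::get(F64, 16));
  (void)VecAlign; // may be much larger than ...
  (void)ArrAlign; // ... the plain element alignment
}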
+ auto *ArrayTy = ArrayType::get(VT->getElementType(), VT->getNumElements()); + AllocaInst *Alloca = + Builder.CreateAlloca(ArrayTy, Load->getPointerAddressSpace()); + Value *BC = Builder.CreateBitCast(Alloca, VT->getPointerTo()); + + Builder.CreateMemCpy(BC, Alloca->getAlign(), Load->getPointerOperand(), + Load->getAlign(), LoadLoc.Size.getValue()); Builder.SetInsertPoint(Fusion, Fusion->begin()); PHINode *PHI = Builder.CreatePHI(Load->getPointerOperandType(), 3); PHI->addIncoming(Load->getPointerOperand(), Check0); PHI->addIncoming(Load->getPointerOperand(), Check1); - PHI->addIncoming(NewLd, Copy); + PHI->addIncoming(BC, Copy); // Adjust DT. DTUpdates.push_back({DT->Insert, Check0, Check1}); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/NewGVN.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/NewGVN.cpp index 2476e6c408b1..f35c9212a6f9 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/NewGVN.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/NewGVN.cpp @@ -77,6 +77,7 @@ #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Argument.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" @@ -1736,18 +1737,18 @@ NewGVN::performSymbolicPHIEvaluation(ArrayRef<ValPair> PHIOps, if (Filtered.empty()) { // If it has undef or poison at this point, it means there are no-non-undef // arguments, and thus, the value of the phi node must be undef. - if (HasPoison && !HasUndef) { - LLVM_DEBUG( - dbgs() << "PHI Node " << *I - << " has no non-poison arguments, valuing it as poison\n"); - return createConstantExpression(PoisonValue::get(I->getType())); - } if (HasUndef) { LLVM_DEBUG( dbgs() << "PHI Node " << *I << " has no non-undef arguments, valuing it as undef\n"); return createConstantExpression(UndefValue::get(I->getType())); } + if (HasPoison) { + LLVM_DEBUG( + dbgs() << "PHI Node " << *I + << " has no non-poison arguments, valuing it as poison\n"); + return createConstantExpression(PoisonValue::get(I->getType())); + } LLVM_DEBUG(dbgs() << "No arguments of PHI node " << *I << " are live\n"); deleteExpression(E); @@ -1757,6 +1758,11 @@ NewGVN::performSymbolicPHIEvaluation(ArrayRef<ValPair> PHIOps, ++Filtered.begin(); // Can't use std::equal here, sadly, because filter.begin moves. if (llvm::all_of(Filtered, [&](Value *Arg) { return Arg == AllSameValue; })) { + // Can't fold phi(undef, X) -> X unless X can't be poison (thus X is undef + // in the worst case). + if (HasUndef && !isGuaranteedNotToBePoison(AllSameValue, AC, nullptr, DT)) + return E; + // In LLVM's non-standard representation of phi nodes, it's possible to have // phi nodes with cycles (IE dependent on other phis that are .... dependent // on the original phi node), especially in weird CFG's where some arguments @@ -1764,8 +1770,8 @@ NewGVN::performSymbolicPHIEvaluation(ArrayRef<ValPair> PHIOps, // infinite loops during evaluation. We work around this by not trying to // really evaluate them independently, but instead using a variable // expression to say if one is equivalent to the other. - // We also special case undef, so that if we have an undef, we can't use the - // common value unless it dominates the phi block. + // We also special case undef/poison, so that if we have an undef, we can't + // use the common value unless it dominates the phi block. 
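// Why the isGuaranteedNotToBePoison() guard added above is needed, on a
// miniature example (IR in comments; %x is hypothetical):
//   %x   = add nsw i32 %a, 1                    ; may be poison on overflow
//   %phi = phi i32 [ undef, %bb0 ], [ %x, %bb1 ]
// On the %bb0 edge the phi is undef; folding %phi -> %x could replace that
// undef with poison, a strictly stronger value, introducing UB the original
// program did not have. Hence the fold is gated on:
bool FoldIsSafe = isGuaranteedNotToBePoison(AllSameValue, AC, nullptr, DT);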
if (HasPoison || HasUndef) { // If we have undef and at least one other value, this is really a // multivalued phi, and we need to know if it's cycle free in order to @@ -2853,14 +2859,14 @@ NewGVN::makePossiblePHIOfOps(Instruction *I, } // The algorithm initially places the values of the routine in the TOP -// congruence class. The leader of TOP is the undetermined value `undef`. +// congruence class. The leader of TOP is the undetermined value `poison`. // When the algorithm has finished, values still in TOP are unreachable. void NewGVN::initializeCongruenceClasses(Function &F) { NextCongruenceNum = 0; // Note that even though we use the live on entry def as a representative // MemoryAccess, it is *not* the same as the actual live on entry def. We - // have no real equivalemnt to undef for MemoryAccesses, and so we really + // have no real equivalent to poison for MemoryAccesses, and so we really // should be checking whether the MemoryAccess is top if we want to know if it // is equivalent to everything. Otherwise, what this really signifies is that // the access "it reaches all the way back to the beginning of the function" @@ -3031,7 +3037,7 @@ void NewGVN::valueNumberMemoryPhi(MemoryPhi *MP) { !isMemoryAccessTOP(cast<MemoryAccess>(U)) && ReachableEdges.count({MP->getIncomingBlock(U), PHIBlock}); }); - // If all that is left is nothing, our memoryphi is undef. We keep it as + // If all that is left is nothing, our memoryphi is poison. We keep it as // InitialClass. Note: The only case this should happen is if we have at // least one self-argument. if (Filtered.begin() == Filtered.end()) { diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index 3da367341d2a..b795ad3899bc 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -258,6 +258,7 @@ struct GCPtrLivenessData { // base relation will remain. Internally, we add a mixture of the two // types, then update all the second type to the first type using DefiningValueMapTy = MapVector<Value *, Value *>; +using PointerToBaseTy = MapVector<Value *, Value *>; using StatepointLiveSetTy = SetVector<Value *>; using RematerializedValueMapTy = MapVector<AssertingVH<Instruction>, AssertingVH<Value>>; @@ -266,9 +267,6 @@ struct PartiallyConstructedSafepointRecord { /// The set of values known to be live across this safepoint StatepointLiveSetTy LiveSet; - /// Mapping from live pointers to a base-defining-value - MapVector<Value *, Value *> PointerToBase; - /// The *new* gc.statepoint instruction itself. This produces the token /// that normal path gc.relocates and the gc.result are tied to. GCStatepointInst *StatepointToken; @@ -1255,10 +1253,9 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) { // post condition: PointerToBase contains one (derived, base) pair for every // pointer in live. Note that derived can be equal to base if the original // pointer was a base pointer. 
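// PointerToBaseTy above is a MapVector rather than a DenseMap so that
// iteration (the PrintBasePointers dump, collecting Bases) is deterministic
// across runs. Minimal usage sketch; Derived and Base are hypothetical:
PointerToBaseTy PointerToBaseSketch;
PointerToBaseSketch[Derived] = Base;    // record one derived -> base pair
for (auto &Pair : PointerToBaseSketch)  // stable, insertion-ordered iteration
  errs() << *Pair.first << " -> " << *Pair.second << "\n";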
-static void -findBasePointers(const StatepointLiveSetTy &live, - MapVector<Value *, Value *> &PointerToBase, - DominatorTree *DT, DefiningValueMapTy &DVCache) { +static void findBasePointers(const StatepointLiveSetTy &live, + PointerToBaseTy &PointerToBase, DominatorTree *DT, + DefiningValueMapTy &DVCache) { for (Value *ptr : live) { Value *base = findBasePointer(ptr, DVCache); assert(base && "failed to find base pointer"); @@ -1274,8 +1271,8 @@ findBasePointers(const StatepointLiveSetTy &live, /// parse point. static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache, CallBase *Call, - PartiallyConstructedSafepointRecord &result) { - MapVector<Value *, Value *> PointerToBase; + PartiallyConstructedSafepointRecord &result, + PointerToBaseTy &PointerToBase) { StatepointLiveSetTy PotentiallyDerivedPointers = result.LiveSet; // We assume that all pointers passed to deopt are base pointers; as an // optimization, we can use this to avoid seperately materializing the base @@ -1290,37 +1287,27 @@ static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache, PointerToBase[V] = V; } findBasePointers(PotentiallyDerivedPointers, PointerToBase, &DT, DVCache); - - if (PrintBasePointers) { - errs() << "Base Pairs (w/o Relocation):\n"; - for (auto &Pair : PointerToBase) { - errs() << " derived "; - Pair.first->printAsOperand(errs(), false); - errs() << " base "; - Pair.second->printAsOperand(errs(), false); - errs() << "\n";; - } - } - - result.PointerToBase = PointerToBase; } /// Given an updated version of the dataflow liveness results, update the /// liveset and base pointer maps for the call site CS. static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData, CallBase *Call, - PartiallyConstructedSafepointRecord &result); + PartiallyConstructedSafepointRecord &result, + PointerToBaseTy &PointerToBase); static void recomputeLiveInValues( Function &F, DominatorTree &DT, ArrayRef<CallBase *> toUpdate, - MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) { + MutableArrayRef<struct PartiallyConstructedSafepointRecord> records, + PointerToBaseTy &PointerToBase) { // TODO-PERF: reuse the original liveness, then simply run the dataflow // again. The old values are still live and will help it stabilize quickly. GCPtrLivenessData RevisedLivenessData; computeLiveInValues(DT, F, RevisedLivenessData); for (size_t i = 0; i < records.size(); i++) { struct PartiallyConstructedSafepointRecord &info = records[i]; - recomputeLiveInValues(RevisedLivenessData, toUpdate[i], info); + recomputeLiveInValues(RevisedLivenessData, toUpdate[i], info, + PointerToBase); } } @@ -1537,7 +1524,8 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */ const SmallVectorImpl<Value *> &BasePtrs, const SmallVectorImpl<Value *> &LiveVariables, PartiallyConstructedSafepointRecord &Result, - std::vector<DeferredReplacement> &Replacements) { + std::vector<DeferredReplacement> &Replacements, + const PointerToBaseTy &PointerToBase) { assert(BasePtrs.size() == LiveVariables.size()); // Then go ahead and use the builder do actually do the inserts. 
We insert @@ -1626,10 +1614,10 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */ auto &Context = Call->getContext(); auto &DL = Call->getModule()->getDataLayout(); auto GetBaseAndOffset = [&](Value *Derived) { - assert(Result.PointerToBase.count(Derived)); + assert(PointerToBase.count(Derived)); unsigned AddressSpace = Derived->getType()->getPointerAddressSpace(); unsigned IntPtrSize = DL.getPointerSizeInBits(AddressSpace); - Value *Base = Result.PointerToBase.find(Derived)->second; + Value *Base = PointerToBase.find(Derived)->second; Value *Base_int = Builder.CreatePtrToInt( Base, Type::getIntNTy(Context, IntPtrSize)); Value *Derived_int = Builder.CreatePtrToInt( @@ -1819,9 +1807,9 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */ static void makeStatepointExplicit(DominatorTree &DT, CallBase *Call, PartiallyConstructedSafepointRecord &Result, - std::vector<DeferredReplacement> &Replacements) { + std::vector<DeferredReplacement> &Replacements, + const PointerToBaseTy &PointerToBase) { const auto &LiveSet = Result.LiveSet; - const auto &PointerToBase = Result.PointerToBase; // Convert to vector for efficient cross referencing. SmallVector<Value *, 64> BaseVec, LiveVec; @@ -1836,7 +1824,8 @@ makeStatepointExplicit(DominatorTree &DT, CallBase *Call, assert(LiveVec.size() == BaseVec.size()); // Do the actual rewriting and delete the old statepoint - makeStatepointExplicitImpl(Call, BaseVec, LiveVec, Result, Replacements); + makeStatepointExplicitImpl(Call, BaseVec, LiveVec, Result, Replacements, + PointerToBase); } // Helper function for the relocationViaAlloca. @@ -2238,6 +2227,7 @@ static bool AreEquivalentPhiNodes(PHINode &OrigRootPhi, PHINode &AlternateRootPh // relocated values we don't do any user adjustments here. static void rematerializeLiveValues(CallBase *Call, PartiallyConstructedSafepointRecord &Info, + PointerToBaseTy &PointerToBase, TargetTransformInfo &TTI) { const unsigned int ChainLengthThreshold = 10; @@ -2248,7 +2238,7 @@ static void rematerializeLiveValues(CallBase *Call, for (Value *LiveValue: Info.LiveSet) { // For each live pointer find its defining chain SmallVector<Instruction *, 3> ChainToBase; - assert(Info.PointerToBase.count(LiveValue)); + assert(PointerToBase.count(LiveValue)); Value *RootOfChain = findRematerializableChainToBasePointer(ChainToBase, LiveValue); @@ -2260,9 +2250,9 @@ static void rematerializeLiveValues(CallBase *Call, // Handle the scenario where the RootOfChain is not equal to the // Base Value, but they are essentially the same phi values. 
- if (RootOfChain != Info.PointerToBase[LiveValue]) { + if (RootOfChain != PointerToBase[LiveValue]) { PHINode *OrigRootPhi = dyn_cast<PHINode>(RootOfChain); - PHINode *AlternateRootPhi = dyn_cast<PHINode>(Info.PointerToBase[LiveValue]); + PHINode *AlternateRootPhi = dyn_cast<PHINode>(PointerToBase[LiveValue]); if (!OrigRootPhi || !AlternateRootPhi) continue; // PHI nodes that have the same incoming values, and belonging to the same @@ -2362,7 +2352,7 @@ static void rematerializeLiveValues(CallBase *Call, Instruction *InsertBefore = Call->getNextNode(); assert(InsertBefore); Instruction *RematerializedValue = rematerializeChain( - InsertBefore, RootOfChain, Info.PointerToBase[LiveValue]); + InsertBefore, RootOfChain, PointerToBase[LiveValue]); Info.RematerializedValues[RematerializedValue] = LiveValue; } else { auto *Invoke = cast<InvokeInst>(Call); @@ -2373,9 +2363,9 @@ static void rematerializeLiveValues(CallBase *Call, &*Invoke->getUnwindDest()->getFirstInsertionPt(); Instruction *NormalRematerializedValue = rematerializeChain( - NormalInsertBefore, RootOfChain, Info.PointerToBase[LiveValue]); + NormalInsertBefore, RootOfChain, PointerToBase[LiveValue]); Instruction *UnwindRematerializedValue = rematerializeChain( - UnwindInsertBefore, RootOfChain, Info.PointerToBase[LiveValue]); + UnwindInsertBefore, RootOfChain, PointerToBase[LiveValue]); Info.RematerializedValues[NormalRematerializedValue] = LiveValue; Info.RematerializedValues[UnwindRematerializedValue] = LiveValue; @@ -2491,10 +2481,24 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, // site. findLiveReferences(F, DT, ToUpdate, Records); + /// Global mapping from live pointers to a base-defining-value. + PointerToBaseTy PointerToBase; + // B) Find the base pointers for each live pointer for (size_t i = 0; i < Records.size(); i++) { PartiallyConstructedSafepointRecord &info = Records[i]; - findBasePointers(DT, DVCache, ToUpdate[i], info); + findBasePointers(DT, DVCache, ToUpdate[i], info, PointerToBase); + } + if (PrintBasePointers) { + errs() << "Base Pairs (w/o Relocation):\n"; + for (auto &Pair : PointerToBase) { + errs() << " derived "; + Pair.first->printAsOperand(errs(), false); + errs() << " base "; + Pair.second->printAsOperand(errs(), false); + errs() << "\n"; + ; + } } // The base phi insertion logic (for any safepoint) may have inserted new @@ -2515,8 +2519,10 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, PartiallyConstructedSafepointRecord &Info = Records[i]; SmallVector<Value *, 128> Bases; - for (auto Pair : Info.PointerToBase) - Bases.push_back(Pair.second); + for (auto *Derived : Info.LiveSet) { + assert(PointerToBase.count(Derived) && "Missed base for derived pointer"); + Bases.push_back(PointerToBase[Derived]); + } insertUseHolderAfter(ToUpdate[i], Bases, Holders); } @@ -2524,18 +2530,16 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, // By selecting base pointers, we've effectively inserted new uses. Thus, we // need to rerun liveness. We may *also* have inserted new defs, but that's // not the key issue. 
- recomputeLiveInValues(F, DT, ToUpdate, Records); + recomputeLiveInValues(F, DT, ToUpdate, Records, PointerToBase); if (PrintBasePointers) { - for (auto &Info : Records) { - errs() << "Base Pairs: (w/Relocation)\n"; - for (auto Pair : Info.PointerToBase) { - errs() << " derived "; - Pair.first->printAsOperand(errs(), false); - errs() << " base "; - Pair.second->printAsOperand(errs(), false); - errs() << "\n"; - } + errs() << "Base Pairs: (w/Relocation)\n"; + for (auto Pair : PointerToBase) { + errs() << " derived "; + Pair.first->printAsOperand(errs(), false); + errs() << " base "; + Pair.second->printAsOperand(errs(), false); + errs() << "\n"; } } @@ -2547,10 +2551,12 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, // Note that the relocation placement code relies on this filtering for // correctness as it expects the base to be in the liveset, which isn't true // if the base is constant. - for (auto &Info : Records) - for (auto &BasePair : Info.PointerToBase) - if (isa<Constant>(BasePair.second)) - Info.LiveSet.remove(BasePair.first); + for (auto &Info : Records) { + Info.LiveSet.remove_if([&](Value *LiveV) { + assert(PointerToBase.count(LiveV) && "Missed base for derived pointer"); + return isa<Constant>(PointerToBase[LiveV]); + }); + } for (CallInst *CI : Holders) CI->eraseFromParent(); @@ -2561,7 +2567,7 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, // some values instead of relocating them. This is purely an optimization and // does not influence correctness. for (size_t i = 0; i < Records.size(); i++) - rematerializeLiveValues(ToUpdate[i], Records[i], TTI); + rematerializeLiveValues(ToUpdate[i], Records[i], PointerToBase, TTI); // We need this to safely RAUW and delete call or invoke return values that // may themselves be live over a statepoint. For details, please see usage in @@ -2575,7 +2581,8 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, // previous statepoint can not be a live variable, thus we can and remove // the old statepoint calls as we go.) for (size_t i = 0; i < Records.size(); i++) - makeStatepointExplicit(DT, ToUpdate[i], Records[i], Replacements); + makeStatepointExplicit(DT, ToUpdate[i], Records[i], Replacements, + PointerToBase); ToUpdate.clear(); // prevent accident use of invalid calls. @@ -2594,8 +2601,8 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, // these live sets, and migrate to using that data structure from this point // onward. Info.LiveSet.clear(); - Info.PointerToBase.clear(); } + PointerToBase.clear(); // Do all the fixups of the original live variables to their relocated selves SmallVector<Value *, 128> Live; @@ -3115,35 +3122,15 @@ static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data, static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData, CallBase *Call, - PartiallyConstructedSafepointRecord &Info) { + PartiallyConstructedSafepointRecord &Info, + PointerToBaseTy &PointerToBase) { StatepointLiveSetTy Updated; findLiveSetAtInst(Call, RevisedLivenessData, Updated); // We may have base pointers which are now live that weren't before. We need // to update the PointerToBase structure to reflect this. for (auto V : Updated) - Info.PointerToBase.insert({V, V}); - -#ifndef NDEBUG - for (auto V : Updated) - assert(Info.PointerToBase.count(V) && - "Must be able to find base for live value!"); -#endif - - // Remove any stale base mappings - this can happen since our liveness is - // more precise then the one inherent in the base pointer analysis. 
- DenseSet<Value *> ToErase; - for (auto KVPair : Info.PointerToBase) - if (!Updated.count(KVPair.first)) - ToErase.insert(KVPair.first); - - for (auto *V : ToErase) - Info.PointerToBase.erase(V); - -#ifndef NDEBUG - for (auto KVPair : Info.PointerToBase) - assert(Updated.count(KVPair.first) && "record for non-live value"); -#endif + PointerToBase.insert({ V, V }); Info.LiveSet = Updated; } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp index 35497ae5ed9a..8be8946702be 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp @@ -48,6 +48,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp index ac580b4161f4..b3a445368537 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -276,6 +276,8 @@ class StructurizeCFG { void insertConditions(bool Loops); + void simplifyConditions(); + void delPhiValues(BasicBlock *From, BasicBlock *To); void addPhiValues(BasicBlock *From, BasicBlock *To); @@ -586,6 +588,28 @@ void StructurizeCFG::insertConditions(bool Loops) { } } +/// Simplify any inverted conditions that were built by buildConditions. +void StructurizeCFG::simplifyConditions() { + SmallVector<Instruction *> InstToErase; + for (auto &I : concat<PredMap::value_type>(Predicates, LoopPreds)) { + auto &Preds = I.second; + for (auto &J : Preds) { + auto &Cond = J.second; + Instruction *Inverted; + if (match(Cond, m_Not(m_OneUse(m_Instruction(Inverted)))) && + !Cond->use_empty()) { + if (auto *InvertedCmp = dyn_cast<CmpInst>(Inverted)) { + InvertedCmp->setPredicate(InvertedCmp->getInversePredicate()); + Cond->replaceAllUsesWith(InvertedCmp); + InstToErase.push_back(cast<Instruction>(Cond)); + } + } + } + } + for (auto *I : InstToErase) + I->eraseFromParent(); +} + /// Remove all PHI values coming from "From" into "To" and remember /// them in DeletedPhis void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) { @@ -1065,6 +1089,7 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT) { createFlow(); insertConditions(false); insertConditions(true); + simplifyConditions(); setPhiValues(); simplifyAffectedPhis(); rebuildSSA(); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index d6d6b1a7fa09..15c4a64eb794 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -59,7 +59,7 @@ static cl::opt<unsigned> MaxDeoptOrUnreachableSuccessorCheckDepth( "is followed by a block that either has a terminating " "deoptimizing call or is terminated with an unreachable")); -void llvm::DetatchDeadBlocks( +void llvm::detachDeadBlocks( ArrayRef<BasicBlock *> BBs, SmallVectorImpl<DominatorTree::UpdateType> *Updates, bool KeepOneInputPHIs) { @@ -110,7 +110,7 @@ void llvm::DeleteDeadBlocks(ArrayRef <BasicBlock *> BBs, DomTreeUpdater *DTU, #endif SmallVector<DominatorTree::UpdateType, 4> Updates; - DetatchDeadBlocks(BBs, DTU ? 
&Updates : nullptr, KeepOneInputPHIs); + detachDeadBlocks(BBs, DTU ? &Updates : nullptr, KeepOneInputPHIs); if (DTU) DTU->applyUpdates(Updates); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneFunction.cpp index 048e691e33cf..86413df664a0 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -694,38 +694,39 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, VMap[OrigV] = I; } + // Simplify conditional branches and switches with a constant operand. We try + // to prune these out when cloning, but if the simplification required + // looking through PHI nodes, those are only available after forming the full + // basic block. That may leave some here, and we still want to prune the dead + // code as early as possible. + Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB])->getIterator(); + for (BasicBlock &BB : make_range(Begin, NewFunc->end())) + ConstantFoldTerminator(&BB); + + // Some blocks may have become unreachable as a result. Find and delete them. + { + SmallPtrSet<BasicBlock *, 16> ReachableBlocks; + SmallVector<BasicBlock *, 16> Worklist; + Worklist.push_back(&*Begin); + while (!Worklist.empty()) { + BasicBlock *BB = Worklist.pop_back_val(); + if (ReachableBlocks.insert(BB).second) + append_range(Worklist, successors(BB)); + } + + SmallVector<BasicBlock *, 16> UnreachableBlocks; + for (BasicBlock &BB : make_range(Begin, NewFunc->end())) + if (!ReachableBlocks.contains(&BB)) + UnreachableBlocks.push_back(&BB); + DeleteDeadBlocks(UnreachableBlocks); + } + // Now that the inlined function body has been fully constructed, go through // and zap unconditional fall-through branches. This happens all the time when // specializing code: code specialization turns conditional branches into // uncond branches, and this code folds them. - Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB])->getIterator(); Function::iterator I = Begin; while (I != NewFunc->end()) { - // We need to simplify conditional branches and switches with a constant - // operand. We try to prune these out when cloning, but if the - // simplification required looking through PHI nodes, those are only - // available after forming the full basic block. That may leave some here, - // and we still want to prune the dead code as early as possible. - // - // Do the folding before we check if the block is dead since we want code - // like - // bb: - // br i1 undef, label %bb, label %bb - // to be simplified to - // bb: - // br label %bb - // before we call I->getSinglePredecessor(). - ConstantFoldTerminator(&*I); - - // Check if this block has become dead during inlining or other - // simplifications. Note that the first block will appear dead, as it has - // not yet been wired up properly. 
- if (I != Begin && (pred_empty(&*I) || I->getSinglePredecessor() == &*I)) { - BasicBlock *DeadBB = &*I++; - DeleteDeadBlock(DeadBB); - continue; - } - BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator()); if (!BI || BI->isConditional()) { ++I; diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 24cd5747c5a4..cec159f6a448 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -33,6 +33,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" @@ -857,8 +858,8 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, (ParamTy.size() + AggParamTy.size()) == (inputs.size() + outputs.size()) && "Number of scalar and aggregate params does not match inputs, outputs"); - assert(StructValues.empty() || - AggregateArgs && "Expeced StructValues only with AggregateArgs set"); + assert((StructValues.empty() || AggregateArgs) && + "Expeced StructValues only with AggregateArgs set"); // Concatenate scalar and aggregate params in ParamTy. size_t NumScalarParams = ParamTy.size(); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/GlobalStatus.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/GlobalStatus.cpp index f8ec8c6ad426..c1c5f5cc879f 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/GlobalStatus.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/GlobalStatus.cpp @@ -65,15 +65,18 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS, for (const Use &U : V->uses()) { const User *UR = U.getUser(); - if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(UR)) { - // If the result of the constantexpr isn't pointer type, then we won't - // know to expect it in various places. Just reject early. - if (!isa<PointerType>(CE->getType())) - return true; - - // FIXME: Do we need to add constexpr selects to VisitedUsers? - if (analyzeGlobalAux(CE, GS, VisitedUsers)) - return true; + if (const Constant *C = dyn_cast<Constant>(UR)) { + const ConstantExpr *CE = dyn_cast<ConstantExpr>(C); + if (CE && isa<PointerType>(CE->getType())) { + // Recursively analyze pointer-typed constant expressions. + // FIXME: Do we need to add constexpr selects to VisitedUsers? + if (analyzeGlobalAux(CE, GS, VisitedUsers)) + return true; + } else { + // Ignore dead constant users. + if (!isSafeToDestroyConstant(C)) + return true; + } } else if (const Instruction *I = dyn_cast<Instruction>(UR)) { if (!GS.HasMultipleAccessingFunctions) { const Function *F = I->getParent()->getParent(); @@ -169,10 +172,6 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS, } else { return true; // Any other non-load instruction might take address! } - } else if (const Constant *C = dyn_cast<Constant>(UR)) { - // We might have a dead and dangling constant hanging off of here. - if (!isSafeToDestroyConstant(C)) - return true; } else { // Otherwise must be some other user. 
return true; diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp index c9f872f5b7e1..923bcc781e47 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -39,6 +39,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" @@ -671,12 +672,9 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock, // edge from this block. SmallVector<Value *, 8> UnwindDestPHIValues; BasicBlock *InvokeBB = II->getParent(); - for (Instruction &I : *UnwindDest) { + for (PHINode &PHI : UnwindDest->phis()) { // Save the value to use for this edge. - PHINode *PHI = dyn_cast<PHINode>(&I); - if (!PHI) - break; - UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB)); + UnwindDestPHIValues.push_back(PHI.getIncomingValueForBlock(InvokeBB)); } // Add incoming-PHI values to the unwind destination block for the given basic diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp index 9f33d2f82732..9a10535c9310 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp @@ -45,6 +45,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp index 92333408aaef..5b66da1e7082 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp @@ -737,7 +737,7 @@ TargetTransformInfo::PeelingPreferences llvm::gatherPeelingPreferences( /// for the bulk of dynamic execution, can be further simplified by scalar /// optimizations. bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, - ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, + ScalarEvolution *SE, DominatorTree &DT, AssumptionCache *AC, bool PreserveLCSSA) { assert(PeelCount > 0 && "Attempt to peel out zero iterations?"); assert(canPeel(L) && "Attempt to peel a loop which is not peelable?"); @@ -756,23 +756,21 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, // routes which can lead to the exit: we can reach it from the peeled // iterations too. DenseMap<BasicBlock *, BasicBlock *> NonLoopBlocksIDom; - if (DT) { - for (auto *BB : L->blocks()) { - auto *BBDomNode = DT->getNode(BB); - SmallVector<BasicBlock *, 16> ChildrenToUpdate; - for (auto *ChildDomNode : BBDomNode->children()) { - auto *ChildBB = ChildDomNode->getBlock(); - if (!L->contains(ChildBB)) - ChildrenToUpdate.push_back(ChildBB); - } - // The new idom of the block will be the nearest common dominator - // of all copies of the previous idom. This is equivalent to the - // nearest common dominator of the previous idom and the first latch, - // which dominates all copies of the previous idom. 
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp
index 92333408aaef..5b66da1e7082 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -737,7 +737,7 @@ TargetTransformInfo::PeelingPreferences llvm::gatherPeelingPreferences(
 /// for the bulk of dynamic execution, can be further simplified by scalar
 /// optimizations.
 bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
-                    ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
+                    ScalarEvolution *SE, DominatorTree &DT, AssumptionCache *AC,
                     bool PreserveLCSSA) {
   assert(PeelCount > 0 && "Attempt to peel out zero iterations?");
   assert(canPeel(L) && "Attempt to peel a loop which is not peelable?");
@@ -756,23 +756,21 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
   // routes which can lead to the exit: we can reach it from the peeled
   // iterations too.
   DenseMap<BasicBlock *, BasicBlock *> NonLoopBlocksIDom;
-  if (DT) {
-    for (auto *BB : L->blocks()) {
-      auto *BBDomNode = DT->getNode(BB);
-      SmallVector<BasicBlock *, 16> ChildrenToUpdate;
-      for (auto *ChildDomNode : BBDomNode->children()) {
-        auto *ChildBB = ChildDomNode->getBlock();
-        if (!L->contains(ChildBB))
-          ChildrenToUpdate.push_back(ChildBB);
-      }
-      // The new idom of the block will be the nearest common dominator
-      // of all copies of the previous idom. This is equivalent to the
-      // nearest common dominator of the previous idom and the first latch,
-      // which dominates all copies of the previous idom.
-      BasicBlock *NewIDom = DT->findNearestCommonDominator(BB, Latch);
-      for (auto *ChildBB : ChildrenToUpdate)
-        NonLoopBlocksIDom[ChildBB] = NewIDom;
+  for (auto *BB : L->blocks()) {
+    auto *BBDomNode = DT.getNode(BB);
+    SmallVector<BasicBlock *, 16> ChildrenToUpdate;
+    for (auto *ChildDomNode : BBDomNode->children()) {
+      auto *ChildBB = ChildDomNode->getBlock();
+      if (!L->contains(ChildBB))
+        ChildrenToUpdate.push_back(ChildBB);
     }
+    // The new idom of the block will be the nearest common dominator
+    // of all copies of the previous idom. This is equivalent to the
+    // nearest common dominator of the previous idom and the first latch,
+    // which dominates all copies of the previous idom.
+    BasicBlock *NewIDom = DT.findNearestCommonDominator(BB, Latch);
+    for (auto *ChildBB : ChildrenToUpdate)
+      NonLoopBlocksIDom[ChildBB] = NewIDom;
   }
 
   Function *F = Header->getParent();
@@ -822,11 +820,11 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
   //   If (cond) goto Header
   // Exit:
 
-  BasicBlock *InsertTop = SplitEdge(PreHeader, Header, DT, LI);
+  BasicBlock *InsertTop = SplitEdge(PreHeader, Header, &DT, LI);
   BasicBlock *InsertBot =
-      SplitBlock(InsertTop, InsertTop->getTerminator(), DT, LI);
+      SplitBlock(InsertTop, InsertTop->getTerminator(), &DT, LI);
   BasicBlock *NewPreHeader =
-      SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI);
+      SplitBlock(InsertBot, InsertBot->getTerminator(), &DT, LI);
 
   InsertTop->setName(Header->getName() + ".peel.begin");
   InsertBot->setName(Header->getName() + ".peel.next");
@@ -852,23 +850,21 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
     ValueToValueMapTy VMap;
 
     cloneLoopBlocks(L, Iter, InsertTop, InsertBot, ExitEdges, NewBlocks,
-                    LoopBlocks, VMap, LVMap, DT, LI,
+                    LoopBlocks, VMap, LVMap, &DT, LI,
                     LoopLocalNoAliasDeclScopes);
 
     // Remap to use values from the current iteration instead of the
    // previous one.
     remapInstructionsInBlocks(NewBlocks, VMap);
 
-    if (DT) {
-      // Update IDoms of the blocks reachable through exits.
-      if (Iter == 0)
-        for (auto BBIDom : NonLoopBlocksIDom)
-          DT->changeImmediateDominator(BBIDom.first,
-                                       cast<BasicBlock>(LVMap[BBIDom.second]));
+    // Update IDoms of the blocks reachable through exits.
+    if (Iter == 0)
+      for (auto BBIDom : NonLoopBlocksIDom)
+        DT.changeImmediateDominator(BBIDom.first,
+                                    cast<BasicBlock>(LVMap[BBIDom.second]));
 
 #ifdef EXPENSIVE_CHECKS
-      assert(DT->verify(DominatorTree::VerificationLevel::Fast));
+    assert(DT.verify(DominatorTree::VerificationLevel::Fast));
 #endif
-    }
 
     auto *LatchBRCopy = cast<BranchInst>(VMap[LatchBR]);
     updateBranchWeights(InsertBot, LatchBRCopy, ExitWeight, FallThroughWeight);
@@ -877,7 +873,7 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
     LatchBRCopy->setMetadata(LLVMContext::MD_loop, nullptr);
 
     InsertTop = InsertBot;
-    InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI);
+    InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), &DT, LI);
     InsertBot->setName(Header->getName() + ".peel.next");
 
     F->getBasicBlockList().splice(InsertTop->getIterator(),
@@ -912,10 +908,10 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
     SE->forgetTopmostLoop(L);
 
   // Finally DomtTree must be correct.
-  assert(DT->verify(DominatorTree::VerificationLevel::Fast));
+  assert(DT.verify(DominatorTree::VerificationLevel::Fast));
 
   // FIXME: Incrementally update loop-simplify
-  simplifyLoop(L, DT, LI, SE, AC, nullptr, PreserveLCSSA);
+  simplifyLoop(L, &DT, LI, SE, AC, nullptr, PreserveLCSSA);
 
   NumPeeled++;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp
index 7c9ab7f6ca2c..d6a6be2762c7 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp
@@ -264,3 +264,16 @@ void VFABI::setVectorVariantNames(
   CI->addFnAttr(
       Attribute::get(M->getContext(), MappingsAttrName, Buffer.str()));
 }
+
+void llvm::embedBufferInModule(Module &M, MemoryBufferRef Buf,
+                               StringRef SectionName) {
+  // Embed the buffer into the module.
+  Constant *ModuleConstant = ConstantDataArray::get(
+      M.getContext(), makeArrayRef(Buf.getBufferStart(), Buf.getBufferSize()));
+  GlobalVariable *GV = new GlobalVariable(
+      M, ModuleConstant->getType(), true, GlobalValue::PrivateLinkage,
+      ModuleConstant, "llvm.embedded.object");
+  GV->setSection(SectionName);
+
+  appendToCompilerUsed(M, GV);
+}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp
index 7083789267d9..deaee467531d 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp
@@ -15,6 +15,7 @@
 #include "llvm/ADT/SmallString.h"
 #include "llvm/IR/Module.h"
 #include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/MD5.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
 
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index b35ab57e0d87..01b433b4782a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -25,13 +25,13 @@
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/IteratedDominanceFrontier.h"
-#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
@@ -45,6 +45,7 @@
 #include "llvm/IR/Type.h"
 #include "llvm/IR/User.h"
 #include "llvm/Support/Casting.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
 #include <algorithm>
 #include <cassert>
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Utils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Utils.cpp
index 3ca36a1cad91..43eb5c87acee 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Utils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Utils.cpp
@@ -16,6 +16,7 @@
 #include "llvm-c/Transforms/Utils.h"
 #include "llvm/IR/LegacyPassManager.h"
 #include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
 #include "llvm/PassRegistry.h"
 
 using namespace llvm;
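The ModuleUtils.cpp hunk above adds llvm::embedBufferInModule. A sketch of how a caller might use it, assuming the matching declaration lands in llvm/Transforms/Utils/ModuleUtils.h; the section name and payload here are arbitrary examples:

#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <memory>
#include <string>

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("demo", Ctx);

  // Use the module's own textual IR as an example payload; any
  // MemoryBufferRef works.
  std::string IR;
  raw_string_ostream OS(IR);
  M.print(OS, nullptr);
  std::unique_ptr<MemoryBuffer> Buf =
      MemoryBuffer::getMemBufferCopy(OS.str(), "payload");

  // Creates a private constant global in section ".example.payload" and
  // records it in llvm.compiler.used so it is not discarded as unused.
  embedBufferInModule(M, Buf->getMemBufferRef(), ".example.payload");
  return 0;
}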
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/VNCoercion.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/VNCoercion.cpp
index bbe6b3dc23b3..637181722f63 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/VNCoercion.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/VNCoercion.cpp
@@ -2,6 +2,7 @@
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/Support/Debug.h"
 
 #define DEBUG_TYPE "vncoerce"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index d11f4146b590..3290439ecd07 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -632,13 +632,6 @@ protected:
                                        Instruction *EntryVal, VPValue *Def,
                                        VPTransformState &State);
 
-  /// Returns true if an instruction \p I should be scalarized instead of
-  /// vectorized for the chosen vectorization factor.
-  bool shouldScalarizeInstruction(Instruction *I) const;
-
-  /// Returns true if we should generate a scalar version of \p IV.
-  bool needsScalarInduction(Instruction *IV) const;
-
   /// Returns (and creates if needed) the original loop trip count.
   Value *getOrCreateTripCount(Loop *NewLoop);
 
@@ -2479,21 +2472,6 @@ void InnerLoopVectorizer::createVectorIntOrFpInductionPHI(
   VecInd->addIncoming(LastInduction, LoopVectorLatch);
 }
 
-bool InnerLoopVectorizer::shouldScalarizeInstruction(Instruction *I) const {
-  return Cost->isScalarAfterVectorization(I, VF) ||
-         Cost->isProfitableToScalarize(I, VF);
-}
-
-bool InnerLoopVectorizer::needsScalarInduction(Instruction *IV) const {
-  if (shouldScalarizeInstruction(IV))
-    return true;
-  auto isScalarInst = [&](User *U) -> bool {
-    auto *I = cast<Instruction>(U);
-    return (OrigLoop->contains(I) && shouldScalarizeInstruction(I));
-  };
-  return llvm::any_of(IV->users(), isScalarInst);
-}
-
 void InnerLoopVectorizer::widenIntOrFpInduction(
     PHINode *IV, VPWidenIntOrFpInductionRecipe *Def, VPTransformState &State,
     Value *CanonicalIV) {
@@ -2549,27 +2527,6 @@ void InnerLoopVectorizer::widenIntOrFpInduction(
     return ScalarIV;
   };
 
-  // Create the vector values from the scalar IV, in the absence of creating a
-  // vector IV.
-  auto CreateSplatIV = [&](Value *ScalarIV, Value *Step) {
-    Value *Broadcasted = getBroadcastInstrs(ScalarIV);
-    for (unsigned Part = 0; Part < UF; ++Part) {
-      Value *StartIdx;
-      if (Step->getType()->isFloatingPointTy())
-        StartIdx =
-            getRuntimeVFAsFloat(Builder, Step->getType(), State.VF * Part);
-      else
-        StartIdx = getRuntimeVF(Builder, Step->getType(), State.VF * Part);
-
-      Value *EntryPart =
-          getStepVector(Broadcasted, StartIdx, Step, ID.getInductionOpcode(),
-                        State.VF, State.Builder);
-      State.set(Def, EntryPart, Part);
-      if (Trunc)
-        addMetadata(EntryPart, Trunc);
-    }
-  };
-
   // Fast-math-flags propagate from the original induction instruction.
   IRBuilder<>::FastMathFlagGuard FMFG(Builder);
   if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp()))
@@ -2605,36 +2562,18 @@ void InnerLoopVectorizer::widenIntOrFpInduction(
     return;
   }
 
-  // Determine if we want a scalar version of the induction variable. This is
-  // true if the induction variable itself is not widened, or if it has at
-  // least one user in the loop that is not widened.
-  auto NeedsScalarIV = needsScalarInduction(EntryVal);
-  if (!NeedsScalarIV) {
+  // Create a new independent vector induction variable, if one is needed.
+  if (Def->needsVectorIV())
     createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal, Def, State);
-    return;
-  }
 
-  // Try to create a new independent vector induction variable. If we can't
-  // create the phi node, we will splat the scalar induction variable in each
-  // loop iteration.
-  if (!shouldScalarizeInstruction(EntryVal)) {
-    createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal, Def, State);
-    Value *ScalarIV = CreateScalarIV(Step);
+  if (Def->needsScalarIV()) {
     // Create scalar steps that can be used by instructions we will later
     // scalarize. Note that the addition of the scalar steps will not increase
     // the number of instructions in the loop in the common case prior to
     // InstCombine. We will be trading one vector extract for each scalar step.
+    Value *ScalarIV = CreateScalarIV(Step);
     buildScalarSteps(ScalarIV, Step, EntryVal, ID, Def, State);
-    return;
   }
-
-  // All IV users are scalar instructions, so only emit a scalar IV, not a
-  // vectorised IV. Except when we tail-fold, then the splat IV feeds the
-  // predicate used by the masked loads/stores.
-  Value *ScalarIV = CreateScalarIV(Step);
-  if (!Cost->isScalarEpilogueAllowed())
-    CreateSplatIV(ScalarIV, Step);
-  buildScalarSteps(ScalarIV, Step, EntryVal, ID, Def, State);
 }
 
 void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
@@ -2663,17 +2602,15 @@ void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
   }
 
   // Determine the number of scalars we need to generate for each unroll
-  // iteration. If EntryVal is uniform, we only need to generate the first
-  // lane. Otherwise, we generate all VF values.
-  bool IsUniform =
-      Cost->isUniformAfterVectorization(cast<Instruction>(EntryVal), State.VF);
-  unsigned Lanes = IsUniform ? 1 : State.VF.getKnownMinValue();
+  // iteration.
+  bool FirstLaneOnly = vputils::onlyFirstLaneUsed(Def);
+  unsigned Lanes = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
 
   // Compute the scalar steps and save the results in State.
   Type *IntStepTy = IntegerType::get(ScalarIVTy->getContext(),
                                      ScalarIVTy->getScalarSizeInBits());
   Type *VecIVTy = nullptr;
   Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr;
-  if (!IsUniform && State.VF.isScalable()) {
+  if (!FirstLaneOnly && State.VF.isScalable()) {
     VecIVTy = VectorType::get(ScalarIVTy, State.VF);
     UnitStepVec =
         Builder.CreateStepVector(VectorType::get(IntStepTy, State.VF));
@@ -2684,7 +2621,7 @@ void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
 
   for (unsigned Part = 0; Part < State.UF; ++Part) {
     Value *StartIdx0 = createStepForVF(Builder, IntStepTy, State.VF, Part);
 
-    if (!IsUniform && State.VF.isScalable()) {
+    if (!FirstLaneOnly && State.VF.isScalable()) {
       auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0);
       auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec);
       if (ScalarIVTy->isFloatingPointTy())
@@ -4565,7 +4502,7 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
     // Determine the number of scalars we need to generate for each unroll
     // iteration. If the instruction is uniform, we only need to generate the
     // first lane. Otherwise, we generate all VF values.
-    bool IsUniform = Cost->isUniformAfterVectorization(P, State.VF);
+    bool IsUniform = vputils::onlyFirstLaneUsed(PhiR);
     assert((IsUniform || !State.VF.isScalable()) &&
            "Cannot scalarize a scalable VF");
     unsigned Lanes = IsUniform ? 1 : State.VF.getFixedValue();
@@ -5889,7 +5826,9 @@ bool LoopVectorizationCostModel::isEpilogueVectorizationProfitable(
   // consider interleaving beneficial (eg. MVE).
   if (TTI.getMaxInterleaveFactor(VF.getKnownMinValue()) <= 1)
     return false;
-  if (VF.getFixedValue() >= EpilogueVectorizationMinVF)
+  // FIXME: We should consider changing the threshold for scalable
+  // vectors to take VScaleForTuning into account.
+  if (VF.getKnownMinValue() >= EpilogueVectorizationMinVF)
     return true;
   return false;
 }
@@ -5940,29 +5879,21 @@ LoopVectorizationCostModel::selectEpilogueVectorizationFactor(
     return Result;
   }
 
-  auto FixedMainLoopVF = ElementCount::getFixed(MainLoopVF.getKnownMinValue());
-  if (MainLoopVF.isScalable())
-    LLVM_DEBUG(
-        dbgs() << "LEV: Epilogue vectorization using scalable vectors not "
-                  "yet supported. Converting to fixed-width (VF="
-               << FixedMainLoopVF << ") instead\n");
-
-  if (!isEpilogueVectorizationProfitable(FixedMainLoopVF)) {
+  if (!isEpilogueVectorizationProfitable(MainLoopVF)) {
     LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization is not profitable for "
                          "this loop\n");
     return Result;
   }
 
   for (auto &NextVF : ProfitableVFs)
-    if (ElementCount::isKnownLT(NextVF.Width, FixedMainLoopVF) &&
-        (Result.Width.getFixedValue() == 1 ||
-         isMoreProfitable(NextVF, Result)) &&
+    if (ElementCount::isKnownLT(NextVF.Width, MainLoopVF) &&
+        (Result.Width.isScalar() || isMoreProfitable(NextVF, Result)) &&
         LVP.hasPlanWithVF(NextVF.Width))
       Result = NextVF;
 
   if (Result != VectorizationFactor::Disabled())
     LLVM_DEBUG(dbgs() << "LEV: Vectorizing epilogue loop with VF = "
-                      << Result.Width.getFixedValue() << "\n";);
+                      << Result.Width << "\n";);
   return Result;
 }
 
@@ -8546,16 +8477,54 @@ VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I,
                                             Mask, Consecutive, Reverse);
 }
 
-VPWidenIntOrFpInductionRecipe *
-VPRecipeBuilder::tryToOptimizeInductionPHI(PHINode *Phi,
-                                           ArrayRef<VPValue *> Operands) const {
+static VPWidenIntOrFpInductionRecipe *
+createWidenInductionRecipe(PHINode *Phi, Instruction *PhiOrTrunc,
+                           VPValue *Start, const InductionDescriptor &IndDesc,
+                           LoopVectorizationCostModel &CM, Loop &OrigLoop,
+                           VFRange &Range) {
+  // Returns true if an instruction \p I should be scalarized instead of
+  // vectorized for the chosen vectorization factor.
+  auto ShouldScalarizeInstruction = [&CM](Instruction *I, ElementCount VF) {
+    return CM.isScalarAfterVectorization(I, VF) ||
+           CM.isProfitableToScalarize(I, VF);
+  };
+
+  bool NeedsScalarIV = LoopVectorizationPlanner::getDecisionAndClampRange(
+      [&](ElementCount VF) {
+        // Returns true if we should generate a scalar version of \p IV.
+        if (ShouldScalarizeInstruction(PhiOrTrunc, VF))
+          return true;
+        auto isScalarInst = [&](User *U) -> bool {
+          auto *I = cast<Instruction>(U);
+          return OrigLoop.contains(I) && ShouldScalarizeInstruction(I, VF);
+        };
+        return any_of(PhiOrTrunc->users(), isScalarInst);
+      },
+      Range);
+  bool NeedsScalarIVOnly = LoopVectorizationPlanner::getDecisionAndClampRange(
+      [&](ElementCount VF) {
+        return ShouldScalarizeInstruction(PhiOrTrunc, VF);
+      },
+      Range);
+  assert(IndDesc.getStartValue() ==
+         Phi->getIncomingValueForBlock(OrigLoop.getLoopPreheader()));
+  if (auto *TruncI = dyn_cast<TruncInst>(PhiOrTrunc)) {
+    return new VPWidenIntOrFpInductionRecipe(Phi, Start, IndDesc, TruncI,
+                                             NeedsScalarIV, !NeedsScalarIVOnly);
+  }
+  assert(isa<PHINode>(PhiOrTrunc) && "must be a phi node here");
+  return new VPWidenIntOrFpInductionRecipe(Phi, Start, IndDesc, NeedsScalarIV,
+                                           !NeedsScalarIVOnly);
+}
+
+VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionPHI(
+    PHINode *Phi, ArrayRef<VPValue *> Operands, VFRange &Range) const {
+
   // Check if this is an integer or fp induction. If so, build the recipe that
   // produces its scalar and vector values.
-  if (auto *II = Legal->getIntOrFpInductionDescriptor(Phi)) {
-    assert(II->getStartValue() ==
-           Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader()));
-    return new VPWidenIntOrFpInductionRecipe(Phi, Operands[0], *II);
-  }
+  if (auto *II = Legal->getIntOrFpInductionDescriptor(Phi))
+    return createWidenInductionRecipe(Phi, Phi, Operands[0], *II, CM, *OrigLoop,
+                                      Range);
 
   return nullptr;
 }
@@ -8583,7 +8552,7 @@ VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionTruncate(
     auto *Phi = cast<PHINode>(I->getOperand(0));
     const InductionDescriptor &II = *Legal->getIntOrFpInductionDescriptor(Phi);
     VPValue *Start = Plan.getOrAddVPValue(II.getStartValue());
-    return new VPWidenIntOrFpInductionRecipe(Phi, Start, II, I);
+    return createWidenInductionRecipe(Phi, I, Start, II, CM, *OrigLoop, Range);
   }
   return nullptr;
 }
@@ -8865,7 +8834,7 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
   if (auto Phi = dyn_cast<PHINode>(Instr)) {
     if (Phi->getParent() != OrigLoop->getHeader())
      return tryToBlend(Phi, Operands, Plan);
-    if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands)))
+    if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands, Range)))
       return toVPRecipeResult(Recipe);
 
     VPHeaderPHIRecipe *PhiRecipe = nullptr;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 99c265fc5101..15b349f53fd9 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -471,17 +471,36 @@ static bool isValidForAlternation(unsigned Opcode) {
   return true;
 }
 
+static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
+                                       unsigned BaseIndex = 0);
+
+/// Checks if the provided operands of 2 cmp instructions are compatible, i.e.
+/// compatible instructions or constants, or just some other regular values.
+static bool areCompatibleCmpOps(Value *BaseOp0, Value *BaseOp1, Value *Op0,
+                                Value *Op1) {
+  return (isConstant(BaseOp0) && isConstant(Op0)) ||
+         (isConstant(BaseOp1) && isConstant(Op1)) ||
+         (!isa<Instruction>(BaseOp0) && !isa<Instruction>(Op0) &&
+          !isa<Instruction>(BaseOp1) && !isa<Instruction>(Op1)) ||
+         getSameOpcode({BaseOp0, Op0}).getOpcode() ||
+         getSameOpcode({BaseOp1, Op1}).getOpcode();
+}
+
 /// \returns analysis of the Instructions in \p VL described in
 /// InstructionsState, the Opcode that we suppose the whole list
 /// could be vectorized even if its structure is diverse.
 static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
-                                       unsigned BaseIndex = 0) {
+                                       unsigned BaseIndex) {
   // Make sure these are all Instructions.
   if (llvm::any_of(VL, [](Value *V) { return !isa<Instruction>(V); }))
     return InstructionsState(VL[BaseIndex], nullptr, nullptr);
 
   bool IsCastOp = isa<CastInst>(VL[BaseIndex]);
   bool IsBinOp = isa<BinaryOperator>(VL[BaseIndex]);
+  bool IsCmpOp = isa<CmpInst>(VL[BaseIndex]);
+  CmpInst::Predicate BasePred =
+      IsCmpOp ? cast<CmpInst>(VL[BaseIndex])->getPredicate()
+              : CmpInst::BAD_ICMP_PREDICATE;
   unsigned Opcode = cast<Instruction>(VL[BaseIndex])->getOpcode();
   unsigned AltOpcode = Opcode;
   unsigned AltIndex = BaseIndex;
@@ -514,6 +533,57 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
           continue;
         }
       }
+    } else if (IsCmpOp && isa<CmpInst>(VL[Cnt])) {
+      auto *BaseInst = cast<Instruction>(VL[BaseIndex]);
+      auto *Inst = cast<Instruction>(VL[Cnt]);
+      Type *Ty0 = BaseInst->getOperand(0)->getType();
+      Type *Ty1 = Inst->getOperand(0)->getType();
+      if (Ty0 == Ty1) {
+        Value *BaseOp0 = BaseInst->getOperand(0);
+        Value *BaseOp1 = BaseInst->getOperand(1);
+        Value *Op0 = Inst->getOperand(0);
+        Value *Op1 = Inst->getOperand(1);
+        CmpInst::Predicate CurrentPred =
+            cast<CmpInst>(VL[Cnt])->getPredicate();
+        CmpInst::Predicate SwappedCurrentPred =
+            CmpInst::getSwappedPredicate(CurrentPred);
+        // Check for compatible operands. If the corresponding operands are not
+        // compatible - need to perform alternate vectorization.
+        if (InstOpcode == Opcode) {
+          if (BasePred == CurrentPred &&
+              areCompatibleCmpOps(BaseOp0, BaseOp1, Op0, Op1))
+            continue;
+          if (BasePred == SwappedCurrentPred &&
+              areCompatibleCmpOps(BaseOp0, BaseOp1, Op1, Op0))
+            continue;
+          if (E == 2 &&
+              (BasePred == CurrentPred || BasePred == SwappedCurrentPred))
+            continue;
+          auto *AltInst = cast<CmpInst>(VL[AltIndex]);
+          CmpInst::Predicate AltPred = AltInst->getPredicate();
+          Value *AltOp0 = AltInst->getOperand(0);
+          Value *AltOp1 = AltInst->getOperand(1);
+          // Check if operands are compatible with alternate operands.
+          if (AltPred == CurrentPred &&
+              areCompatibleCmpOps(AltOp0, AltOp1, Op0, Op1))
+            continue;
+          if (AltPred == SwappedCurrentPred &&
+              areCompatibleCmpOps(AltOp0, AltOp1, Op1, Op0))
+            continue;
+        }
+        if (BaseIndex == AltIndex) {
+          assert(isValidForAlternation(Opcode) &&
+                 isValidForAlternation(InstOpcode) &&
+                 "Cast isn't safe for alternation, logic needs to be updated!");
+          AltIndex = Cnt;
+          continue;
+        }
+        auto *AltInst = cast<CmpInst>(VL[AltIndex]);
+        CmpInst::Predicate AltPred = AltInst->getPredicate();
+        if (BasePred == CurrentPred || BasePred == SwappedCurrentPred ||
+            AltPred == CurrentPred || AltPred == SwappedCurrentPred)
+          continue;
+      }
     } else if (InstOpcode == Opcode || InstOpcode == AltOpcode)
       continue;
     return InstructionsState(VL[BaseIndex], nullptr, nullptr);
@@ -3307,9 +3377,14 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
     MapVector<OrdersType, unsigned,
               DenseMap<OrdersType, unsigned, OrdersTypeDenseMapInfo>>
         OrdersUses;
+    // Do the analysis for each tree entry only once, otherwise the order of
+    // the same node my be considered several times, though might be not
+    // profitable.
     SmallPtrSet<const TreeEntry *, 4> VisitedOps;
     for (const auto &Op : Data.second) {
       TreeEntry *OpTE = Op.second;
+      if (!VisitedOps.insert(OpTE).second)
+        continue;
       if (!OpTE->ReuseShuffleIndices.empty() ||
           (IgnoreReorder && OpTE == VectorizableTree.front().get()))
         continue;
@@ -3333,9 +3408,8 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
       } else {
         ++OrdersUses.insert(std::make_pair(Order, 0)).first->second;
       }
-      if (VisitedOps.insert(OpTE).second)
-        OrdersUses.insert(std::make_pair(OrdersType(), 0)).first->second +=
-            OpTE->UserTreeIndices.size();
+      OrdersUses.insert(std::make_pair(OrdersType(), 0)).first->second +=
+          OpTE->UserTreeIndices.size();
       assert(OrdersUses[{}] > 0 && "Counter cannot be less than 0.");
       --OrdersUses[{}];
     }
@@ -4350,9 +4424,41 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
       LLVM_DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n");
 
       // Reorder operands if reordering would enable vectorization.
-      if (isa<BinaryOperator>(VL0)) {
+      auto *CI = dyn_cast<CmpInst>(VL0);
+      if (isa<BinaryOperator>(VL0) || CI) {
         ValueList Left, Right;
-        reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE, *this);
+        if (!CI || all_of(VL, [](Value *V) {
+              return cast<CmpInst>(V)->isCommutative();
+            })) {
+          reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE, *this);
+        } else {
+          CmpInst::Predicate P0 = CI->getPredicate();
+          CmpInst::Predicate AltP0 = cast<CmpInst>(S.AltOp)->getPredicate();
+          CmpInst::Predicate AltP0Swapped = CmpInst::getSwappedPredicate(AltP0);
+          Value *BaseOp0 = VL0->getOperand(0);
+          Value *BaseOp1 = VL0->getOperand(1);
+          // Collect operands - commute if it uses the swapped predicate or
+          // alternate operation.
+          for (Value *V : VL) {
+            auto *Cmp = cast<CmpInst>(V);
+            Value *LHS = Cmp->getOperand(0);
+            Value *RHS = Cmp->getOperand(1);
+            CmpInst::Predicate CurrentPred = CI->getPredicate();
+            CmpInst::Predicate CurrentPredSwapped =
+                CmpInst::getSwappedPredicate(CurrentPred);
+            if (P0 == AltP0 || P0 == AltP0Swapped) {
+              if ((P0 == CurrentPred &&
+                   !areCompatibleCmpOps(BaseOp0, BaseOp1, LHS, RHS)) ||
+                  (P0 == CurrentPredSwapped &&
+                   !areCompatibleCmpOps(BaseOp0, BaseOp1, RHS, LHS)))
+                std::swap(LHS, RHS);
+            } else if (!areCompatibleCmpOps(BaseOp0, BaseOp1, LHS, RHS)) {
+              std::swap(LHS, RHS);
+            }
+            Left.push_back(LHS);
+            Right.push_back(RHS);
+          }
+        }
         TE->setOperand(0, Left);
         TE->setOperand(1, Right);
         buildTree_rec(Left, Depth + 1, {TE, 0});
@@ -5284,7 +5390,8 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
             ((Instruction::isBinaryOp(E->getOpcode()) &&
               Instruction::isBinaryOp(E->getAltOpcode())) ||
              (Instruction::isCast(E->getOpcode()) &&
-              Instruction::isCast(E->getAltOpcode()))) &&
+              Instruction::isCast(E->getAltOpcode())) ||
+             (isa<CmpInst>(VL0) && isa<CmpInst>(E->getAltOp()))) &&
            "Invalid Shuffle Vector Operand");
     InstructionCost ScalarCost = 0;
     if (NeedToShuffleReuses) {
@@ -5332,6 +5439,14 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
       VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind);
       VecCost += TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy,
                                              CostKind);
+    } else if (auto *CI0 = dyn_cast<CmpInst>(VL0)) {
+      VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), ScalarTy,
+                                        Builder.getInt1Ty(),
+                                        CI0->getPredicate(), CostKind, VL0);
+      VecCost += TTI->getCmpSelInstrCost(
+          E->getOpcode(), ScalarTy, Builder.getInt1Ty(),
+          cast<CmpInst>(E->getAltOp())->getPredicate(), CostKind,
+          E->getAltOp());
     } else {
       Type *Src0SclTy = E->getMainOp()->getOperand(0)->getType();
       Type *Src1SclTy = E->getAltOp()->getOperand(0)->getType();
@@ -5348,6 +5463,29 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
           E->Scalars, E->ReorderIndices, E->ReuseShuffleIndices,
           [E](Instruction *I) {
             assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
+            if (auto *CI0 = dyn_cast<CmpInst>(E->getMainOp())) {
+              auto *AltCI0 = cast<CmpInst>(E->getAltOp());
+              auto *CI = cast<CmpInst>(I);
+              CmpInst::Predicate P0 = CI0->getPredicate();
+              CmpInst::Predicate AltP0 = AltCI0->getPredicate();
+              CmpInst::Predicate AltP0Swapped =
+                  CmpInst::getSwappedPredicate(AltP0);
+              CmpInst::Predicate CurrentPred = CI->getPredicate();
+              CmpInst::Predicate CurrentPredSwapped =
+                  CmpInst::getSwappedPredicate(CurrentPred);
+              if (P0 == AltP0 || P0 == AltP0Swapped) {
+                // Alternate cmps have same/swapped predicate as main cmps but
+                // different order of compatible operands.
+                return !(
+                    (P0 == CurrentPred &&
+                     areCompatibleCmpOps(CI0->getOperand(0), CI0->getOperand(1),
+                                         I->getOperand(0), I->getOperand(1))) ||
+                    (P0 == CurrentPredSwapped &&
+                     areCompatibleCmpOps(CI0->getOperand(0), CI0->getOperand(1),
+                                         I->getOperand(1), I->getOperand(0))));
+              }
+              return CurrentPred != P0 && CurrentPredSwapped != P0;
+            }
             return I->getOpcode() == E->getAltOpcode();
           },
           Mask);
@@ -6830,11 +6968,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
             ((Instruction::isBinaryOp(E->getOpcode()) &&
               Instruction::isBinaryOp(E->getAltOpcode())) ||
              (Instruction::isCast(E->getOpcode()) &&
-              Instruction::isCast(E->getAltOpcode()))) &&
+              Instruction::isCast(E->getAltOpcode())) ||
+             (isa<CmpInst>(VL0) && isa<CmpInst>(E->getAltOp()))) &&
            "Invalid Shuffle Vector Operand");
 
       Value *LHS = nullptr, *RHS = nullptr;
-      if (Instruction::isBinaryOp(E->getOpcode())) {
+      if (Instruction::isBinaryOp(E->getOpcode()) || isa<CmpInst>(VL0)) {
         setInsertPointAfterBundle(E);
         LHS = vectorizeTree(E->getOperand(0));
         RHS = vectorizeTree(E->getOperand(1));
@@ -6854,6 +6993,15 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
             static_cast<Instruction::BinaryOps>(E->getOpcode()), LHS, RHS);
         V1 = Builder.CreateBinOp(
             static_cast<Instruction::BinaryOps>(E->getAltOpcode()), LHS, RHS);
+      } else if (auto *CI0 = dyn_cast<CmpInst>(VL0)) {
+        V0 = Builder.CreateCmp(CI0->getPredicate(), LHS, RHS);
+        auto *AltCI = cast<CmpInst>(E->getAltOp());
+        CmpInst::Predicate AltPred = AltCI->getPredicate();
+        unsigned AltIdx =
+            std::distance(E->Scalars.begin(), find(E->Scalars, AltCI));
+        if (AltCI->getOperand(0) != E->getOperand(0)[AltIdx])
+          AltPred = CmpInst::getSwappedPredicate(AltPred);
+        V1 = Builder.CreateCmp(AltPred, LHS, RHS);
       } else {
         V0 = Builder.CreateCast(
             static_cast<Instruction::CastOps>(E->getOpcode()), LHS, VecTy);
@@ -6878,6 +7026,29 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
           E->Scalars, E->ReorderIndices, E->ReuseShuffleIndices,
           [E](Instruction *I) {
             assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
+            if (auto *CI0 = dyn_cast<CmpInst>(E->getMainOp())) {
+              auto *AltCI0 = cast<CmpInst>(E->getAltOp());
+              auto *CI = cast<CmpInst>(I);
+              CmpInst::Predicate P0 = CI0->getPredicate();
+              CmpInst::Predicate AltP0 = AltCI0->getPredicate();
+              CmpInst::Predicate AltP0Swapped =
+                  CmpInst::getSwappedPredicate(AltP0);
+              CmpInst::Predicate CurrentPred = CI->getPredicate();
+              CmpInst::Predicate CurrentPredSwapped =
+                  CmpInst::getSwappedPredicate(CurrentPred);
+              if (P0 == AltP0 || P0 == AltP0Swapped) {
+                // Alternate cmps have same/swapped predicate as main cmps but
+                // different order of compatible operands.
+                return !(
+                    (P0 == CurrentPred &&
+                     areCompatibleCmpOps(CI0->getOperand(0), CI0->getOperand(1),
+                                         I->getOperand(0), I->getOperand(1))) ||
+                    (P0 == CurrentPredSwapped &&
+                     areCompatibleCmpOps(CI0->getOperand(0), CI0->getOperand(1),
+                                         I->getOperand(1), I->getOperand(0))));
+              }
+              return CurrentPred != P0 && CurrentPredSwapped != P0;
+            }
             return I->getOpcode() == E->getAltOpcode();
           },
           Mask, &OpScalars, &AltScalars);
@@ -7676,11 +7847,8 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
       for (ScheduleData *BundleMember = picked; BundleMember;
           BundleMember = BundleMember->NextInBundle) {
         Instruction *pickedInst = BundleMember->Inst;
-        if (pickedInst->getNextNode() != LastScheduledInst) {
-          BS->BB->getInstList().remove(pickedInst);
-          BS->BB->getInstList().insert(LastScheduledInst->getIterator(),
-                                       pickedInst);
-        }
+        if (pickedInst->getNextNode() != LastScheduledInst)
+          pickedInst->moveBefore(LastScheduledInst);
         LastScheduledInst = pickedInst;
       }
 
@@ -8444,7 +8612,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
       if (R.isTreeTinyAndNotFullyVectorizable())
         continue;
       R.reorderTopToBottom();
-      R.reorderBottomToTop();
+      R.reorderBottomToTop(!isa<InsertElementInst>(Ops.front()));
       R.buildExternalUses();
 
       R.computeMinimumValueSizes();
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
index e5dded3c0f1e..8822c0004eb2 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -75,7 +75,8 @@ class VPRecipeBuilder {
   /// Check if an induction recipe should be constructed for \I. If so build and
   /// return it. If not, return null.
   VPWidenIntOrFpInductionRecipe *
-  tryToOptimizeInductionPHI(PHINode *Phi, ArrayRef<VPValue *> Operands) const;
+  tryToOptimizeInductionPHI(PHINode *Phi, ArrayRef<VPValue *> Operands,
+                            VFRange &Range) const;
 
   /// Optimize the special case where the operand of \p I is a constant integer
   /// induction variable.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp
index a96c122db2a9..342d4a074e10 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -1649,3 +1649,9 @@ void VPSlotTracker::assignSlots(const VPlan &Plan) {
     for (VPValue *Def : Recipe.definedValues())
       assignSlot(Def);
 }
+
+bool vputils::onlyFirstLaneUsed(VPValue *Def) {
+  return all_of(Def->users(), [Def](VPUser *U) {
+    return cast<VPRecipeBase>(U)->onlyFirstLaneUsed(Def);
+  });
+}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h
index 824440f98a8b..bcaabca692cc 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -759,6 +759,14 @@ public:
   bool mayReadOrWriteMemory() const {
     return mayReadFromMemory() || mayWriteToMemory();
   }
+
+  /// Returns true if the recipe only uses the first lane of operand \p Op.
+  /// Conservatively returns false.
+  virtual bool onlyFirstLaneUsed(const VPValue *Op) const {
+    assert(is_contained(operands(), Op) &&
+           "Op must be an operand of the recipe");
+    return false;
+  }
 };
 
 inline bool VPUser::classof(const VPDef *Def) {
@@ -893,6 +901,24 @@ public:
   /// Set the fast-math flags.
   void setFastMathFlags(FastMathFlags FMFNew);
+
+  /// Returns true if the recipe only uses the first lane of operand \p Op.
+  bool onlyFirstLaneUsed(const VPValue *Op) const override {
+    assert(is_contained(operands(), Op) &&
+           "Op must be an operand of the recipe");
+    if (getOperand(0) != Op)
+      return false;
+    switch (getOpcode()) {
+    default:
+      return false;
+    case VPInstruction::ActiveLaneMask:
+    case VPInstruction::CanonicalIVIncrement:
+    case VPInstruction::CanonicalIVIncrementNUW:
+    case VPInstruction::BranchOnCount:
+      return true;
+    };
+    llvm_unreachable("switch should return");
+  }
 };
 
 /// VPWidenRecipe is a recipe for producing a copy of vector type its
@@ -1027,18 +1053,24 @@ public:
 class VPWidenIntOrFpInductionRecipe : public VPRecipeBase, public VPValue {
   PHINode *IV;
   const InductionDescriptor &IndDesc;
+  bool NeedsScalarIV;
+  bool NeedsVectorIV;
 
 public:
   VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start,
-                                const InductionDescriptor &IndDesc)
+                                const InductionDescriptor &IndDesc,
+                                bool NeedsScalarIV, bool NeedsVectorIV)
       : VPRecipeBase(VPWidenIntOrFpInductionSC, {Start}), VPValue(IV, this),
-        IV(IV), IndDesc(IndDesc) {}
+        IV(IV), IndDesc(IndDesc), NeedsScalarIV(NeedsScalarIV),
+        NeedsVectorIV(NeedsVectorIV) {}
 
   VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start,
                                 const InductionDescriptor &IndDesc,
-                                TruncInst *Trunc)
+                                TruncInst *Trunc, bool NeedsScalarIV,
+                                bool NeedsVectorIV)
       : VPRecipeBase(VPWidenIntOrFpInductionSC, {Start}), VPValue(Trunc, this),
-        IV(IV), IndDesc(IndDesc) {}
+        IV(IV), IndDesc(IndDesc), NeedsScalarIV(NeedsScalarIV),
+        NeedsVectorIV(NeedsVectorIV) {}
 
   ~VPWidenIntOrFpInductionRecipe() override = default;
 
@@ -1082,6 +1114,12 @@ public:
     const TruncInst *TruncI = getTruncInst();
     return TruncI ? TruncI->getType() : IV->getType();
   }
+
+  /// Returns true if a scalar phi needs to be created for the induction.
+  bool needsScalarIV() const { return NeedsScalarIV; }
+
+  /// Returns true if a vector phi needs to be created for the induction.
+  bool needsVectorIV() const { return NeedsVectorIV; }
 };
 
 /// A pure virtual base class for all recipes modeling header phis, including
@@ -1318,6 +1356,17 @@ public:
   void print(raw_ostream &O, const Twine &Indent,
              VPSlotTracker &SlotTracker) const override;
 #endif
+
+  /// Returns true if the recipe only uses the first lane of operand \p Op.
+  bool onlyFirstLaneUsed(const VPValue *Op) const override {
+    assert(is_contained(operands(), Op) &&
+           "Op must be an operand of the recipe");
+    // Recursing through Blend recipes only, must terminate at header phi's the
+    // latest.
+    return all_of(users(), [this](VPUser *U) {
+      return cast<VPRecipeBase>(U)->onlyFirstLaneUsed(this);
+    });
+  }
 };
 
 /// VPInterleaveRecipe is a recipe for transforming an interleave group of load
@@ -1495,6 +1544,13 @@ public:
   bool isPacked() const { return AlsoPack; }
 
   bool isPredicated() const { return IsPredicated; }
+
+  /// Returns true if the recipe only uses the first lane of operand \p Op.
+  bool onlyFirstLaneUsed(const VPValue *Op) const override {
+    assert(is_contained(operands(), Op) &&
+           "Op must be an operand of the recipe");
+    return isUniform();
+  }
 };
 
 /// A recipe for generating conditional branches on the bits of a mask.
@@ -1651,6 +1707,16 @@ public:
   void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
 #endif
+
+  /// Returns true if the recipe only uses the first lane of operand \p Op.
+  bool onlyFirstLaneUsed(const VPValue *Op) const override {
+    assert(is_contained(operands(), Op) &&
+           "Op must be an operand of the recipe");
+
+    // Widened, consecutive memory operations only demand the first lane of
+    // their address.
+    return Op == getAddr() && isConsecutive();
+  }
 };
 
 /// Canonical scalar induction phi of the vector loop. Starting at the specified
@@ -1686,6 +1752,13 @@ public:
   const Type *getScalarType() const {
     return getOperand(0)->getLiveInIRValue()->getType();
   }
+
+  /// Returns true if the recipe only uses the first lane of operand \p Op.
+  bool onlyFirstLaneUsed(const VPValue *Op) const override {
+    assert(is_contained(operands(), Op) &&
+           "Op must be an operand of the recipe");
+    return true;
+  }
 };
 
 /// A Recipe for widening the canonical induction variable of the vector loop.
@@ -2766,6 +2839,14 @@ public:
   /// Return true if all visited instruction can be combined.
   bool isCompletelySLP() const { return CompletelySLP; }
 };
+
+namespace vputils {
+
+/// Returns true if only the first lane of \p Def is used.
+bool onlyFirstLaneUsed(VPValue *Def);
+
+} // end namespace vputils
+
 } // end namespace llvm
 
 #endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
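The VPlan.h changes above thread a demanded-lanes query through the recipe hierarchy: a conservative virtual hook on the base class plus a user-walking free function. A standalone toy model of that pattern (simplified stand-in types, not the actual VPlan classes):

#include <algorithm>
#include <cassert>
#include <vector>

// Toy stand-ins for VPValue/VPRecipeBase.
struct Recipe;

struct Value {
  std::vector<const Recipe *> Users; // recipes consuming this value
};

struct Recipe {
  virtual ~Recipe() = default;
  // Conservative default: assume all lanes of the operand may be demanded.
  virtual bool onlyFirstLaneUsed(const Value *Op) const { return false; }
};

// A recipe that executes uniformly only ever reads lane 0 of its operands.
struct UniformRecipe : Recipe {
  bool onlyFirstLaneUsed(const Value *) const override { return true; }
};

// Mirrors the shape of vputils::onlyFirstLaneUsed: a value is
// first-lane-only if every user demands just its first lane.
static bool onlyFirstLaneUsed(const Value &Def) {
  return std::all_of(
      Def.Users.begin(), Def.Users.end(),
      [&](const Recipe *U) { return U->onlyFirstLaneUsed(&Def); });
}

int main() {
  Value V;
  UniformRecipe R1, R2;
  V.Users = {&R1, &R2};
  assert(onlyFirstLaneUsed(V)); // both users demand only lane 0

  Recipe Conservative; // default user: may demand all lanes
  V.Users.push_back(&Conservative);
  assert(!onlyFirstLaneUsed(V));
  return 0;
}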
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index fb5f3d428189..70ce773a8a85 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -47,7 +47,8 @@ void VPlanTransforms::VPInstructionsToVPRecipes(
         auto *Phi = cast<PHINode>(VPPhi->getUnderlyingValue());
         if (const auto *II = GetIntOrFpInductionDescriptor(Phi)) {
           VPValue *Start = Plan->getOrAddVPValue(II->getStartValue());
-          NewRecipe = new VPWidenIntOrFpInductionRecipe(Phi, Start, *II);
+          NewRecipe =
+              new VPWidenIntOrFpInductionRecipe(Phi, Start, *II, false, true);
         } else {
           Plan->addVPValue(Phi, VPPhi);
           continue;
@@ -341,10 +342,16 @@ void VPlanTransforms::removeRedundantCanonicalIVs(VPlan &Plan) {
   for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
     auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
 
-    // If the induction recipe is canonical and the types match, use it
-    // directly.
-    if (WidenOriginalIV && WidenOriginalIV->isCanonical() &&
-        WidenOriginalIV->getScalarType() == WidenNewIV->getScalarType()) {
+    if (!WidenOriginalIV || !WidenOriginalIV->isCanonical() ||
+        WidenOriginalIV->getScalarType() != WidenNewIV->getScalarType())
+      continue;
+
+    // Replace WidenNewIV with WidenOriginalIV if WidenOriginalIV provides
+    // everything WidenNewIV's users need. That is, WidenOriginalIV will
+    // generate a vector phi or all users of WidenNewIV demand the first lane
+    // only.
+    if (WidenOriginalIV->needsVectorIV() ||
+        vputils::onlyFirstLaneUsed(WidenNewIV)) {
       WidenNewIV->replaceAllUsesWith(WidenOriginalIV);
       WidenNewIV->eraseFromParent();
       return;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/Vectorize.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/Vectorize.cpp
index 0296a995ad29..010ca28fc237 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/Vectorize.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/Vectorize.cpp
@@ -18,6 +18,7 @@
 #include "llvm/Analysis/Passes.h"
 #include "llvm/IR/LegacyPassManager.h"
 #include "llvm/InitializePasses.h"
+#include "llvm/PassRegistry.h"
 
 using namespace llvm;
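As a closing note on the SLPVectorizer.cpp changes: the swapped-operand handling relies on the identity (a PRED b) == (b SWAPPED_PRED a). A tiny check of that identity against the real CmpInst API (the program itself is just an illustration):

#include "llvm/IR/InstrTypes.h"
#include <cassert>

using namespace llvm;

int main() {
  CmpInst::Predicate P = CmpInst::ICMP_SGT;
  CmpInst::Predicate Swapped = CmpInst::getSwappedPredicate(P);
  assert(Swapped == CmpInst::ICMP_SLT); // x > y  <=>  y < x
  // Swapping is an involution: applying it twice round-trips.
  assert(CmpInst::getSwappedPredicate(Swapped) == P);
  return 0;
}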