Diffstat (limited to 'llvm/lib/Transforms')
69 files changed, 2385 insertions, 2311 deletions
diff --git a/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp b/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp index f7bbdcffd2ec..81b43a2ab2c2 100644 --- a/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp @@ -112,8 +112,8 @@ static bool declaresCoroCleanupIntrinsics(const Module &M) { return coro::declaresIntrinsics( M, {"llvm.coro.alloc", "llvm.coro.begin", "llvm.coro.subfn.addr", "llvm.coro.free", "llvm.coro.id", "llvm.coro.id.retcon", - "llvm.coro.id.retcon.once", "llvm.coro.async.size.replace", - "llvm.coro.async.resume"}); + "llvm.coro.id.async", "llvm.coro.id.retcon.once", + "llvm.coro.async.size.replace", "llvm.coro.async.resume"}); } PreservedAnalyses CoroCleanupPass::run(Module &M, diff --git a/llvm/lib/Transforms/Coroutines/CoroInternal.h b/llvm/lib/Transforms/Coroutines/CoroInternal.h index 5557370c82ba..af35b45c2eaf 100644 --- a/llvm/lib/Transforms/Coroutines/CoroInternal.h +++ b/llvm/lib/Transforms/Coroutines/CoroInternal.h @@ -17,8 +17,6 @@ namespace llvm { class CallGraph; -class CallGraphSCC; -class PassRegistry; namespace coro { diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp index 9c1b247cdb39..722a1c6ec0ce 100644 --- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -1961,6 +1961,13 @@ static coro::Shape splitCoroutine(Function &F, return Shape; } +/// Remove calls to llvm.coro.end in the original function. +static void removeCoroEnds(const coro::Shape &Shape) { + for (auto End : Shape.CoroEnds) { + replaceCoroEnd(End, Shape, Shape.FramePtr, /*in resume*/ false, nullptr); + } +} + static void updateCallGraphAfterCoroutineSplit( LazyCallGraph::Node &N, const coro::Shape &Shape, const SmallVectorImpl<Function *> &Clones, LazyCallGraph::SCC &C, @@ -1969,10 +1976,14 @@ static void updateCallGraphAfterCoroutineSplit( if (!Shape.CoroBegin) return; - for (llvm::AnyCoroEndInst *End : Shape.CoroEnds) { - auto &Context = End->getContext(); - End->replaceAllUsesWith(ConstantInt::getFalse(Context)); - End->eraseFromParent(); + if (Shape.ABI != coro::ABI::Switch) + removeCoroEnds(Shape); + else { + for (llvm::AnyCoroEndInst *End : Shape.CoroEnds) { + auto &Context = End->getContext(); + End->replaceAllUsesWith(ConstantInt::getFalse(Context)); + End->eraseFromParent(); + } } if (!Clones.empty()) { diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index e5ff98e4f73f..37c773bd47d6 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -326,7 +326,7 @@ static bool getPotentialCopiesOfMemoryValue( << " (only exact: " << OnlyExact << ")\n";); Value &Ptr = *I.getPointerOperand(); - SmallVector<Value *, 8> Objects; + SmallSetVector<Value *, 8> Objects; if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, QueryingAA, &I, UsedAssumedInformation)) { LLVM_DEBUG( @@ -343,6 +343,7 @@ static bool getPotentialCopiesOfMemoryValue( const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*I.getFunction()); + LLVM_DEBUG(dbgs() << "Visit " << Objects.size() << " objects:\n"); for (Value *Obj : Objects) { LLVM_DEBUG(dbgs() << "Visit underlying object " << *Obj << "\n"); if (isa<UndefValue>(Obj)) @@ -352,8 +353,8 @@ static bool getPotentialCopiesOfMemoryValue( // be OK. We do not try to optimize the latter. 
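// A minimal sketch, not part of the patch: the reason `Objects` above can
// switch from SmallVector to SmallSetVector is that insert() deduplicates
// while preserving first-insertion iteration order, so a recursive
// collection pass may re-discover the same underlying object harmlessly.
//   llvm::SmallSetVector<llvm::Value *, 8> Objects;
//   Objects.insert(Obj);            // returns false on a duplicate insert
//   for (llvm::Value *O : Objects)  // visits each object exactly once,
//     ...                           // in the order it was first inserted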
if (!NullPointerIsDefined(I.getFunction(), Ptr.getType()->getPointerAddressSpace()) && - A.getAssumedSimplified(Ptr, QueryingAA, UsedAssumedInformation) == - Obj) + A.getAssumedSimplified(Ptr, QueryingAA, UsedAssumedInformation, + AA::Interprocedural) == Obj) continue; LLVM_DEBUG( dbgs() << "Underlying object is a valid nullptr, giving up.\n";); @@ -375,25 +376,37 @@ static bool getPotentialCopiesOfMemoryValue( return false; } - if (IsLoad) { - Value *InitialValue = AA::getInitialValueForObj(*Obj, *I.getType(), TLI); - if (!InitialValue) - return false; - NewCopies.push_back(InitialValue); - NewCopyOrigins.push_back(nullptr); - } + bool NullOnly = true; + bool NullRequired = false; + auto CheckForNullOnlyAndUndef = [&](Optional<Value *> V, bool IsExact) { + if (!V || *V == nullptr) + NullOnly = false; + else if (isa<UndefValue>(*V)) + /* No op */; + else if (isa<Constant>(*V) && cast<Constant>(*V)->isNullValue()) + NullRequired = !IsExact; + else + NullOnly = false; + }; auto CheckAccess = [&](const AAPointerInfo::Access &Acc, bool IsExact) { if ((IsLoad && !Acc.isWrite()) || (!IsLoad && !Acc.isRead())) return true; if (IsLoad && Acc.isWrittenValueYetUndetermined()) return true; - if (OnlyExact && !IsExact && + CheckForNullOnlyAndUndef(Acc.getContent(), IsExact); + if (OnlyExact && !IsExact && !NullOnly && !isa_and_nonnull<UndefValue>(Acc.getWrittenValue())) { LLVM_DEBUG(dbgs() << "Non exact access " << *Acc.getRemoteInst() << ", abort!\n"); return false; } + if (NullRequired && !NullOnly) { + LLVM_DEBUG(dbgs() << "Required all `null` accesses due to non exact " + "one, however found non-null one: " + << *Acc.getRemoteInst() << ", abort!\n"); + return false; + } if (IsLoad) { assert(isa<LoadInst>(I) && "Expected load or store instruction only!"); if (!Acc.isWrittenValueUnknown()) { @@ -424,15 +437,36 @@ static bool getPotentialCopiesOfMemoryValue( return true; }; + // If the value has been written to we don't need the initial value of the + // object. + bool HasBeenWrittenTo = false; + auto &PI = A.getAAFor<AAPointerInfo>(QueryingAA, IRPosition::value(*Obj), DepClassTy::NONE); - if (!PI.forallInterferingAccesses(A, QueryingAA, I, CheckAccess)) { + if (!PI.forallInterferingAccesses(A, QueryingAA, I, CheckAccess, + HasBeenWrittenTo)) { LLVM_DEBUG( dbgs() << "Failed to verify all interfering accesses for underlying object: " << *Obj << "\n"); return false; } + + if (IsLoad && !HasBeenWrittenTo) { + Value *InitialValue = AA::getInitialValueForObj(*Obj, *I.getType(), TLI); + if (!InitialValue) + return false; + CheckForNullOnlyAndUndef(InitialValue, /* IsExact */ true); + if (NullRequired && !NullOnly) { + LLVM_DEBUG(dbgs() << "Non exact access but initial value that is not " + "null or undef, abort!\n"); + return false; + } + + NewCopies.push_back(InitialValue); + NewCopyOrigins.push_back(nullptr); + } + PIs.push_back(&PI); } @@ -520,12 +554,21 @@ isPotentiallyReachable(Attributor &A, const Instruction &FromI, << " from " << FromI << " [GBCB: " << bool(GoBackwardsCB) << "]\n"); + // TODO: If we can go arbitrarily backwards we will eventually reach an + // entry point that can reach ToI. Only once this takes a set of blocks + // through which we cannot go, or once we track internal functions not + // accessible from the outside, it makes sense to perform backwards analysis + // in the absence of a GoBackwardsCB. 
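// A minimal sketch, mirroring the callback shape used at other call sites
// in this patch (AANoAliasCallSiteArgument, AAInstanceInfoImpl): callers
// that do want backwards reasoning pass a GoBackwardsCB naming which
// functions the walk may travel back through, e.g. all but the scope:
//   const Function *ScopeFn = /* current scope, hypothetical setup */;
//   auto GoBackwardsCB = [ScopeFn](const Function &Fn) {
//     return &Fn != ScopeFn;
//   };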
+ if (!GoBackwardsCB) { + LLVM_DEBUG(dbgs() << "[AA] check @" << ToFn.getName() << " from " << FromI + << " is not checked backwards, abort\n"); + return true; + } + SmallPtrSet<const Instruction *, 8> Visited; SmallVector<const Instruction *> Worklist; Worklist.push_back(&FromI); - const auto &NoRecurseAA = A.getAAFor<AANoRecurse>( - QueryingAA, IRPosition::function(ToFn), DepClassTy::OPTIONAL); while (!Worklist.empty()) { const Instruction *CurFromI = Worklist.pop_back_val(); if (!Visited.insert(CurFromI).second) @@ -545,26 +588,13 @@ isPotentiallyReachable(Attributor &A, const Instruction &FromI, << *ToI << " [Intra]\n"); if (Result) return true; - if (NoRecurseAA.isAssumedNoRecurse()) - continue; - } - - // TODO: If we can go arbitrarily backwards we will eventually reach an - // entry point that can reach ToI. Only once this takes a set of blocks - // through which we cannot go, or once we track internal functions not - // accessible from the outside, it makes sense to perform backwards analysis - // in the absence of a GoBackwardsCB. - if (!GoBackwardsCB) { - LLVM_DEBUG(dbgs() << "[AA] check @" << ToFn.getName() << " from " - << *CurFromI << " is not checked backwards, abort\n"); - return true; } // Check if the current instruction is already known to reach the ToFn. const auto &FnReachabilityAA = A.getAAFor<AAFunctionReachability>( QueryingAA, IRPosition::function(*FromFn), DepClassTy::OPTIONAL); bool Result = FnReachabilityAA.instructionCanReach( - A, *CurFromI, ToFn, /* UseBackwards */ false); + A, *CurFromI, ToFn); LLVM_DEBUG(dbgs() << "[AA] " << *CurFromI << " in @" << FromFn->getName() << " " << (Result ? "can potentially " : "cannot ") << "reach @" << ToFn.getName() << " [FromFn]\n"); @@ -1038,60 +1068,74 @@ Attributor::getAssumedConstant(const IRPosition &IRP, } if (auto *C = dyn_cast<Constant>(&IRP.getAssociatedValue())) return C; - const auto &ValueSimplifyAA = - getAAFor<AAValueSimplify>(AA, IRP, DepClassTy::NONE); - Optional<Value *> SimplifiedV = - ValueSimplifyAA.getAssumedSimplifiedValue(*this); - bool IsKnown = ValueSimplifyAA.isAtFixpoint(); - UsedAssumedInformation |= !IsKnown; - if (!SimplifiedV) { - recordDependence(ValueSimplifyAA, AA, DepClassTy::OPTIONAL); - return llvm::None; - } - if (isa_and_nonnull<UndefValue>(SimplifiedV.value())) { - recordDependence(ValueSimplifyAA, AA, DepClassTy::OPTIONAL); - return UndefValue::get(IRP.getAssociatedType()); + SmallVector<AA::ValueAndContext> Values; + if (getAssumedSimplifiedValues(IRP, &AA, Values, + AA::ValueScope::Interprocedural, + UsedAssumedInformation)) { + if (Values.empty()) + return llvm::None; + if (auto *C = dyn_cast_or_null<Constant>( + AAPotentialValues::getSingleValue(*this, AA, IRP, Values))) + return C; } - Constant *CI = dyn_cast_or_null<Constant>(SimplifiedV.value()); - if (CI) - CI = dyn_cast_or_null<Constant>( - AA::getWithType(*CI, *IRP.getAssociatedType())); - if (CI) - recordDependence(ValueSimplifyAA, AA, DepClassTy::OPTIONAL); - return CI; + return nullptr; } -Optional<Value *> -Attributor::getAssumedSimplified(const IRPosition &IRP, - const AbstractAttribute *AA, - bool &UsedAssumedInformation) { +Optional<Value *> Attributor::getAssumedSimplified(const IRPosition &IRP, + const AbstractAttribute *AA, + bool &UsedAssumedInformation, + AA::ValueScope S) { // First check all callbacks provided by outside AAs. If any of them returns // a non-null value that is different from the associated value, or None, we // assume it's simplified. 
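// A sketch of the scope flags threaded through this query, assuming (from
// the uses below: `S & AA::ValueScope::Interprocedural`, AA::Intraprocedural,
// AA::AnyScope) a bitmask along the lines of:
//   enum ValueScope {
//     Intraprocedural = 1,
//     Interprocedural = 2,
//     AnyScope = Intraprocedural | Interprocedural,
//   };
// Intraprocedural restricts results to values valid in the anchor scope;
// AnyScope accepts a simplification from either scope.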
for (auto &CB : SimplificationCallbacks.lookup(IRP)) return CB(IRP, AA, UsedAssumedInformation); - // If no high-level/outside simplification occurred, use AAValueSimplify. - const auto &ValueSimplifyAA = - getOrCreateAAFor<AAValueSimplify>(IRP, AA, DepClassTy::NONE); - Optional<Value *> SimplifiedV = - ValueSimplifyAA.getAssumedSimplifiedValue(*this); - bool IsKnown = ValueSimplifyAA.isAtFixpoint(); - UsedAssumedInformation |= !IsKnown; - if (!SimplifiedV) { - if (AA) - recordDependence(ValueSimplifyAA, *AA, DepClassTy::OPTIONAL); + SmallVector<AA::ValueAndContext> Values; + if (!getAssumedSimplifiedValues(IRP, AA, Values, S, UsedAssumedInformation)) + return &IRP.getAssociatedValue(); + if (Values.empty()) return llvm::None; + if (AA) + if (Value *V = AAPotentialValues::getSingleValue(*this, *AA, IRP, Values)) + return V; + if (IRP.getPositionKind() == IRPosition::IRP_RETURNED || + IRP.getPositionKind() == IRPosition::IRP_CALL_SITE_RETURNED) + return nullptr; + return &IRP.getAssociatedValue(); +} + +bool Attributor::getAssumedSimplifiedValues( + const IRPosition &IRP, const AbstractAttribute *AA, + SmallVectorImpl<AA::ValueAndContext> &Values, AA::ValueScope S, + bool &UsedAssumedInformation) { + // First check all callbacks provided by outside AAs. If any of them returns + // a non-null value that is different from the associated value, or None, we + // assume it's simplified. + const auto &SimplificationCBs = SimplificationCallbacks.lookup(IRP); + for (auto &CB : SimplificationCBs) { + Optional<Value *> CBResult = CB(IRP, AA, UsedAssumedInformation); + if (!CBResult.has_value()) + continue; + Value *V = CBResult.value(); + if (!V) + return false; + if ((S & AA::ValueScope::Interprocedural) || + AA::isValidInScope(*V, IRP.getAnchorScope())) + Values.push_back(AA::ValueAndContext{*V, nullptr}); + else + return false; } - if (*SimplifiedV == nullptr) - return const_cast<Value *>(&IRP.getAssociatedValue()); - if (Value *SimpleV = - AA::getWithType(**SimplifiedV, *IRP.getAssociatedType())) { - if (AA) - recordDependence(ValueSimplifyAA, *AA, DepClassTy::OPTIONAL); - return SimpleV; - } - return const_cast<Value *>(&IRP.getAssociatedValue()); + if (!SimplificationCBs.empty()) + return true; + + // If no high-level/outside simplification occurred, use AAPotentialValues. 
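// A minimal usage sketch of this query, mirroring the fallback pattern the
// patch installs in AANonNull, AADereferenceable, and AAAlign below: ask
// for the simplified value set, and keep the associated value if it fails.
//   bool UsedAssumedInformation = false;
//   SmallVector<AA::ValueAndContext> Values;
//   if (!A.getAssumedSimplifiedValues(getIRPosition(), *this, Values,
//                                     AA::AnyScope, UsedAssumedInformation))
//     Values.push_back({getAssociatedValue(), getCtxI()});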
+ const auto &PotentialValuesAA = + getOrCreateAAFor<AAPotentialValues>(IRP, AA, DepClassTy::OPTIONAL); + if (!PotentialValuesAA.getAssumedSimplifiedValues(*this, Values, S)) + return false; + UsedAssumedInformation |= !PotentialValuesAA.isAtFixpoint(); + return true; } Optional<Value *> Attributor::translateArgumentToCallSiteContent( @@ -1106,7 +1150,7 @@ Optional<Value *> Attributor::translateArgumentToCallSiteContent( if (!Arg->hasPointeeInMemoryValueAttr()) return getAssumedSimplified( IRPosition::callsite_argument(CB, Arg->getArgNo()), AA, - UsedAssumedInformation); + UsedAssumedInformation, AA::Intraprocedural); return nullptr; } @@ -1295,8 +1339,21 @@ bool Attributor::checkForAllUses( SmallVector<const Use *, 16> Worklist; SmallPtrSet<const Use *, 16> Visited; - for (const Use &U : V.uses()) - Worklist.push_back(&U); + auto AddUsers = [&](const Value &V, const Use *OldUse) { + for (const Use &UU : V.uses()) { + if (OldUse && EquivalentUseCB && !EquivalentUseCB(*OldUse, UU)) { + LLVM_DEBUG(dbgs() << "[Attributor] Potential copy was " + "rejected by the equivalence call back: " + << *UU << "!\n"); + return false; + } + + Worklist.push_back(&UU); + } + return true; + }; + + AddUsers(V, /* OldUse */ nullptr); LLVM_DEBUG(dbgs() << "[Attributor] Got " << Worklist.size() << " initial uses to check\n"); @@ -1342,15 +1399,8 @@ bool Attributor::checkForAllUses( << PotentialCopies.size() << " potential copies instead!\n"); for (Value *PotentialCopy : PotentialCopies) - for (const Use &CopyUse : PotentialCopy->uses()) { - if (EquivalentUseCB && !EquivalentUseCB(*U, CopyUse)) { - LLVM_DEBUG(dbgs() << "[Attributor] Potential copy was " - "rejected by the equivalence call back: " - << *CopyUse << "!\n"); - return false; - } - Worklist.push_back(&CopyUse); - } + if (!AddUsers(*PotentialCopy, U)) + return false; continue; } } @@ -1361,8 +1411,25 @@ bool Attributor::checkForAllUses( return false; if (!Follow) continue; - for (const Use &UU : U->getUser()->uses()) - Worklist.push_back(&UU); + + User &Usr = *U->getUser(); + AddUsers(Usr, /* OldUse */ nullptr); + + auto *RI = dyn_cast<ReturnInst>(&Usr); + if (!RI) + continue; + + Function &F = *RI->getFunction(); + auto CallSitePred = [&](AbstractCallSite ACS) { + return AddUsers(*ACS.getInstruction(), U); + }; + if (!checkForAllCallSites(CallSitePred, F, /* RequireAllCallSites */ true, + &QueryingAA, UsedAssumedInformation)) { + LLVM_DEBUG(dbgs() << "[Attributor] Could not follow return instruction " + "to all call sites: " + << *RI << "\n"); + return false; + } } return true; @@ -1918,7 +1985,8 @@ ChangeStatus Attributor::cleanupIR() { << ToBeDeletedInsts.size() << " instructions and " << ToBeChangedValues.size() << " values and " << ToBeChangedUses.size() << " uses. To insert " - << ToBeChangedToUnreachableInsts.size() << " unreachables." + << ToBeChangedToUnreachableInsts.size() + << " unreachables.\n" << "Preserve manifest added " << ManifestAddedBlocks.size() << " blocks\n"); @@ -2046,6 +2114,8 @@ ChangeStatus Attributor::cleanupIR() { } for (auto &V : ToBeChangedToUnreachableInsts) if (Instruction *I = dyn_cast_or_null<Instruction>(V)) { + LLVM_DEBUG(dbgs() << "[Attributor] Change to unreachable: " << *I + << "\n"); assert(isRunOn(*I->getFunction()) && "Cannot replace an instruction outside the current SCC!"); CGModifiedFunctions.insert(I->getFunction()); @@ -2877,7 +2947,8 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { // Every function might be simplified. 
bool UsedAssumedInformation = false; - getAssumedSimplified(RetPos, nullptr, UsedAssumedInformation); + getAssumedSimplified(RetPos, nullptr, UsedAssumedInformation, + AA::Intraprocedural); // Every returned value might be marked noundef. getOrCreateAAFor<AANoUndef>(RetPos); @@ -2906,7 +2977,8 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { // interface though as outside AAs can register custom simplification // callbacks. bool UsedAssumedInformation = false; - getAssumedSimplified(ArgPos, /* AA */ nullptr, UsedAssumedInformation); + getAssumedSimplified(ArgPos, /* AA */ nullptr, UsedAssumedInformation, + AA::Intraprocedural); // Every argument might be dead. getOrCreateAAFor<AAIsDead>(ArgPos); @@ -2970,7 +3042,8 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { IRPosition CBRetPos = IRPosition::callsite_returned(CB); bool UsedAssumedInformation = false; - getAssumedSimplified(CBRetPos, nullptr, UsedAssumedInformation); + getAssumedSimplified(CBRetPos, nullptr, UsedAssumedInformation, + AA::Intraprocedural); } for (int I = 0, E = CB.arg_size(); I < E; ++I) { @@ -2984,7 +3057,8 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { // Attributor interface though as outside AAs can register custom // simplification callbacks. bool UsedAssumedInformation = false; - getAssumedSimplified(CBArgPos, /* AA */ nullptr, UsedAssumedInformation); + getAssumedSimplified(CBArgPos, /* AA */ nullptr, UsedAssumedInformation, + AA::Intraprocedural); // Every call site argument might be marked "noundef". getOrCreateAAFor<AANoUndef>(CBArgPos); @@ -3034,12 +3108,12 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { IRPosition::value(*cast<LoadInst>(I).getPointerOperand())); if (SimplifyAllLoads) getAssumedSimplified(IRPosition::value(I), nullptr, - UsedAssumedInformation); + UsedAssumedInformation, AA::Intraprocedural); } else { auto &SI = cast<StoreInst>(I); getOrCreateAAFor<AAIsDead>(IRPosition::inst(I)); getAssumedSimplified(IRPosition::value(*SI.getValueOperand()), nullptr, - UsedAssumedInformation); + UsedAssumedInformation, AA::Intraprocedural); getOrCreateAAFor<AAAlign>(IRPosition::value(*SI.getPointerOperand())); } return true; @@ -3126,6 +3200,26 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, return OS; } +raw_ostream &llvm::operator<<(raw_ostream &OS, + const PotentialLLVMValuesState &S) { + OS << "set-state(< {"; + if (!S.isValidState()) + OS << "full-set"; + else { + for (auto &It : S.getAssumedSet()) { + if (auto *F = dyn_cast<Function>(It.first.getValue())) + OS << "@" << F->getName() << "[" << int(It.second) << "], "; + else + OS << *It.first.getValue() << "[" << int(It.second) << "], "; + } + if (S.undefIsContained()) + OS << "undef "; + } + OS << "} >)"; + + return OS; +} + void AbstractAttribute::print(raw_ostream &OS) const { OS << "["; OS << getName(); diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 1ff54b78e27e..660ff3ee9563 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -14,12 +14,14 @@ #include "llvm/Transforms/IPO/Attributor.h" #include "llvm/ADT/APInt.h" +#include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include 
"llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumeBundleQueries.h" @@ -35,11 +37,13 @@ #include "llvm/IR/Argument.h" #include "llvm/IR/Assumptions.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" @@ -72,6 +76,8 @@ static cl::opt<int> MaxHeapToStackSize("max-heap-to-stack-size", cl::init(128), template <> unsigned llvm::PotentialConstantIntValuesState::MaxPotentialValues = 0; +template <> unsigned llvm::PotentialLLVMValuesState::MaxPotentialValues = -1; + static cl::opt<unsigned, true> MaxPotentialValues( "attributor-max-potential-values", cl::Hidden, cl::desc("Maximum number of potential values to be " @@ -79,6 +85,12 @@ static cl::opt<unsigned, true> MaxPotentialValues( cl::location(llvm::PotentialConstantIntValuesState::MaxPotentialValues), cl::init(7)); +static cl::opt<int> MaxPotentialValuesIterations( + "attributor-max-potential-values-iterations", cl::Hidden, + cl::desc( + "Maximum number of iterations we keep dismantling potential values."), + cl::init(64)); + static cl::opt<unsigned> MaxInterferingAccesses( "attributor-max-interfering-accesses", cl::Hidden, cl::desc("Maximum number of interfering accesses to " @@ -162,6 +174,7 @@ PIPE_OPERATOR(AAValueConstantRange) PIPE_OPERATOR(AAPrivatizablePtr) PIPE_OPERATOR(AAUndefinedBehavior) PIPE_OPERATOR(AAPotentialConstantValues) +PIPE_OPERATOR(AAPotentialValues) PIPE_OPERATOR(AANoUndef) PIPE_OPERATOR(AACallEdges) PIPE_OPERATOR(AAFunctionReachability) @@ -293,228 +306,35 @@ static Value *constructPointer(Type *ResTy, Type *PtrElemTy, Value *Ptr, return Ptr; } -/// Recursively visit all values that might become \p IRP at some point. This -/// will be done by looking through cast instructions, selects, phis, and calls -/// with the "returned" attribute. Once we cannot look through the value any -/// further, the callback \p VisitValueCB is invoked and passed the current -/// value, the \p State, and a flag to indicate if we stripped anything. -/// Stripped means that we unpacked the value associated with \p IRP at least -/// once. Note that the value used for the callback may still be the value -/// associated with \p IRP (due to PHIs). To limit how much effort is invested, -/// we will never visit more values than specified by \p MaxValues. -/// If \p VS does not contain the Interprocedural bit, only values valid in the -/// scope of \p CtxI will be visited and simplification into other scopes is -/// prevented. 
-template <typename StateTy> -static bool genericValueTraversal( - Attributor &A, IRPosition IRP, const AbstractAttribute &QueryingAA, - StateTy &State, - function_ref<bool(Value &, const Instruction *, StateTy &, bool)> - VisitValueCB, - const Instruction *CtxI, bool &UsedAssumedInformation, - bool UseValueSimplify = true, int MaxValues = 16, - function_ref<Value *(Value *)> StripCB = nullptr, - AA::ValueScope VS = AA::Interprocedural) { - - struct LivenessInfo { - const AAIsDead *LivenessAA = nullptr; - bool AnyDead = false; - }; - SmallMapVector<const Function *, LivenessInfo, 4> LivenessAAs; - auto GetLivenessInfo = [&](const Function &F) -> LivenessInfo & { - LivenessInfo &LI = LivenessAAs[&F]; - if (!LI.LivenessAA) - LI.LivenessAA = &A.getAAFor<AAIsDead>(QueryingAA, IRPosition::function(F), - DepClassTy::NONE); - return LI; - }; - - Value *InitialV = &IRP.getAssociatedValue(); - using Item = std::pair<Value *, const Instruction *>; - SmallSet<Item, 16> Visited; - SmallVector<Item, 16> Worklist; - Worklist.push_back({InitialV, CtxI}); - - int Iteration = 0; - do { - Item I = Worklist.pop_back_val(); - Value *V = I.first; - CtxI = I.second; - if (StripCB) - V = StripCB(V); - - // Check if we should process the current value. To prevent endless - // recursion keep a record of the values we followed! - if (!Visited.insert(I).second) - continue; - - // Make sure we limit the compile time for complex expressions. - if (Iteration++ >= MaxValues) { - LLVM_DEBUG(dbgs() << "Generic value traversal reached iteration limit: " - << Iteration << "!\n"); - return false; - } - - // Explicitly look through calls with a "returned" attribute if we do - // not have a pointer as stripPointerCasts only works on them. - Value *NewV = nullptr; - if (V->getType()->isPointerTy()) { - NewV = V->stripPointerCasts(); - } else { - auto *CB = dyn_cast<CallBase>(V); - if (CB && CB->getCalledFunction()) { - for (Argument &Arg : CB->getCalledFunction()->args()) - if (Arg.hasReturnedAttr()) { - NewV = CB->getArgOperand(Arg.getArgNo()); - break; - } - } - } - if (NewV && NewV != V) { - Worklist.push_back({NewV, CtxI}); - continue; - } - - // Look through select instructions, visit assumed potential values. - if (auto *SI = dyn_cast<SelectInst>(V)) { - Optional<Constant *> C = A.getAssumedConstant( - *SI->getCondition(), QueryingAA, UsedAssumedInformation); - bool NoValueYet = !C; - if (NoValueYet || isa_and_nonnull<UndefValue>(*C)) - continue; - if (auto *CI = dyn_cast_or_null<ConstantInt>(*C)) { - if (CI->isZero()) - Worklist.push_back({SI->getFalseValue(), CtxI}); - else - Worklist.push_back({SI->getTrueValue(), CtxI}); - continue; - } - // We could not simplify the condition, assume both values.( - Worklist.push_back({SI->getTrueValue(), CtxI}); - Worklist.push_back({SI->getFalseValue(), CtxI}); - continue; - } - - // Look through phi nodes, visit all live operands. 
- if (auto *PHI = dyn_cast<PHINode>(V)) { - LivenessInfo &LI = GetLivenessInfo(*PHI->getFunction()); - for (unsigned u = 0, e = PHI->getNumIncomingValues(); u < e; u++) { - BasicBlock *IncomingBB = PHI->getIncomingBlock(u); - if (LI.LivenessAA->isEdgeDead(IncomingBB, PHI->getParent())) { - LI.AnyDead = true; - UsedAssumedInformation |= !LI.LivenessAA->isAtFixpoint(); - continue; - } - Worklist.push_back( - {PHI->getIncomingValue(u), IncomingBB->getTerminator()}); - } - continue; - } - - if (auto *Arg = dyn_cast<Argument>(V)) { - if ((VS & AA::Interprocedural) && !Arg->hasPassPointeeByValueCopyAttr()) { - SmallVector<Item> CallSiteValues; - bool UsedAssumedInformation = false; - if (A.checkForAllCallSites( - [&](AbstractCallSite ACS) { - // Callbacks might not have a corresponding call site operand, - // stick with the argument in that case. - Value *CSOp = ACS.getCallArgOperand(*Arg); - if (!CSOp) - return false; - CallSiteValues.push_back({CSOp, ACS.getInstruction()}); - return true; - }, - *Arg->getParent(), true, &QueryingAA, UsedAssumedInformation)) { - Worklist.append(CallSiteValues); - continue; - } - } - } - - if (UseValueSimplify && !isa<Constant>(V)) { - Optional<Value *> SimpleV = - A.getAssumedSimplified(*V, QueryingAA, UsedAssumedInformation); - if (!SimpleV) - continue; - Value *NewV = SimpleV.value(); - if (NewV && NewV != V) { - if ((VS & AA::Interprocedural) || !CtxI || - AA::isValidInScope(*NewV, CtxI->getFunction())) { - Worklist.push_back({NewV, CtxI}); - continue; - } - } - } - - if (auto *LI = dyn_cast<LoadInst>(V)) { - bool UsedAssumedInformation = false; - // If we ask for the potentially loaded values from the initial pointer we - // will simply end up here again. The load is as far as we can make it. - if (LI->getPointerOperand() != InitialV) { - SmallSetVector<Value *, 4> PotentialCopies; - SmallSetVector<Instruction *, 4> PotentialValueOrigins; - if (AA::getPotentiallyLoadedValues(A, *LI, PotentialCopies, - PotentialValueOrigins, QueryingAA, - UsedAssumedInformation, - /* OnlyExact */ true)) { - // Values have to be dynamically unique or we loose the fact that a - // single llvm::Value might represent two runtime values (e.g., stack - // locations in different recursive calls). - bool DynamicallyUnique = - llvm::all_of(PotentialCopies, [&A, &QueryingAA](Value *PC) { - return AA::isDynamicallyUnique(A, QueryingAA, *PC); - }); - if (DynamicallyUnique && - ((VS & AA::Interprocedural) || !CtxI || - llvm::all_of(PotentialCopies, [CtxI](Value *PC) { - return AA::isValidInScope(*PC, CtxI->getFunction()); - }))) { - for (auto *PotentialCopy : PotentialCopies) - Worklist.push_back({PotentialCopy, CtxI}); - continue; - } - } - } - } - - // Once a leaf is reached we inform the user through the callback. - if (!VisitValueCB(*V, CtxI, State, Iteration > 1)) { - LLVM_DEBUG(dbgs() << "Generic value traversal visit callback failed for: " - << *V << "!\n"); - return false; - } - } while (!Worklist.empty()); - - // If we actually used liveness information so we have to record a dependence. - for (auto &It : LivenessAAs) - if (It.second.AnyDead) - A.recordDependence(*It.second.LivenessAA, QueryingAA, - DepClassTy::OPTIONAL); - - // All values have been visited. 
- return true; -} - bool AA::getAssumedUnderlyingObjects(Attributor &A, const Value &Ptr, - SmallVectorImpl<Value *> &Objects, + SmallSetVector<Value *, 8> &Objects, const AbstractAttribute &QueryingAA, const Instruction *CtxI, bool &UsedAssumedInformation, - AA::ValueScope VS) { - auto StripCB = [&](Value *V) { return getUnderlyingObject(V); }; - SmallPtrSet<Value *, 8> SeenObjects; - auto VisitValueCB = [&SeenObjects](Value &Val, const Instruction *, - SmallVectorImpl<Value *> &Objects, - bool) -> bool { - if (SeenObjects.insert(&Val).second) - Objects.push_back(&Val); + AA::ValueScope S, + SmallPtrSetImpl<Value *> *SeenObjects) { + SmallPtrSet<Value *, 8> LocalSeenObjects; + if (!SeenObjects) + SeenObjects = &LocalSeenObjects; + + SmallVector<AA::ValueAndContext> Values; + if (!A.getAssumedSimplifiedValues(IRPosition::value(Ptr), &QueryingAA, Values, + S, UsedAssumedInformation)) { + Objects.insert(const_cast<Value *>(&Ptr)); return true; - }; - if (!genericValueTraversal<decltype(Objects)>( - A, IRPosition::value(Ptr), QueryingAA, Objects, VisitValueCB, CtxI, - UsedAssumedInformation, true, 32, StripCB, VS)) - return false; + } + + for (auto &VAC : Values) { + Value *UO = getUnderlyingObject(VAC.getValue()); + if (UO && UO != VAC.getValue() && SeenObjects->insert(UO).second) { + if (!getAssumedUnderlyingObjects(A, *UO, Objects, QueryingAA, + VAC.getCtxI(), UsedAssumedInformation, S, + SeenObjects)) + return false; + continue; + } + Objects.insert(VAC.getValue()); + } return true; } @@ -1122,9 +942,6 @@ struct AAPointerInfoImpl using BaseTy = StateWrapper<AA::PointerInfo::State, AAPointerInfo>; AAPointerInfoImpl(const IRPosition &IRP, Attributor &A) : BaseTy(IRP) {} - /// See AbstractAttribute::initialize(...). - void initialize(Attributor &A) override { AAPointerInfo::initialize(A); } - /// See AbstractAttribute::getAsStr(). const std::string getAsStr() const override { return std::string("PointerInfo ") + @@ -1144,9 +961,14 @@ struct AAPointerInfoImpl const override { return State::forallInterferingAccesses(OAS, CB); } - bool forallInterferingAccesses( - Attributor &A, const AbstractAttribute &QueryingAA, Instruction &I, - function_ref<bool(const Access &, bool)> UserCB) const override { + + bool + forallInterferingAccesses(Attributor &A, const AbstractAttribute &QueryingAA, + Instruction &I, + function_ref<bool(const Access &, bool)> UserCB, + bool &HasBeenWrittenTo) const override { + HasBeenWrittenTo = false; + SmallPtrSet<const Access *, 8> DominatingWrites; SmallVector<std::pair<const Access *, bool>, 8> InterferingAccesses; @@ -1182,14 +1004,12 @@ struct AAPointerInfoImpl const bool FindInterferingWrites = I.mayReadFromMemory(); const bool FindInterferingReads = I.mayWriteToMemory(); - const bool UseDominanceReasoning = FindInterferingWrites; + const bool UseDominanceReasoning = + FindInterferingWrites && NoRecurseAA.isKnownNoRecurse(); const bool CanUseCFGResoning = CanIgnoreThreading(I); InformationCache &InfoCache = A.getInfoCache(); const DominatorTree *DT = - NoRecurseAA.isKnownNoRecurse() && UseDominanceReasoning - ? 
InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>( - Scope) - : nullptr; + InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(Scope); enum GPUAddressSpace : unsigned { Generic = 0, @@ -1246,22 +1066,17 @@ struct AAPointerInfoImpl (!FindInterferingReads || !Acc.isRead())) return true; + bool Dominates = DT && Exact && Acc.isMustAccess() && + (Acc.getLocalInst()->getFunction() == &Scope) && + DT->dominates(Acc.getRemoteInst(), &I); + if (FindInterferingWrites && Dominates) + HasBeenWrittenTo = true; + // For now we only filter accesses based on CFG reasoning which does not // work yet if we have threading effects, or the access is complicated. - if (CanUseCFGResoning) { - if ((!Acc.isWrite() || - !AA::isPotentiallyReachable(A, *Acc.getLocalInst(), I, QueryingAA, - IsLiveInCalleeCB)) && - (!Acc.isRead() || - !AA::isPotentiallyReachable(A, I, *Acc.getLocalInst(), QueryingAA, - IsLiveInCalleeCB))) - return true; - if (DT && Exact && (Acc.getLocalInst()->getFunction() == &Scope) && - IsSameThreadAsLoad(Acc)) { - if (DT->dominates(Acc.getLocalInst(), &I)) - DominatingWrites.insert(&Acc); - } - } + if (CanUseCFGResoning && Dominates && UseDominanceReasoning && + IsSameThreadAsLoad(Acc)) + DominatingWrites.insert(&Acc); InterferingAccesses.push_back({&Acc, Exact}); return true; @@ -1269,19 +1084,27 @@ struct AAPointerInfoImpl if (!State::forallInterferingAccesses(I, AccessCB)) return false; - // If we cannot use CFG reasoning we only filter the non-write accesses - // and are done here. - if (!CanUseCFGResoning) { - for (auto &It : InterferingAccesses) - if (!UserCB(*It.first, It.second)) - return false; - return true; + if (HasBeenWrittenTo) { + const Function *ScopePtr = &Scope; + IsLiveInCalleeCB = [ScopePtr](const Function &Fn) { + return ScopePtr != &Fn; + }; } // Helper to determine if we can skip a specific write access. This is in // the worst case quadratic as we are looking for another write that will // hide the effect of this one. auto CanSkipAccess = [&](const Access &Acc, bool Exact) { + if ((!Acc.isWrite() || + !AA::isPotentiallyReachable(A, *Acc.getLocalInst(), I, QueryingAA, + IsLiveInCalleeCB)) && + (!Acc.isRead() || + !AA::isPotentiallyReachable(A, I, *Acc.getLocalInst(), QueryingAA, + IsLiveInCalleeCB))) + return true; + + if (!DT || !UseDominanceReasoning) + return false; if (!IsSameThreadAsLoad(Acc)) return false; if (!DominatingWrites.count(&Acc)) @@ -1303,7 +1126,7 @@ struct AAPointerInfoImpl // succeeded for all or not. unsigned NumInterferingAccesses = InterferingAccesses.size(); for (auto &It : InterferingAccesses) { - if (!DT || NumInterferingAccesses > MaxInterferingAccesses || + if (NumInterferingAccesses > MaxInterferingAccesses || !CanSkipAccess(*It.first, It.second)) { if (!UserCB(*It.first, It.second)) return false; @@ -1339,8 +1162,9 @@ struct AAPointerInfoImpl if (FromCallee) { Content = A.translateArgumentToCallSiteContent( RAcc.getContent(), CB, *this, UsedAssumedInformation); - AK = AccessKind( - AK & (IsByval ? AccessKind::AK_READ : AccessKind::AK_READ_WRITE)); + AK = + AccessKind(AK & (IsByval ? AccessKind::AK_R : AccessKind::AK_RW)); + AK = AccessKind(AK | (RAcc.isMayAccess() ? AK_MAY : AK_MUST)); } Changed = Changed | addAccess(A, OAS.getOffset(), OAS.getSize(), CB, Content, @@ -1353,6 +1177,27 @@ struct AAPointerInfoImpl /// Statistic tracking for all AAPointerInfo implementations. /// See AbstractAttribute::trackStatistics(). void trackPointerInfoStatistics(const IRPosition &IRP) const {} + + /// Dump the state into \p O. 
+ void dumpState(raw_ostream &O) { + for (auto &It : AccessBins) { + O << "[" << It.first.getOffset() << "-" + << It.first.getOffset() + It.first.getSize() + << "] : " << It.getSecond()->size() << "\n"; + for (auto &Acc : *It.getSecond()) { + O << " - " << Acc.getKind() << " - " << *Acc.getLocalInst() << "\n"; + if (Acc.getLocalInst() != Acc.getRemoteInst()) + O << " --> " << *Acc.getRemoteInst() + << "\n"; + if (!Acc.isWrittenValueYetUndetermined()) { + if (Acc.getWrittenValue()) + O << " - c: " << *Acc.getWrittenValue() << "\n"; + else + O << " - c: <unknown>\n"; + } + } + } + } }; struct AAPointerInfoFloating : public AAPointerInfoImpl { @@ -1360,9 +1205,6 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl { AAPointerInfoFloating(const IRPosition &IRP, Attributor &A) : AAPointerInfoImpl(IRP, A) {} - /// See AbstractAttribute::initialize(...). - void initialize(Attributor &A) override { AAPointerInfoImpl::initialize(A); } - /// Deal with an access and signal if it was handled successfully. bool handleAccess(Attributor &A, Instruction &I, Value &Ptr, Optional<Value *> Content, AccessKind Kind, int64_t Offset, @@ -1460,7 +1302,7 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl { Follow = true; return true; } - if (isa<CastInst>(Usr) || isa<SelectInst>(Usr)) + if (isa<CastInst>(Usr) || isa<SelectInst>(Usr) || isa<ReturnInst>(Usr)) return HandlePassthroughUser(Usr, OffsetInfoMap[CurPtr], Follow); // For PHIs we need to take care of the recurrence explicitly as the value @@ -1469,6 +1311,7 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl { if (isa<PHINode>(Usr)) { // Note the order here, the Usr access might change the map, CurPtr is // already in it though. + bool IsFirstPHIUser = !OffsetInfoMap.count(Usr); OffsetInfo &UsrOI = OffsetInfoMap[Usr]; OffsetInfo &PtrOI = OffsetInfoMap[CurPtr]; // Check if the PHI is invariant (so far). @@ -1484,52 +1327,69 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl { } // Check if the PHI operand is not dependent on the PHI itself. - // TODO: This is not great as we look at the pointer type. However, it - // is unclear where the Offset size comes from with typeless pointers. APInt Offset( DL.getIndexSizeInBits(CurPtr->getType()->getPointerAddressSpace()), 0); - if (&AssociatedValue == CurPtr->stripAndAccumulateConstantOffsets( - DL, Offset, /* AllowNonInbounds */ true)) { - if (Offset != PtrOI.Offset) { - LLVM_DEBUG(dbgs() - << "[AAPointerInfo] PHI operand pointer offset mismatch " - << *CurPtr << " in " << *Usr << "\n"); - return false; - } - return HandlePassthroughUser(Usr, PtrOI, Follow); + Value *CurPtrBase = CurPtr->stripAndAccumulateConstantOffsets( + DL, Offset, /* AllowNonInbounds */ true); + auto It = OffsetInfoMap.find(CurPtrBase); + if (It != OffsetInfoMap.end()) { + Offset += It->getSecond().Offset; + if (IsFirstPHIUser || Offset == UsrOI.Offset) + return HandlePassthroughUser(Usr, PtrOI, Follow); + LLVM_DEBUG(dbgs() + << "[AAPointerInfo] PHI operand pointer offset mismatch " + << *CurPtr << " in " << *Usr << "\n"); + } else { + LLVM_DEBUG(dbgs() << "[AAPointerInfo] PHI operand is too complex " + << *CurPtr << " in " << *Usr << "\n"); } // TODO: Approximate in case we know the direction of the recurrence. 
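// A minimal note on the helper used above: stripAndAccumulateConstantOffsets
// walks constant GEP chains, accumulates the byte offset into `Offset`, and
// returns the remaining base pointer. E.g., assuming a 32-bit int layout:
//   %base = alloca [4 x i32]
//   %p    = getelementptr inbounds [4 x i32], ptr %base, i64 0, i64 2
// stripping %p yields %base with an accumulated Offset of 8 bytes.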
- LLVM_DEBUG(dbgs() << "[AAPointerInfo] PHI operand is too complex " - << *CurPtr << " in " << *Usr << "\n"); UsrOI = PtrOI; UsrOI.Offset = OffsetAndSize::Unknown; Follow = true; return true; } - if (auto *LoadI = dyn_cast<LoadInst>(Usr)) - return handleAccess(A, *LoadI, *CurPtr, /* Content */ nullptr, - AccessKind::AK_READ, OffsetInfoMap[CurPtr].Offset, - Changed, LoadI->getType()); + if (auto *LoadI = dyn_cast<LoadInst>(Usr)) { + // If the access is to a pointer that may or may not be the associated + // value, e.g. due to a PHI, we cannot assume it will be read. + AccessKind AK = AccessKind::AK_R; + if (getUnderlyingObject(CurPtr) == &AssociatedValue) + AK = AccessKind(AK | AccessKind::AK_MUST); + else + AK = AccessKind(AK | AccessKind::AK_MAY); + return handleAccess(A, *LoadI, *CurPtr, /* Content */ nullptr, AK, + OffsetInfoMap[CurPtr].Offset, Changed, + LoadI->getType()); + } + if (auto *StoreI = dyn_cast<StoreInst>(Usr)) { if (StoreI->getValueOperand() == CurPtr) { LLVM_DEBUG(dbgs() << "[AAPointerInfo] Escaping use in store " << *StoreI << "\n"); return false; } + // If the access is to a pointer that may or may not be the associated + // value, e.g. due to a PHI, we cannot assume it will be written. + AccessKind AK = AccessKind::AK_W; + if (getUnderlyingObject(CurPtr) == &AssociatedValue) + AK = AccessKind(AK | AccessKind::AK_MUST); + else + AK = AccessKind(AK | AccessKind::AK_MAY); bool UsedAssumedInformation = false; - Optional<Value *> Content = A.getAssumedSimplified( - *StoreI->getValueOperand(), *this, UsedAssumedInformation); - return handleAccess(A, *StoreI, *CurPtr, Content, AccessKind::AK_WRITE, + Optional<Value *> Content = + A.getAssumedSimplified(*StoreI->getValueOperand(), *this, + UsedAssumedInformation, AA::Interprocedural); + return handleAccess(A, *StoreI, *CurPtr, Content, AK, OffsetInfoMap[CurPtr].Offset, Changed, StoreI->getValueOperand()->getType()); } if (auto *CB = dyn_cast<CallBase>(Usr)) { if (CB->isLifetimeStartOrEnd()) return true; - if (TLI && isFreeCall(CB, TLI)) + if (getFreedOperand(CB, TLI) == U) return true; if (CB->isArgOperand(&U)) { unsigned ArgNo = CB->getArgOperandNo(&U); @@ -1539,7 +1399,7 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl { Changed = translateAndAddState(A, CSArgPI, OffsetInfoMap[CurPtr].Offset, *CB) | Changed; - return true; + return isValidState(); } LLVM_DEBUG(dbgs() << "[AAPointerInfo] Call user not handled " << *CB << "\n"); @@ -1551,36 +1411,30 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl { return false; }; auto EquivalentUseCB = [&](const Use &OldU, const Use &NewU) { - if (OffsetInfoMap.count(NewU)) + if (OffsetInfoMap.count(NewU)) { + LLVM_DEBUG({ + if (!(OffsetInfoMap[NewU] == OffsetInfoMap[OldU])) { + dbgs() << "[AAPointerInfo] Equivalent use callback failed: " + << OffsetInfoMap[NewU].Offset << " vs " + << OffsetInfoMap[OldU].Offset << "\n"; + } + }); return OffsetInfoMap[NewU] == OffsetInfoMap[OldU]; + } OffsetInfoMap[NewU] = OffsetInfoMap[OldU]; return true; }; if (!A.checkForAllUses(UsePred, *this, AssociatedValue, /* CheckBBLivenessOnly */ true, DepClassTy::OPTIONAL, - /* IgnoreDroppableUses */ true, EquivalentUseCB)) + /* IgnoreDroppableUses */ true, EquivalentUseCB)) { + LLVM_DEBUG( + dbgs() << "[AAPointerInfo] Check for all uses failed, abort!\n"); return indicatePessimisticFixpoint(); + } LLVM_DEBUG({ dbgs() << "Accesses by bin after update:\n"; - for (auto &It : AccessBins) { - dbgs() << "[" << It.first.getOffset() << "-" - << It.first.getOffset() + It.first.getSize() - << "] : " << 
It.getSecond()->size() << "\n"; - for (auto &Acc : *It.getSecond()) { - dbgs() << " - " << Acc.getKind() << " - " << *Acc.getLocalInst() - << "\n"; - if (Acc.getLocalInst() != Acc.getRemoteInst()) - dbgs() << " --> " - << *Acc.getRemoteInst() << "\n"; - if (!Acc.isWrittenValueYetUndetermined()) { - if (Acc.getWrittenValue()) - dbgs() << " - c: " << *Acc.getWrittenValue() << "\n"; - else - dbgs() << " - c: <unknown>\n"; - } - } - } + dumpState(dbgs()); }); return Changed; @@ -1643,16 +1497,22 @@ struct AAPointerInfoCallSiteArgument final : AAPointerInfoFloating { unsigned ArgNo = getIRPosition().getCallSiteArgNo(); ChangeStatus Changed = ChangeStatus::UNCHANGED; if (ArgNo == 0) { - handleAccess(A, *MI, Ptr, nullptr, AccessKind::AK_WRITE, 0, Changed, - nullptr, LengthVal); + handleAccess(A, *MI, Ptr, nullptr, AccessKind::AK_MUST_WRITE, 0, + Changed, nullptr, LengthVal); } else if (ArgNo == 1) { - handleAccess(A, *MI, Ptr, nullptr, AccessKind::AK_READ, 0, Changed, + handleAccess(A, *MI, Ptr, nullptr, AccessKind::AK_MUST_READ, 0, Changed, nullptr, LengthVal); } else { LLVM_DEBUG(dbgs() << "[AAPointerInfo] Unhandled memory intrinsic " << *MI << "\n"); return indicatePessimisticFixpoint(); } + + LLVM_DEBUG({ + dbgs() << "Accesses by bin after update:\n"; + dumpState(dbgs()); + }); + return Changed; } @@ -1954,23 +1814,23 @@ bool AAReturnedValuesImpl::checkForAllReturnedValuesAndReturnInsts( ChangeStatus AAReturnedValuesImpl::updateImpl(Attributor &A) { ChangeStatus Changed = ChangeStatus::UNCHANGED; - auto ReturnValueCB = [&](Value &V, const Instruction *CtxI, ReturnInst &Ret, - bool) -> bool { - assert(AA::isValidInScope(V, Ret.getFunction()) && - "Assumed returned value should be valid in function scope!"); - if (ReturnedValues[&V].insert(&Ret)) - Changed = ChangeStatus::CHANGED; - return true; - }; - + SmallVector<AA::ValueAndContext> Values; bool UsedAssumedInformation = false; auto ReturnInstCB = [&](Instruction &I) { ReturnInst &Ret = cast<ReturnInst>(I); - return genericValueTraversal<ReturnInst>( - A, IRPosition::value(*Ret.getReturnValue()), *this, Ret, ReturnValueCB, - &I, UsedAssumedInformation, /* UseValueSimplify */ true, - /* MaxValues */ 16, - /* StripCB */ nullptr, AA::Intraprocedural); + Values.clear(); + if (!A.getAssumedSimplifiedValues(IRPosition::value(*Ret.getReturnValue()), + *this, Values, AA::Intraprocedural, + UsedAssumedInformation)) + Values.push_back({*Ret.getReturnValue(), Ret}); + + for (auto &VAC : Values) { + assert(AA::isValidInScope(*VAC.getValue(), Ret.getFunction()) && + "Assumed returned value should be valid in function scope!"); + if (ReturnedValues[VAC.getValue()].insert(&Ret)) + Changed = ChangeStatus::CHANGED; + } + return true; }; // Discover returned values from all live returned instructions in the @@ -2472,6 +2332,18 @@ struct AANonNullFloating : public AANonNullImpl { ChangeStatus updateImpl(Attributor &A) override { const DataLayout &DL = A.getDataLayout(); + bool Stripped; + bool UsedAssumedInformation = false; + SmallVector<AA::ValueAndContext> Values; + if (!A.getAssumedSimplifiedValues(getIRPosition(), *this, Values, + AA::AnyScope, UsedAssumedInformation)) { + Values.push_back({getAssociatedValue(), getCtxI()}); + Stripped = false; + } else { + Stripped = Values.size() != 1 || + Values.front().getValue() != &getAssociatedValue(); + } + DominatorTree *DT = nullptr; AssumptionCache *AC = nullptr; InformationCache &InfoCache = A.getInfoCache(); @@ -2480,8 +2352,8 @@ struct AANonNullFloating : public AANonNullImpl { AC = 
InfoCache.getAnalysisResultForFunction<AssumptionAnalysis>(*Fn); } - auto VisitValueCB = [&](Value &V, const Instruction *CtxI, - AANonNull::StateType &T, bool Stripped) -> bool { + AANonNull::StateType T; + auto VisitValueCB = [&](Value &V, const Instruction *CtxI) -> bool { const auto &AA = A.getAAFor<AANonNull>(*this, IRPosition::value(V), DepClassTy::REQUIRED); if (!Stripped && this == &AA) { @@ -2495,12 +2367,9 @@ struct AANonNullFloating : public AANonNullImpl { return T.isValidState(); }; - StateType T; - bool UsedAssumedInformation = false; - if (!genericValueTraversal<StateType>(A, getIRPosition(), *this, T, - VisitValueCB, getCtxI(), - UsedAssumedInformation)) - return indicatePessimisticFixpoint(); + for (const auto &VAC : Values) + if (!VisitValueCB(*VAC.getValue(), VAC.getCtxI())) + return indicatePessimisticFixpoint(); return clampStateAndIndicateChange(getState(), T); } @@ -2753,8 +2622,9 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior { if (!NoUndefAA.isKnownNoUndef()) continue; bool UsedAssumedInformation = false; - Optional<Value *> SimplifiedVal = A.getAssumedSimplified( - IRPosition::value(*ArgVal), *this, UsedAssumedInformation); + Optional<Value *> SimplifiedVal = + A.getAssumedSimplified(IRPosition::value(*ArgVal), *this, + UsedAssumedInformation, AA::Interprocedural); if (UsedAssumedInformation) continue; if (SimplifiedVal && !SimplifiedVal.value()) @@ -2925,8 +2795,9 @@ private: Optional<Value *> stopOnUndefOrAssumed(Attributor &A, Value *V, Instruction *I) { bool UsedAssumedInformation = false; - Optional<Value *> SimplifiedV = A.getAssumedSimplified( - IRPosition::value(*V), *this, UsedAssumedInformation); + Optional<Value *> SimplifiedV = + A.getAssumedSimplified(IRPosition::value(*V), *this, + UsedAssumedInformation, AA::Interprocedural); if (!UsedAssumedInformation) { // Don't depend on assumed values. if (!SimplifiedV) { @@ -3369,7 +3240,9 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl { } } - if (!AA::isPotentiallyReachable(A, *UserI, *getCtxI(), *this)) + if (!AA::isPotentiallyReachable( + A, *UserI, *getCtxI(), *this, + [ScopeFn](const Function &Fn) { return &Fn != ScopeFn; })) return true; } @@ -4364,10 +4237,23 @@ struct AADereferenceableFloating : AADereferenceableImpl { /// See AbstractAttribute::updateImpl(...). 
ChangeStatus updateImpl(Attributor &A) override { + + bool Stripped; + bool UsedAssumedInformation = false; + SmallVector<AA::ValueAndContext> Values; + if (!A.getAssumedSimplifiedValues(getIRPosition(), *this, Values, + AA::AnyScope, UsedAssumedInformation)) { + Values.push_back({getAssociatedValue(), getCtxI()}); + Stripped = false; + } else { + Stripped = Values.size() != 1 || + Values.front().getValue() != &getAssociatedValue(); + } + const DataLayout &DL = A.getDataLayout(); + DerefState T; - auto VisitValueCB = [&](const Value &V, const Instruction *, DerefState &T, - bool Stripped) -> bool { + auto VisitValueCB = [&](const Value &V) -> bool { unsigned IdxWidth = DL.getIndexSizeInBits(V.getType()->getPointerAddressSpace()); APInt Offset(IdxWidth, 0); @@ -4420,12 +4306,9 @@ struct AADereferenceableFloating : AADereferenceableImpl { return T.isValidState(); }; - DerefState T; - bool UsedAssumedInformation = false; - if (!genericValueTraversal<DerefState>(A, getIRPosition(), *this, T, - VisitValueCB, getCtxI(), - UsedAssumedInformation)) - return indicatePessimisticFixpoint(); + for (const auto &VAC : Values) + if (!VisitValueCB(*VAC.getValue())) + return indicatePessimisticFixpoint(); return clampStateAndIndicateChange(getState(), T); } @@ -4652,8 +4535,20 @@ struct AAAlignFloating : AAAlignImpl { ChangeStatus updateImpl(Attributor &A) override { const DataLayout &DL = A.getDataLayout(); - auto VisitValueCB = [&](Value &V, const Instruction *, - AAAlign::StateType &T, bool Stripped) -> bool { + bool Stripped; + bool UsedAssumedInformation = false; + SmallVector<AA::ValueAndContext> Values; + if (!A.getAssumedSimplifiedValues(getIRPosition(), *this, Values, + AA::AnyScope, UsedAssumedInformation)) { + Values.push_back({getAssociatedValue(), getCtxI()}); + Stripped = false; + } else { + Stripped = Values.size() != 1 || + Values.front().getValue() != &getAssociatedValue(); + } + + StateType T; + auto VisitValueCB = [&](Value &V) -> bool { if (isa<UndefValue>(V) || isa<ConstantPointerNull>(V)) return true; const auto &AA = A.getAAFor<AAAlign>(*this, IRPosition::value(V), @@ -4686,15 +4581,13 @@ struct AAAlignFloating : AAAlignImpl { return T.isValidState(); }; - StateType T; - bool UsedAssumedInformation = false; - if (!genericValueTraversal<StateType>(A, getIRPosition(), *this, T, - VisitValueCB, getCtxI(), - UsedAssumedInformation)) - return indicatePessimisticFixpoint(); + for (const auto &VAC : Values) { + if (!VisitValueCB(*VAC.getValue())) + return indicatePessimisticFixpoint(); + } - // TODO: If we know we visited all incoming values, thus no are assumed - // dead, we can take the known information from the state T. + // TODO: If we know we visited all incoming values, thus no are assumed + // dead, we can take the known information from the state T. return clampStateAndIndicateChange(getState(), T); } @@ -4941,7 +4834,9 @@ struct AAInstanceInfoImpl : public AAInstanceInfo { return false; // If this call base might reach the scope again we might forward the // argument back here. This is very conservative. 
- if (AA::isPotentiallyReachable(A, *CB, *Scope, *this, nullptr)) + if (AA::isPotentiallyReachable( + A, *CB, *Scope, *this, + [Scope](const Function &Fn) { return &Fn != Scope; })) return false; return true; } @@ -5518,9 +5413,9 @@ struct AAValueSimplifyImpl : AAValueSimplify { if (const auto &NewV = VMap.lookup(&V)) return NewV; bool UsedAssumedInformation = false; - Optional<Value *> SimpleV = - A.getAssumedSimplified(V, QueryingAA, UsedAssumedInformation); - if (!SimpleV) + Optional<Value *> SimpleV = A.getAssumedSimplified( + V, QueryingAA, UsedAssumedInformation, AA::Interprocedural); + if (!SimpleV.has_value()) return PoisonValue::get(&Ty); Value *EffectiveV = &V; if (SimpleV.value()) @@ -5561,8 +5456,8 @@ struct AAValueSimplifyImpl : AAValueSimplify { bool UsedAssumedInformation = false; Optional<Value *> QueryingValueSimplified = &IRP.getAssociatedValue(); if (Simplify) - QueryingValueSimplified = - A.getAssumedSimplified(IRP, QueryingAA, UsedAssumedInformation); + QueryingValueSimplified = A.getAssumedSimplified( + IRP, QueryingAA, UsedAssumedInformation, AA::Interprocedural); return unionAssumed(QueryingValueSimplified); } @@ -5763,209 +5658,11 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl { indicatePessimisticFixpoint(); } - /// Check if \p Cmp is a comparison we can simplify. - /// - /// We handle multiple cases, one in which at least one operand is an - /// (assumed) nullptr. If so, try to simplify it using AANonNull on the other - /// operand. Return true if successful, in that case SimplifiedAssociatedValue - /// will be updated. - bool handleCmp(Attributor &A, CmpInst &Cmp) { - auto Union = [&](Value &V) { - SimplifiedAssociatedValue = AA::combineOptionalValuesInAAValueLatice( - SimplifiedAssociatedValue, &V, V.getType()); - return SimplifiedAssociatedValue != Optional<Value *>(nullptr); - }; - - Value *LHS = Cmp.getOperand(0); - Value *RHS = Cmp.getOperand(1); - - // Simplify the operands first. - bool UsedAssumedInformation = false; - const auto &SimplifiedLHS = - A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()), - *this, UsedAssumedInformation); - if (!SimplifiedLHS) - return true; - if (!SimplifiedLHS.value()) - return false; - LHS = *SimplifiedLHS; - - const auto &SimplifiedRHS = - A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()), - *this, UsedAssumedInformation); - if (!SimplifiedRHS) - return true; - if (!SimplifiedRHS.value()) - return false; - RHS = *SimplifiedRHS; - - LLVMContext &Ctx = Cmp.getContext(); - // Handle the trivial case first in which we don't even need to think about - // null or non-null. - if (LHS == RHS && (Cmp.isTrueWhenEqual() || Cmp.isFalseWhenEqual())) { - Constant *NewVal = - ConstantInt::get(Type::getInt1Ty(Ctx), Cmp.isTrueWhenEqual()); - if (!Union(*NewVal)) - return false; - if (!UsedAssumedInformation) - indicateOptimisticFixpoint(); - return true; - } - - // From now on we only handle equalities (==, !=). - ICmpInst *ICmp = dyn_cast<ICmpInst>(&Cmp); - if (!ICmp || !ICmp->isEquality()) - return false; - - bool LHSIsNull = isa<ConstantPointerNull>(LHS); - bool RHSIsNull = isa<ConstantPointerNull>(RHS); - if (!LHSIsNull && !RHSIsNull) - return false; - - // Left is the nullptr ==/!= non-nullptr case. We'll use AANonNull on the - // non-nullptr operand and if we assume it's non-null we can conclude the - // result of the comparison. 
- assert((LHSIsNull || RHSIsNull) && - "Expected nullptr versus non-nullptr comparison at this point"); - - // The index is the operand that we assume is not null. - unsigned PtrIdx = LHSIsNull; - auto &PtrNonNullAA = A.getAAFor<AANonNull>( - *this, IRPosition::value(*ICmp->getOperand(PtrIdx)), - DepClassTy::REQUIRED); - if (!PtrNonNullAA.isAssumedNonNull()) - return false; - UsedAssumedInformation |= !PtrNonNullAA.isKnownNonNull(); - - // The new value depends on the predicate, true for != and false for ==. - Constant *NewVal = ConstantInt::get( - Type::getInt1Ty(Ctx), ICmp->getPredicate() == CmpInst::ICMP_NE); - if (!Union(*NewVal)) - return false; - - if (!UsedAssumedInformation) - indicateOptimisticFixpoint(); - - return true; - } - - /// Use the generic, non-optimistic InstSimplfy functionality if we managed to - /// simplify any operand of the instruction \p I. Return true if successful, - /// in that case SimplifiedAssociatedValue will be updated. - bool handleGenericInst(Attributor &A, Instruction &I) { - bool SomeSimplified = false; - bool UsedAssumedInformation = false; - - SmallVector<Value *, 8> NewOps(I.getNumOperands()); - int Idx = 0; - for (Value *Op : I.operands()) { - const auto &SimplifiedOp = - A.getAssumedSimplified(IRPosition::value(*Op, getCallBaseContext()), - *this, UsedAssumedInformation); - // If we are not sure about any operand we are not sure about the entire - // instruction, we'll wait. - if (!SimplifiedOp) - return true; - - if (SimplifiedOp.value()) - NewOps[Idx] = SimplifiedOp.value(); - else - NewOps[Idx] = Op; - - SomeSimplified |= (NewOps[Idx] != Op); - ++Idx; - } - - // We won't bother with the InstSimplify interface if we didn't simplify any - // operand ourselves. - if (!SomeSimplified) - return false; - - InformationCache &InfoCache = A.getInfoCache(); - Function *F = I.getFunction(); - const auto *DT = - InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(*F); - const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F); - auto *AC = InfoCache.getAnalysisResultForFunction<AssumptionAnalysis>(*F); - OptimizationRemarkEmitter *ORE = nullptr; - - const DataLayout &DL = I.getModule()->getDataLayout(); - SimplifyQuery Q(DL, TLI, DT, AC, &I); - if (Value *SimplifiedI = - simplifyInstructionWithOperands(&I, NewOps, Q, ORE)) { - SimplifiedAssociatedValue = AA::combineOptionalValuesInAAValueLatice( - SimplifiedAssociatedValue, SimplifiedI, I.getType()); - return SimplifiedAssociatedValue != Optional<Value *>(nullptr); - } - return false; - } - /// See AbstractAttribute::updateImpl(...). ChangeStatus updateImpl(Attributor &A) override { auto Before = SimplifiedAssociatedValue; - - // Do not simplify loads that are only used in llvm.assume if we cannot also - // remove all stores that may feed into the load. The reason is that the - // assume is probably worth something as long as the stores are around. 
- if (auto *LI = dyn_cast<LoadInst>(&getAssociatedValue())) {
- InformationCache &InfoCache = A.getInfoCache();
- if (InfoCache.isOnlyUsedByAssume(*LI)) {
- SmallSetVector<Value *, 4> PotentialCopies;
- SmallSetVector<Instruction *, 4> PotentialValueOrigins;
- bool UsedAssumedInformation = false;
- if (AA::getPotentiallyLoadedValues(A, *LI, PotentialCopies,
- PotentialValueOrigins, *this,
- UsedAssumedInformation,
- /* OnlyExact */ true)) {
- if (!llvm::all_of(PotentialValueOrigins, [&](Instruction *I) {
- if (!I)
- return true;
- if (auto *SI = dyn_cast<StoreInst>(I))
- return A.isAssumedDead(SI->getOperandUse(0), this,
- /* LivenessAA */ nullptr,
- UsedAssumedInformation,
- /* CheckBBLivenessOnly */ false);
- return A.isAssumedDead(*I, this, /* LivenessAA */ nullptr,
- UsedAssumedInformation,
- /* CheckBBLivenessOnly */ false);
- }))
- return indicatePessimisticFixpoint();
- }
- }
- }
-
- auto VisitValueCB = [&](Value &V, const Instruction *CtxI, bool &,
- bool Stripped) -> bool {
- auto &AA = A.getAAFor<AAValueSimplify>(
- *this, IRPosition::value(V, getCallBaseContext()),
- DepClassTy::REQUIRED);
- if (!Stripped && this == &AA) {
-
- if (auto *I = dyn_cast<Instruction>(&V)) {
- if (auto *Cmp = dyn_cast<CmpInst>(&V))
- if (handleCmp(A, *Cmp))
- return true;
- if (handleGenericInst(A, *I))
- return true;
- }
- // TODO: Look at the instruction and check recursively.
-
- LLVM_DEBUG(dbgs() << "[ValueSimplify] Can't be stripped more : " << V
- << "\n");
- return false;
- }
- return checkAndUpdate(A, *this,
- IRPosition::value(V, getCallBaseContext()));
- };
-
- bool Dummy = false;
- bool UsedAssumedInformation = false;
- if (!genericValueTraversal<bool>(A, getIRPosition(), *this, Dummy,
- VisitValueCB, getCtxI(),
- UsedAssumedInformation,
- /* UseValueSimplify */ false))
- if (!askSimplifiedValueForOtherAAs(A))
- return indicatePessimisticFixpoint();
+ if (!askSimplifiedValueForOtherAAs(A))
+ return indicatePessimisticFixpoint();

// If a candidate was found in this update, return CHANGED.
return Before == SimplifiedAssociatedValue ? ChangeStatus::UNCHANGED
: ChangeStatus::CHANGED
@@ -6122,6 +5819,8 @@ struct AAHeapToStackFunction final : public AAHeapToStack {
struct DeallocationInfo {
/// The call that deallocates the memory.
CallBase *const CB;
+ /// The value freed by the call.
+ Value *FreedOp;

/// Flag to indicate if we don't know all objects this deallocation might
/// free.
@@ -6153,14 +5852,14 @@ struct AAHeapToStackFunction final : public AAHeapToStack {
CallBase *CB = dyn_cast<CallBase>(&I);
if (!CB)
return true;
- if (isFreeCall(CB, TLI)) {
- DeallocationInfos[CB] = new (A.Allocator) DeallocationInfo{CB};
+ if (Value *FreedOp = getFreedOperand(CB, TLI)) {
+ DeallocationInfos[CB] = new (A.Allocator) DeallocationInfo{CB, FreedOp};
return true;
}

// To do heap to stack, we need to know that the allocation itself is
// removable once uses are rewritten, and that we can initialize the
// alloca to the same pattern as the original allocation result.
- if (isAllocationFn(CB, TLI) && isAllocRemovable(CB, TLI)) {
+ if (isRemovableAlloc(CB, TLI)) {
auto *I8Ty = Type::getInt8Ty(CB->getParent()->getContext());
if (nullptr != getInitialValueOfAllocation(CB, TLI, I8Ty)) {
AllocationInfo *AI = new (A.Allocator) AllocationInfo{CB};
@@ -6427,44 +6126,36 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) {
/* CheckBBLivenessOnly */ true))
continue;

- // Use the optimistic version to get the freed objects, ignoring dead
- // branches etc.
- SmallVector<Value *, 8> Objects; - if (!AA::getAssumedUnderlyingObjects(A, *DI.CB->getArgOperand(0), Objects, - *this, DI.CB, - UsedAssumedInformation)) { - LLVM_DEBUG( - dbgs() - << "[H2S] Unexpected failure in getAssumedUnderlyingObjects!\n"); + // Use the non-optimistic version to get the freed object. + Value *Obj = getUnderlyingObject(DI.FreedOp); + if (!Obj) { + LLVM_DEBUG(dbgs() << "[H2S] Unknown underlying object for free!\n"); DI.MightFreeUnknownObjects = true; continue; } - // Check each object explicitly. - for (auto *Obj : Objects) { - // Free of null and undef can be ignored as no-ops (or UB in the latter - // case). - if (isa<ConstantPointerNull>(Obj) || isa<UndefValue>(Obj)) - continue; - - CallBase *ObjCB = dyn_cast<CallBase>(Obj); - if (!ObjCB) { - LLVM_DEBUG(dbgs() - << "[H2S] Free of a non-call object: " << *Obj << "\n"); - DI.MightFreeUnknownObjects = true; - continue; - } + // Free of null and undef can be ignored as no-ops (or UB in the latter + // case). + if (isa<ConstantPointerNull>(Obj) || isa<UndefValue>(Obj)) + continue; - AllocationInfo *AI = AllocationInfos.lookup(ObjCB); - if (!AI) { - LLVM_DEBUG(dbgs() << "[H2S] Free of a non-allocation object: " << *Obj - << "\n"); - DI.MightFreeUnknownObjects = true; - continue; - } + CallBase *ObjCB = dyn_cast<CallBase>(Obj); + if (!ObjCB) { + LLVM_DEBUG(dbgs() << "[H2S] Free of a non-call object: " << *Obj + << "\n"); + DI.MightFreeUnknownObjects = true; + continue; + } - DI.PotentialAllocationCalls.insert(ObjCB); + AllocationInfo *AI = AllocationInfos.lookup(ObjCB); + if (!AI) { + LLVM_DEBUG(dbgs() << "[H2S] Free of a non-allocation object: " << *Obj + << "\n"); + DI.MightFreeUnknownObjects = true; + continue; } + + DI.PotentialAllocationCalls.insert(ObjCB); } }; @@ -7692,7 +7383,7 @@ bool AAMemoryBehaviorFloating::followUsersOfUseIn(Attributor &A, const Use &U, const Instruction *UserI) { // The loaded value is unrelated to the pointer argument, no need to // follow the users of the load. - if (isa<LoadInst>(UserI)) + if (isa<LoadInst>(UserI) || isa<ReturnInst>(UserI)) return false; // By default we follow all uses assuming UserI might leak information on U, @@ -7822,16 +7513,15 @@ struct AAMemoryLocationImpl : public AAMemoryLocation { AAMemoryLocationImpl(const IRPosition &IRP, Attributor &A) : AAMemoryLocation(IRP, A), Allocator(A.Allocator) { - for (unsigned u = 0; u < llvm::CTLog2<VALID_STATE>(); ++u) - AccessKind2Accesses[u] = nullptr; + AccessKind2Accesses.fill(nullptr); } ~AAMemoryLocationImpl() { // The AccessSets are allocated via a BumpPtrAllocator, we call // the destructor manually. - for (unsigned u = 0; u < llvm::CTLog2<VALID_STATE>(); ++u) - if (AccessKind2Accesses[u]) - AccessKind2Accesses[u]->~AccessSet(); + for (AccessSet *AS : AccessKind2Accesses) + if (AS) + AS->~AccessSet(); } /// See AbstractAttribute::initialize(...). @@ -7999,7 +7689,7 @@ protected: /// Mapping from *single* memory location kinds, e.g., LOCAL_MEM with the /// value of NO_LOCAL_MEM, to the accesses encountered for this memory kind. using AccessSet = SmallSet<AccessInfo, 2, AccessInfo>; - AccessSet *AccessKind2Accesses[llvm::CTLog2<VALID_STATE>()]; + std::array<AccessSet *, llvm::CTLog2<VALID_STATE>()> AccessKind2Accesses; /// Categorize the pointer arguments of CB that might access memory in /// AccessedLoc and update the state and access map accordingly. 
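The heap-to-stack hunks above replace the boolean `isFreeCall` check with `getFreedOperand`, so each deallocation record keeps the freed pointer, and the update then walks only that operand's single underlying object instead of an optimistic object set. A rough standalone sketch of the shape of this query (toy types; the real functions live in llvm/Analysis/MemoryBuiltins.h and llvm/Analysis/ValueTracking.h):

#include <cassert>
#include <map>
#include <string>
#include <vector>

// Toy stand-in for a call site and its arguments.
struct CallBase {
  std::string Callee;
  std::vector<std::string> Args;
};

// Return the freed operand instead of answering a yes/no question; callers
// get both "is this a deallocation?" and "what does it free?" in one query.
const std::string *getFreedOperand(const CallBase &CB) {
  if (CB.Callee == "free" && CB.Args.size() == 1)
    return &CB.Args[0];
  return nullptr;
}

// Stand-in for stripping casts/GEPs down to the allocation, as
// getUnderlyingObject does on the remembered FreedOp.
std::string getUnderlyingObject(const std::string &Ptr,
                                const std::map<std::string, std::string> &Defs) {
  auto It = Defs.find(Ptr);
  return It == Defs.end() ? Ptr : getUnderlyingObject(It->second, Defs);
}

int main() {
  CallBase Free{"free", {"%gep"}};
  std::map<std::string, std::string> Defs{{"%gep", "%malloc"}};
  const std::string *FreedOp = getFreedOperand(Free);
  assert(FreedOp);
  // The deallocation is matched to its allocation without the optimistic
  // underlying-object walk the old code needed.
  assert(getUnderlyingObject(*FreedOp, Defs) == "%malloc");
}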
@@ -8061,7 +7751,7 @@ void AAMemoryLocationImpl::categorizePtrValue( << Ptr << " [" << getMemoryLocationsAsStr(State.getAssumed()) << "]\n"); - SmallVector<Value *, 8> Objects; + SmallSetVector<Value *, 8> Objects; bool UsedAssumedInformation = false; if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, *this, &I, UsedAssumedInformation, @@ -8670,19 +8360,19 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl { // Simplify the operands first. bool UsedAssumedInformation = false; - const auto &SimplifiedLHS = - A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()), - *this, UsedAssumedInformation); - if (!SimplifiedLHS) + const auto &SimplifiedLHS = A.getAssumedSimplified( + IRPosition::value(*LHS, getCallBaseContext()), *this, + UsedAssumedInformation, AA::Interprocedural); + if (!SimplifiedLHS.has_value()) return true; if (!SimplifiedLHS.value()) return false; LHS = *SimplifiedLHS; - const auto &SimplifiedRHS = - A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()), - *this, UsedAssumedInformation); - if (!SimplifiedRHS) + const auto &SimplifiedRHS = A.getAssumedSimplified( + IRPosition::value(*RHS, getCallBaseContext()), *this, + UsedAssumedInformation, AA::Interprocedural); + if (!SimplifiedRHS.has_value()) return true; if (!SimplifiedRHS.value()) return false; @@ -8723,10 +8413,10 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl { // Simplify the operand first. bool UsedAssumedInformation = false; - const auto &SimplifiedOpV = - A.getAssumedSimplified(IRPosition::value(*OpV, getCallBaseContext()), - *this, UsedAssumedInformation); - if (!SimplifiedOpV) + const auto &SimplifiedOpV = A.getAssumedSimplified( + IRPosition::value(*OpV, getCallBaseContext()), *this, + UsedAssumedInformation, AA::Interprocedural); + if (!SimplifiedOpV.has_value()) return true; if (!SimplifiedOpV.value()) return false; @@ -8753,19 +8443,19 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl { // Simplify the operands first. bool UsedAssumedInformation = false; - const auto &SimplifiedLHS = - A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()), - *this, UsedAssumedInformation); - if (!SimplifiedLHS) + const auto &SimplifiedLHS = A.getAssumedSimplified( + IRPosition::value(*LHS, getCallBaseContext()), *this, + UsedAssumedInformation, AA::Interprocedural); + if (!SimplifiedLHS.has_value()) return true; if (!SimplifiedLHS.value()) return false; LHS = *SimplifiedLHS; - const auto &SimplifiedRHS = - A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()), - *this, UsedAssumedInformation); - if (!SimplifiedRHS) + const auto &SimplifiedRHS = A.getAssumedSimplified( + IRPosition::value(*RHS, getCallBaseContext()), *this, + UsedAssumedInformation, AA::Interprocedural); + if (!SimplifiedRHS.has_value()) return true; if (!SimplifiedRHS.value()) return false; @@ -8820,17 +8510,18 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl { /// See AbstractAttribute::updateImpl(...). ChangeStatus updateImpl(Attributor &A) override { - auto VisitValueCB = [&](Value &V, const Instruction *CtxI, - IntegerRangeState &T, bool Stripped) -> bool { + + IntegerRangeState T(getBitWidth()); + auto VisitValueCB = [&](Value &V, const Instruction *CtxI) -> bool { Instruction *I = dyn_cast<Instruction>(&V); if (!I || isa<CallBase>(I)) { // Simplify the operand first. 
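The categorizePtrValue hunk above switches `Objects` from `SmallVector` to `SmallSetVector`: underlying objects can be reported several times, and a set-vector visits each exactly once while keeping a deterministic insertion order. A hand-rolled sketch of that container's contract (the real one is llvm/ADT/SetVector.h):

#include <cassert>
#include <set>
#include <string>
#include <vector>

// Minimal set-vector: rejects duplicates, iterates in insertion order.
template <typename T> class SimpleSetVector {
  std::vector<T> Order;
  std::set<T> Seen;

public:
  bool insert(const T &V) {
    if (!Seen.insert(V).second)
      return false; // Duplicate: keep the first insertion position.
    Order.push_back(V);
    return true;
  }
  auto begin() const { return Order.begin(); }
  auto end() const { return Order.end(); }
};

int main() {
  SimpleSetVector<std::string> Objects;
  assert(Objects.insert("%alloca"));
  assert(Objects.insert("%global"));
  assert(!Objects.insert("%alloca")); // Visited once, not twice.
  std::vector<std::string> Seen(Objects.begin(), Objects.end());
  assert((Seen == std::vector<std::string>{"%alloca", "%global"}));
}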
bool UsedAssumedInformation = false; - const auto &SimplifiedOpV = - A.getAssumedSimplified(IRPosition::value(V, getCallBaseContext()), - *this, UsedAssumedInformation); - if (!SimplifiedOpV) + const auto &SimplifiedOpV = A.getAssumedSimplified( + IRPosition::value(V, getCallBaseContext()), *this, + UsedAssumedInformation, AA::Interprocedural); + if (!SimplifiedOpV.has_value()) return true; if (!SimplifiedOpV.value()) return false; @@ -8880,13 +8571,7 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl { return T.isValidState(); }; - IntegerRangeState T(getBitWidth()); - - bool UsedAssumedInformation = false; - if (!genericValueTraversal<IntegerRangeState>(A, getIRPosition(), *this, T, - VisitValueCB, getCtxI(), - UsedAssumedInformation, - /* UseValueSimplify */ false)) + if (!VisitValueCB(getAssociatedValue(), getCtxI())) return indicatePessimisticFixpoint(); // Ensure that long def-use chains can't cause circular reasoning either by @@ -8998,6 +8683,36 @@ struct AAPotentialConstantValuesImpl : AAPotentialConstantValues { AAPotentialConstantValues::initialize(A); } + bool fillSetWithConstantValues(Attributor &A, const IRPosition &IRP, SetTy &S, + bool &ContainsUndef) { + SmallVector<AA::ValueAndContext> Values; + bool UsedAssumedInformation = false; + if (!A.getAssumedSimplifiedValues(IRP, *this, Values, AA::Interprocedural, + UsedAssumedInformation)) { + if (!IRP.getAssociatedType()->isIntegerTy()) + return false; + auto &PotentialValuesAA = A.getAAFor<AAPotentialConstantValues>( + *this, IRP, DepClassTy::REQUIRED); + if (!PotentialValuesAA.getState().isValidState()) + return false; + ContainsUndef = PotentialValuesAA.getState().undefIsContained(); + S = PotentialValuesAA.getState().getAssumedSet(); + return true; + } + + for (auto &It : Values) { + if (isa<UndefValue>(It.getValue())) + continue; + auto *CI = dyn_cast<ConstantInt>(It.getValue()); + if (!CI) + return false; + S.insert(CI->getValue()); + } + ContainsUndef = S.empty(); + + return true; + } + /// See AbstractAttribute::getAsStr(). const std::string getAsStr() const override { std::string Str; @@ -9186,50 +8901,22 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl { Value *LHS = ICI->getOperand(0); Value *RHS = ICI->getOperand(1); - // Simplify the operands first. 
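`fillSetWithConstantValues`, introduced above, factors out a pattern the following hunks then apply to icmp, select, cast, and binary operators: first ask the generic simplified-values query, and only fall back to `AAPotentialConstantValues` when it fails; any non-constant survivor means failure, and an empty constant set signals "only undef was seen". A standalone sketch of that fallback shape (toy queries, not the Attributor interface):

#include <cassert>
#include <optional>
#include <set>
#include <vector>

using SetTy = std::set<int>;

// Toy stand-ins for the two queries tried in order.
std::optional<std::vector<int>> getSimplifiedValues(int Pos) {
  if (Pos == 0)
    return std::vector<int>{1, 2, 1};
  return std::nullopt; // Query failed; caller falls back.
}
std::optional<SetTy> getPotentialConstants(int Pos) {
  if (Pos == 1)
    return SetTy{7};
  return std::nullopt;
}

// Shape of fillSetWithConstantValues: primary query first, fallback second;
// an empty result set is interpreted as "only undef was seen".
bool fillSet(int Pos, SetTy &S, bool &ContainsUndef) {
  if (auto Values = getSimplifiedValues(Pos)) {
    S.insert(Values->begin(), Values->end());
    ContainsUndef = S.empty();
    return true;
  }
  if (auto Fallback = getPotentialConstants(Pos)) {
    S = *Fallback;
    ContainsUndef = false;
    return true;
  }
  return false; // Neither source could bound the values: give up.
}

int main() {
  SetTy S;
  bool Undef = false;
  assert(fillSet(0, S, Undef) && S == (SetTy{1, 2}));
  S.clear();
  assert(fillSet(1, S, Undef) && S == (SetTy{7}));
  assert(!fillSet(2, S, Undef));
}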
- bool UsedAssumedInformation = false; - const auto &SimplifiedLHS = - A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()), - *this, UsedAssumedInformation); - if (!SimplifiedLHS) - return ChangeStatus::UNCHANGED; - if (!SimplifiedLHS.value()) - return indicatePessimisticFixpoint(); - LHS = *SimplifiedLHS; - - const auto &SimplifiedRHS = - A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()), - *this, UsedAssumedInformation); - if (!SimplifiedRHS) - return ChangeStatus::UNCHANGED; - if (!SimplifiedRHS.value()) - return indicatePessimisticFixpoint(); - RHS = *SimplifiedRHS; - - if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy()) - return indicatePessimisticFixpoint(); - - auto &LHSAA = A.getAAFor<AAPotentialConstantValues>( - *this, IRPosition::value(*LHS), DepClassTy::REQUIRED); - if (!LHSAA.isValidState()) + bool LHSContainsUndef = false, RHSContainsUndef = false; + SetTy LHSAAPVS, RHSAAPVS; + if (!fillSetWithConstantValues(A, IRPosition::value(*LHS), LHSAAPVS, + LHSContainsUndef) || + !fillSetWithConstantValues(A, IRPosition::value(*RHS), RHSAAPVS, + RHSContainsUndef)) return indicatePessimisticFixpoint(); - auto &RHSAA = A.getAAFor<AAPotentialConstantValues>( - *this, IRPosition::value(*RHS), DepClassTy::REQUIRED); - if (!RHSAA.isValidState()) - return indicatePessimisticFixpoint(); - - const SetTy &LHSAAPVS = LHSAA.getAssumedSet(); - const SetTy &RHSAAPVS = RHSAA.getAssumedSet(); - // TODO: make use of undef flag to limit potential values aggressively. bool MaybeTrue = false, MaybeFalse = false; const APInt Zero(RHS->getType()->getIntegerBitWidth(), 0); - if (LHSAA.undefIsContained() && RHSAA.undefIsContained()) { + if (LHSContainsUndef && RHSContainsUndef) { // The result of any comparison between undefs can be soundly replaced // with undef. unionAssumedWithUndef(); - } else if (LHSAA.undefIsContained()) { + } else if (LHSContainsUndef) { for (const APInt &R : RHSAAPVS) { bool CmpResult = calculateICmpInst(ICI, Zero, R); MaybeTrue |= CmpResult; @@ -9237,7 +8924,7 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl { if (MaybeTrue & MaybeFalse) return indicatePessimisticFixpoint(); } - } else if (RHSAA.undefIsContained()) { + } else if (RHSContainsUndef) { for (const APInt &L : LHSAAPVS) { bool CmpResult = calculateICmpInst(ICI, L, Zero); MaybeTrue |= CmpResult; @@ -9269,29 +8956,7 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl { Value *LHS = SI->getTrueValue(); Value *RHS = SI->getFalseValue(); - // Simplify the operands first. 
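With both operand sets in hand, the icmp update above enumerates every pairing; a side that contains only undef is replaced by zero before the enumeration, and the compare is folded only when all pairings agree on one result. A compact sketch of that evaluation over plain ints (APInt in the real code):

#include <cassert>
#include <set>

// Possible outcomes of a comparison over two value sets.
struct CmpOutcome {
  bool MaybeTrue = false, MaybeFalse = false;
};

// Enumerate possible results of `L < R`. An operand known to be only-undef
// is substituted with 0, mirroring the hunk above.
CmpOutcome evalLess(const std::set<int> &LHS, bool LUndef,
                    const std::set<int> &RHS, bool RUndef) {
  std::set<int> L = LUndef ? std::set<int>{0} : LHS;
  std::set<int> R = RUndef ? std::set<int>{0} : RHS;
  CmpOutcome O;
  for (int A : L)
    for (int B : R)
      (A < B ? O.MaybeTrue : O.MaybeFalse) = true;
  return O;
}

int main() {
  // {1,2} < {5}: always true, so the compare folds to `true`.
  CmpOutcome O1 = evalLess({1, 2}, false, {5}, false);
  assert(O1.MaybeTrue && !O1.MaybeFalse);
  // undef < {-1}: becomes 0 < -1, always false.
  CmpOutcome O2 = evalLess({}, true, {-1}, false);
  assert(!O2.MaybeTrue && O2.MaybeFalse);
}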
bool UsedAssumedInformation = false; - const auto &SimplifiedLHS = - A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()), - *this, UsedAssumedInformation); - if (!SimplifiedLHS) - return ChangeStatus::UNCHANGED; - if (!SimplifiedLHS.value()) - return indicatePessimisticFixpoint(); - LHS = *SimplifiedLHS; - - const auto &SimplifiedRHS = - A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()), - *this, UsedAssumedInformation); - if (!SimplifiedRHS) - return ChangeStatus::UNCHANGED; - if (!SimplifiedRHS.value()) - return indicatePessimisticFixpoint(); - RHS = *SimplifiedRHS; - - if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy()) - return indicatePessimisticFixpoint(); - Optional<Constant *> C = A.getAssumedConstant(*SI->getCondition(), *this, UsedAssumedInformation); @@ -9302,35 +8967,36 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl { else if (C && *C && (*C)->isZeroValue()) OnlyRight = true; - const AAPotentialConstantValues *LHSAA = nullptr, *RHSAA = nullptr; - if (!OnlyRight) { - LHSAA = &A.getAAFor<AAPotentialConstantValues>( - *this, IRPosition::value(*LHS), DepClassTy::REQUIRED); - if (!LHSAA->isValidState()) - return indicatePessimisticFixpoint(); - } - if (!OnlyLeft) { - RHSAA = &A.getAAFor<AAPotentialConstantValues>( - *this, IRPosition::value(*RHS), DepClassTy::REQUIRED); - if (!RHSAA->isValidState()) - return indicatePessimisticFixpoint(); - } + bool LHSContainsUndef = false, RHSContainsUndef = false; + SetTy LHSAAPVS, RHSAAPVS; + if (!OnlyRight && !fillSetWithConstantValues(A, IRPosition::value(*LHS), + LHSAAPVS, LHSContainsUndef)) + return indicatePessimisticFixpoint(); + + if (!OnlyLeft && !fillSetWithConstantValues(A, IRPosition::value(*RHS), + RHSAAPVS, RHSContainsUndef)) + return indicatePessimisticFixpoint(); - if (!LHSAA || !RHSAA) { + if (OnlyLeft || OnlyRight) { // select (true/false), lhs, rhs - auto *OpAA = LHSAA ? LHSAA : RHSAA; + auto *OpAA = OnlyLeft ? &LHSAAPVS : &RHSAAPVS; + auto Undef = OnlyLeft ? LHSContainsUndef : RHSContainsUndef; - if (OpAA->undefIsContained()) + if (Undef) unionAssumedWithUndef(); - else - unionAssumed(*OpAA); + else { + for (auto &It : *OpAA) + unionAssumed(It); + } - } else if (LHSAA->undefIsContained() && RHSAA->undefIsContained()) { + } else if (LHSContainsUndef && RHSContainsUndef) { // select i1 *, undef , undef => undef unionAssumedWithUndef(); } else { - unionAssumed(*LHSAA); - unionAssumed(*RHSAA); + for (auto &It : LHSAAPVS) + unionAssumed(It); + for (auto &It : RHSAAPVS) + unionAssumed(It); } return AssumedBefore == getAssumed() ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED; @@ -9344,26 +9010,16 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl { uint32_t ResultBitWidth = CI->getDestTy()->getIntegerBitWidth(); Value *Src = CI->getOperand(0); - // Simplify the operand first. 
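The select hunk above keeps the older three-way structure but feeds it from the shared helper: a condition that folds to a constant selects exactly one arm's set (the OnlyLeft/OnlyRight cases), while an unknown condition unions both arms. A sketch of that dispatch, with `std::optional<bool>` standing in for the possibly-unknown condition:

#include <cassert>
#include <optional>
#include <set>

using SetTy = std::set<int>;

// Fold `select Cond, LHS, RHS` over potential-value sets. A disengaged
// condition means "unknown": take the union of both arms.
SetTy foldSelect(std::optional<bool> Cond, const SetTy &L, const SetTy &R) {
  if (Cond.has_value())
    return *Cond ? L : R; // OnlyLeft / OnlyRight.
  SetTy U = L;
  U.insert(R.begin(), R.end());
  return U;
}

int main() {
  SetTy L{1}, R{2};
  assert(foldSelect(true, L, R) == SetTy{1});
  assert(foldSelect(false, L, R) == SetTy{2});
  assert((foldSelect(std::nullopt, L, R) == SetTy{1, 2}));
}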
- bool UsedAssumedInformation = false; - const auto &SimplifiedSrc = - A.getAssumedSimplified(IRPosition::value(*Src, getCallBaseContext()), - *this, UsedAssumedInformation); - if (!SimplifiedSrc) - return ChangeStatus::UNCHANGED; - if (!SimplifiedSrc.value()) + bool SrcContainsUndef = false; + SetTy SrcPVS; + if (!fillSetWithConstantValues(A, IRPosition::value(*Src), SrcPVS, + SrcContainsUndef)) return indicatePessimisticFixpoint(); - Src = *SimplifiedSrc; - auto &SrcAA = A.getAAFor<AAPotentialConstantValues>( - *this, IRPosition::value(*Src), DepClassTy::REQUIRED); - if (!SrcAA.isValidState()) - return indicatePessimisticFixpoint(); - const SetTy &SrcAAPVS = SrcAA.getAssumedSet(); - if (SrcAA.undefIsContained()) + if (SrcContainsUndef) unionAssumedWithUndef(); else { - for (const APInt &S : SrcAAPVS) { + for (const APInt &S : SrcPVS) { APInt T = calculateCastInst(CI, S, ResultBitWidth); unionAssumed(T); } @@ -9377,53 +9033,26 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl { Value *LHS = BinOp->getOperand(0); Value *RHS = BinOp->getOperand(1); - // Simplify the operands first. - bool UsedAssumedInformation = false; - const auto &SimplifiedLHS = - A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()), - *this, UsedAssumedInformation); - if (!SimplifiedLHS) - return ChangeStatus::UNCHANGED; - if (!SimplifiedLHS.value()) + bool LHSContainsUndef = false, RHSContainsUndef = false; + SetTy LHSAAPVS, RHSAAPVS; + if (!fillSetWithConstantValues(A, IRPosition::value(*LHS), LHSAAPVS, + LHSContainsUndef) || + !fillSetWithConstantValues(A, IRPosition::value(*RHS), RHSAAPVS, + RHSContainsUndef)) return indicatePessimisticFixpoint(); - LHS = *SimplifiedLHS; - const auto &SimplifiedRHS = - A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()), - *this, UsedAssumedInformation); - if (!SimplifiedRHS) - return ChangeStatus::UNCHANGED; - if (!SimplifiedRHS.value()) - return indicatePessimisticFixpoint(); - RHS = *SimplifiedRHS; - - if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy()) - return indicatePessimisticFixpoint(); - - auto &LHSAA = A.getAAFor<AAPotentialConstantValues>( - *this, IRPosition::value(*LHS), DepClassTy::REQUIRED); - if (!LHSAA.isValidState()) - return indicatePessimisticFixpoint(); - - auto &RHSAA = A.getAAFor<AAPotentialConstantValues>( - *this, IRPosition::value(*RHS), DepClassTy::REQUIRED); - if (!RHSAA.isValidState()) - return indicatePessimisticFixpoint(); - - const SetTy &LHSAAPVS = LHSAA.getAssumedSet(); - const SetTy &RHSAAPVS = RHSAA.getAssumedSet(); const APInt Zero = APInt(LHS->getType()->getIntegerBitWidth(), 0); // TODO: make use of undef flag to limit potential values aggressively. 
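The binary-operator update surrounding this point enumerates the cross product of both constant sets below, substituting zero for a side that contains only undef (the TODO above notes the undef handling could be more aggressive), and falls back to a pessimistic fixpoint as soon as a pairing cannot be folded. A sketch of the cross-product-with-cap idea, since the real potential-values set is size-limited as well:

#include <cassert>
#include <set>

// Fold `L + R` over two sets, capping the result size the way the attribute
// caps its potential-values set; returns false to signal "give up".
bool foldAddSets(const std::set<int> &L, const std::set<int> &R,
                 std::set<int> &Out, size_t MaxSize = 7) {
  for (int A : L)
    for (int B : R) {
      Out.insert(A + B);
      if (Out.size() > MaxSize)
        return false; // Too many values to track: pessimistic fixpoint.
    }
  return true;
}

int main() {
  std::set<int> Out;
  bool Folded = foldAddSets({0, 1}, {10}, Out);
  assert(Folded && Out == (std::set<int>{10, 11}));
  std::set<int> Big;
  bool Capped = foldAddSets({1, 2, 3, 4}, {10, 20, 30, 40}, Big);
  assert(!Capped); // 16 distinct sums exceed the cap of 7.
}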
- if (LHSAA.undefIsContained() && RHSAA.undefIsContained()) { + if (LHSContainsUndef && RHSContainsUndef) { if (!calculateBinaryOperatorAndTakeUnion(BinOp, Zero, Zero)) return indicatePessimisticFixpoint(); - } else if (LHSAA.undefIsContained()) { + } else if (LHSContainsUndef) { for (const APInt &R : RHSAAPVS) { if (!calculateBinaryOperatorAndTakeUnion(BinOp, Zero, R)) return indicatePessimisticFixpoint(); } - } else if (RHSAA.undefIsContained()) { + } else if (RHSContainsUndef) { for (const APInt &L : LHSAAPVS) { if (!calculateBinaryOperatorAndTakeUnion(BinOp, L, Zero)) return indicatePessimisticFixpoint(); @@ -9440,35 +9069,6 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl { : ChangeStatus::CHANGED; } - ChangeStatus updateWithPHINode(Attributor &A, PHINode *PHI) { - auto AssumedBefore = getAssumed(); - for (unsigned u = 0, e = PHI->getNumIncomingValues(); u < e; u++) { - Value *IncomingValue = PHI->getIncomingValue(u); - - // Simplify the operand first. - bool UsedAssumedInformation = false; - const auto &SimplifiedIncomingValue = A.getAssumedSimplified( - IRPosition::value(*IncomingValue, getCallBaseContext()), *this, - UsedAssumedInformation); - if (!SimplifiedIncomingValue) - continue; - if (!SimplifiedIncomingValue.value()) - return indicatePessimisticFixpoint(); - IncomingValue = *SimplifiedIncomingValue; - - auto &PotentialValuesAA = A.getAAFor<AAPotentialConstantValues>( - *this, IRPosition::value(*IncomingValue), DepClassTy::REQUIRED); - if (!PotentialValuesAA.isValidState()) - return indicatePessimisticFixpoint(); - if (PotentialValuesAA.undefIsContained()) - unionAssumedWithUndef(); - else - unionAssumed(PotentialValuesAA.getAssumed()); - } - return AssumedBefore == getAssumed() ? ChangeStatus::UNCHANGED - : ChangeStatus::CHANGED; - } - /// See AbstractAttribute::updateImpl(...). ChangeStatus updateImpl(Attributor &A) override { Value &V = getAssociatedValue(); @@ -9486,9 +9086,6 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl { if (auto *BinOp = dyn_cast<BinaryOperator>(I)) return updateWithBinaryOperator(A, BinOp); - if (auto *PHI = dyn_cast<PHINode>(I)) - return updateWithPHINode(A, PHI); - return indicatePessimisticFixpoint(); } @@ -9642,7 +9239,8 @@ struct AANoUndefImpl : AANoUndef { // A position whose simplified value does not have any value is // considered to be dead. We don't manifest noundef in such positions for // the same reason above. - if (!A.getAssumedSimplified(getIRPosition(), *this, UsedAssumedInformation) + if (!A.getAssumedSimplified(getIRPosition(), *this, UsedAssumedInformation, + AA::Interprocedural) .has_value()) return ChangeStatus::UNCHANGED; return AANoUndef::manifest(A); @@ -9663,11 +9261,19 @@ struct AANoUndefFloating : public AANoUndefImpl { /// See AbstractAttribute::updateImpl(...). 
ChangeStatus updateImpl(Attributor &A) override { - auto VisitValueCB = [&](Value &V, const Instruction *CtxI, - AANoUndef::StateType &T, bool Stripped) -> bool { + + SmallVector<AA::ValueAndContext> Values; + bool UsedAssumedInformation = false; + if (!A.getAssumedSimplifiedValues(getIRPosition(), *this, Values, + AA::AnyScope, UsedAssumedInformation)) { + Values.push_back({getAssociatedValue(), getCtxI()}); + } + + StateType T; + auto VisitValueCB = [&](Value &V, const Instruction *CtxI) -> bool { const auto &AA = A.getAAFor<AANoUndef>(*this, IRPosition::value(V), DepClassTy::REQUIRED); - if (!Stripped && this == &AA) { + if (this == &AA) { T.indicatePessimisticFixpoint(); } else { const AANoUndef::StateType &S = @@ -9677,12 +9283,9 @@ struct AANoUndefFloating : public AANoUndefImpl { return T.isValidState(); }; - StateType T; - bool UsedAssumedInformation = false; - if (!genericValueTraversal<StateType>(A, getIRPosition(), *this, T, - VisitValueCB, getCtxI(), - UsedAssumedInformation)) - return indicatePessimisticFixpoint(); + for (const auto &VAC : Values) + if (!VisitValueCB(*VAC.getValue(), VAC.getCtxI())) + return indicatePessimisticFixpoint(); return clampStateAndIndicateChange(getState(), T); } @@ -9782,8 +9385,7 @@ struct AACallEdgesCallSite : public AACallEdgesImpl { ChangeStatus updateImpl(Attributor &A) override { ChangeStatus Change = ChangeStatus::UNCHANGED; - auto VisitValue = [&](Value &V, const Instruction *CtxI, bool &HasUnknown, - bool Stripped) -> bool { + auto VisitValue = [&](Value &V, const Instruction *CtxI) -> bool { if (Function *Fn = dyn_cast<Function>(&V)) { addCalledFunction(Fn, Change); } else { @@ -9795,17 +9397,17 @@ struct AACallEdgesCallSite : public AACallEdgesImpl { return true; }; + SmallVector<AA::ValueAndContext> Values; // Process any value that we might call. - auto ProcessCalledOperand = [&](Value *V) { - bool DummyValue = false; + auto ProcessCalledOperand = [&](Value *V, Instruction *CtxI) { bool UsedAssumedInformation = false; - if (!genericValueTraversal<bool>(A, IRPosition::value(*V), *this, - DummyValue, VisitValue, nullptr, - UsedAssumedInformation, false)) { - // If we haven't gone through all values, assume that there are unknown - // callees. - setHasUnknownCallee(true, Change); + Values.clear(); + if (!A.getAssumedSimplifiedValues(IRPosition::value(*V), *this, Values, + AA::AnyScope, UsedAssumedInformation)) { + Values.push_back({*V, CtxI}); } + for (auto &VAC : Values) + VisitValue(*VAC.getValue(), VAC.getCtxI()); }; CallBase *CB = cast<CallBase>(getCtxI()); @@ -9828,13 +9430,13 @@ struct AACallEdgesCallSite : public AACallEdgesImpl { } // The most simple case. - ProcessCalledOperand(CB->getCalledOperand()); + ProcessCalledOperand(CB->getCalledOperand(), CB); // Process callback functions. 
SmallVector<const Use *, 4u> CallbackUses;
AbstractCallSite::getCallbackUses(*CB, CallbackUses);
for (const Use *U : CallbackUses)
- ProcessCalledOperand(U->get());
+ ProcessCalledOperand(U->get(), CB);

return Change;
}
@@ -9920,8 +9522,11 @@ private:

for (auto *AAEdges : AAEdgesList) {
if (AAEdges->hasUnknownCallee()) {
- if (!CanReachUnknownCallee)
+ if (!CanReachUnknownCallee) {
+ LLVM_DEBUG(dbgs()
+ << "[QueryResolver] Edges include unknown callee!\n");
Change = ChangeStatus::CHANGED;
+ }
CanReachUnknownCallee = true;
return Change;
}
@@ -10065,14 +9670,10 @@ public:
}

bool instructionCanReach(Attributor &A, const Instruction &Inst,
- const Function &Fn,
- bool UseBackwards) const override {
+ const Function &Fn) const override {
if (!isValidState())
return true;

- if (UseBackwards)
- return AA::isPotentiallyReachable(A, Inst, Fn, *this, nullptr);
-
const auto &Reachability = A.getAAFor<AAReachability>(
*this, IRPosition::function(*getAssociatedFunction()),
DepClassTy::REQUIRED);
@@ -10085,8 +9686,11 @@ public:
// This is a hack for us to be able to cache queries.
auto *NonConstThis = const_cast<AAFunctionReachabilityFunction *>(this);
QueryResolver &InstQSet = NonConstThis->InstQueries[&Inst];
- if (!AllKnown)
+ if (!AllKnown) {
+ LLVM_DEBUG(dbgs() << "[AAReachability] Not all reachable edges known, "
+ "may reach unknown callee!\n");
InstQSet.CanReachUnknownCallee = true;
+ }

return InstQSet.isReachable(A, *NonConstThis, CallEdges, Fn);
}
@@ -10119,8 +9723,11 @@ public:
bool AllKnown =
getReachableCallEdges(A, *Reachability, *InstPair.first, CallEdges);
// Update will return a change if this affects any queries.
- if (!AllKnown)
+ if (!AllKnown) {
+ LLVM_DEBUG(dbgs() << "[AAReachability] Not all reachable edges "
+ "known, may reach unknown callee!\n");
InstPair.second.CanReachUnknownCallee = true;
+ }
Change |= InstPair.second.update(A, *this, CallEdges);
}
}
@@ -10133,8 +9740,11 @@ public:
WholeFunction.Reachable.size() + WholeFunction.Unreachable.size();

return "FunctionReachability [" +
- std::to_string(WholeFunction.Reachable.size()) + "," +
- std::to_string(QueryCount) + "]";
+ (canReachUnknownCallee()
+ ? "unknown"
+ : (std::to_string(WholeFunction.Reachable.size()) + "," +
+ std::to_string(QueryCount))) +
+ "]";
}

void trackStatistics() const override {}
@@ -10156,6 +9766,822 @@ private:
};
} // namespace

+template <typename AAType>
+static Optional<Constant *>
+askForAssumedConstant(Attributor &A, const AbstractAttribute &QueryingAA,
+ const IRPosition &IRP, Type &Ty) {
+ if (!Ty.isIntegerTy())
+ return nullptr;
+
+ // This will also pass the call base context.
+ const auto &AA = A.getAAFor<AAType>(QueryingAA, IRP, DepClassTy::NONE); + + Optional<Constant *> COpt = AA.getAssumedConstant(A); + + if (!COpt.has_value()) { + A.recordDependence(AA, QueryingAA, DepClassTy::OPTIONAL); + return llvm::None; + } + if (auto *C = COpt.value()) { + A.recordDependence(AA, QueryingAA, DepClassTy::OPTIONAL); + return C; + } + return nullptr; +} + +Value *AAPotentialValues::getSingleValue( + Attributor &A, const AbstractAttribute &AA, const IRPosition &IRP, + SmallVectorImpl<AA::ValueAndContext> &Values) { + Type &Ty = *IRP.getAssociatedType(); + Optional<Value *> V; + for (auto &It : Values) { + V = AA::combineOptionalValuesInAAValueLatice(V, It.getValue(), &Ty); + if (V.has_value() && !V.value()) + break; + } + if (!V.has_value()) + return UndefValue::get(&Ty); + return V.value(); +} + +namespace { +struct AAPotentialValuesImpl : AAPotentialValues { + using StateType = PotentialLLVMValuesState; + + AAPotentialValuesImpl(const IRPosition &IRP, Attributor &A) + : AAPotentialValues(IRP, A) {} + + /// See AbstractAttribute::initialize(..). + void initialize(Attributor &A) override { + if (A.hasSimplificationCallback(getIRPosition())) { + indicatePessimisticFixpoint(); + return; + } + Value *Stripped = getAssociatedValue().stripPointerCasts(); + if (isa<Constant>(Stripped)) { + addValue(A, getState(), *Stripped, getCtxI(), AA::AnyScope, + getAnchorScope()); + indicateOptimisticFixpoint(); + return; + } + AAPotentialValues::initialize(A); + } + + /// See AbstractAttribute::getAsStr(). + const std::string getAsStr() const override { + std::string Str; + llvm::raw_string_ostream OS(Str); + OS << getState(); + return OS.str(); + } + + template <typename AAType> + static Optional<Value *> askOtherAA(Attributor &A, + const AbstractAttribute &AA, + const IRPosition &IRP, Type &Ty) { + if (isa<Constant>(IRP.getAssociatedValue())) + return &IRP.getAssociatedValue(); + Optional<Constant *> C = askForAssumedConstant<AAType>(A, AA, IRP, Ty); + if (!C) + return llvm::None; + if (C.value()) + if (auto *CC = AA::getWithType(**C, Ty)) + return CC; + return nullptr; + } + + void addValue(Attributor &A, StateType &State, Value &V, + const Instruction *CtxI, AA::ValueScope S, + Function *AnchorScope) const { + + IRPosition ValIRP = IRPosition::value(V); + if (auto *CB = dyn_cast_or_null<CallBase>(CtxI)) { + for (auto &U : CB->args()) { + if (U.get() != &V) + continue; + ValIRP = IRPosition::callsite_argument(*CB, CB->getArgOperandNo(&U)); + break; + } + } + + Value *VPtr = &V; + if (ValIRP.getAssociatedType()->isIntegerTy()) { + Type &Ty = *getAssociatedType(); + Optional<Value *> SimpleV = + askOtherAA<AAValueConstantRange>(A, *this, ValIRP, Ty); + if (SimpleV.has_value() && !SimpleV.value()) { + auto &PotentialConstantsAA = A.getAAFor<AAPotentialConstantValues>( + *this, ValIRP, DepClassTy::OPTIONAL); + if (PotentialConstantsAA.isValidState()) { + for (auto &It : PotentialConstantsAA.getAssumedSet()) { + State.unionAssumed({{*ConstantInt::get(&Ty, It), nullptr}, S}); + } + assert(!PotentialConstantsAA.undefIsContained() && + "Undef should be an explicit value!"); + return; + } + } + if (!SimpleV.has_value()) + return; + + if (SimpleV.value()) + VPtr = SimpleV.value(); + } + + if (isa<ConstantInt>(VPtr)) + CtxI = nullptr; + if (!AA::isValidInScope(*VPtr, AnchorScope)) + S = AA::ValueScope(S | AA::Interprocedural); + + State.unionAssumed({{*VPtr, CtxI}, S}); + } + + /// Helper struct to tie a value+context pair together with the scope for + /// which this is the simplified version. 
+ struct ItemInfo { + AA::ValueAndContext I; + AA::ValueScope S; + }; + + bool recurseForValue(Attributor &A, const IRPosition &IRP, AA::ValueScope S) { + SmallMapVector<AA::ValueAndContext, int, 8> ValueScopeMap; + for (auto CS : {AA::Intraprocedural, AA::Interprocedural}) { + if (!(CS & S)) + continue; + + bool UsedAssumedInformation = false; + SmallVector<AA::ValueAndContext> Values; + if (!A.getAssumedSimplifiedValues(IRP, this, Values, CS, + UsedAssumedInformation)) + return false; + + for (auto &It : Values) + ValueScopeMap[It] += CS; + } + for (auto &It : ValueScopeMap) + addValue(A, getState(), *It.first.getValue(), It.first.getCtxI(), + AA::ValueScope(It.second), getAnchorScope()); + + return true; + } + + void giveUpOnIntraprocedural(Attributor &A) { + auto NewS = StateType::getBestState(getState()); + for (auto &It : getAssumedSet()) { + if (It.second == AA::Intraprocedural) + continue; + addValue(A, NewS, *It.first.getValue(), It.first.getCtxI(), + AA::Interprocedural, getAnchorScope()); + } + assert(!undefIsContained() && "Undef should be an explicit value!"); + addValue(A, NewS, getAssociatedValue(), getCtxI(), AA::Intraprocedural, + getAnchorScope()); + getState() = NewS; + } + + /// See AbstractState::indicatePessimisticFixpoint(...). + ChangeStatus indicatePessimisticFixpoint() override { + getState() = StateType::getBestState(getState()); + getState().unionAssumed({{getAssociatedValue(), getCtxI()}, AA::AnyScope}); + AAPotentialValues::indicateOptimisticFixpoint(); + return ChangeStatus::CHANGED; + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + return indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::manifest(...). + ChangeStatus manifest(Attributor &A) override { + SmallVector<AA::ValueAndContext> Values; + for (AA::ValueScope S : {AA::Interprocedural, AA::Intraprocedural}) { + Values.clear(); + if (!getAssumedSimplifiedValues(A, Values, S)) + continue; + Value &OldV = getAssociatedValue(); + if (isa<UndefValue>(OldV)) + continue; + Value *NewV = getSingleValue(A, *this, getIRPosition(), Values); + if (!NewV || NewV == &OldV) + continue; + if (getCtxI() && + !AA::isValidAtPosition({*NewV, *getCtxI()}, A.getInfoCache())) + continue; + if (A.changeAfterManifest(getIRPosition(), *NewV)) + return ChangeStatus::CHANGED; + } + return ChangeStatus::UNCHANGED; + } + + bool getAssumedSimplifiedValues(Attributor &A, + SmallVectorImpl<AA::ValueAndContext> &Values, + AA::ValueScope S) const override { + if (!isValidState()) + return false; + for (auto &It : getAssumedSet()) + if (It.second & S) + Values.push_back(It.first); + assert(!undefIsContained() && "Undef should be an explicit value!"); + return true; + } +}; + +struct AAPotentialValuesFloating : AAPotentialValuesImpl { + AAPotentialValuesFloating(const IRPosition &IRP, Attributor &A) + : AAPotentialValuesImpl(IRP, A) {} + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + auto AssumedBefore = getAssumed(); + + genericValueTraversal(A); + + return (AssumedBefore == getAssumed()) ? ChangeStatus::UNCHANGED + : ChangeStatus::CHANGED; + } + + /// Helper struct to remember which AAIsDead instances we actually used. + struct LivenessInfo { + const AAIsDead *LivenessAA = nullptr; + bool AnyDead = false; + }; + + /// Check if \p Cmp is a comparison we can simplify. + /// + /// We handle multiple cases, one in which at least one operand is an + /// (assumed) nullptr. 
If so, try to simplify it using AANonNull on the other + /// operand. Return true if successful, in that case Worklist will be updated. + bool handleCmp(Attributor &A, CmpInst &Cmp, ItemInfo II, + SmallVectorImpl<ItemInfo> &Worklist) { + Value *LHS = Cmp.getOperand(0); + Value *RHS = Cmp.getOperand(1); + + // Simplify the operands first. + bool UsedAssumedInformation = false; + const auto &SimplifiedLHS = A.getAssumedSimplified( + IRPosition::value(*LHS, getCallBaseContext()), *this, + UsedAssumedInformation, AA::Intraprocedural); + if (!SimplifiedLHS.has_value()) + return true; + if (!SimplifiedLHS.value()) + return false; + LHS = *SimplifiedLHS; + + const auto &SimplifiedRHS = A.getAssumedSimplified( + IRPosition::value(*RHS, getCallBaseContext()), *this, + UsedAssumedInformation, AA::Intraprocedural); + if (!SimplifiedRHS.has_value()) + return true; + if (!SimplifiedRHS.value()) + return false; + RHS = *SimplifiedRHS; + + LLVMContext &Ctx = Cmp.getContext(); + // Handle the trivial case first in which we don't even need to think about + // null or non-null. + if (LHS == RHS && (Cmp.isTrueWhenEqual() || Cmp.isFalseWhenEqual())) { + Constant *NewV = + ConstantInt::get(Type::getInt1Ty(Ctx), Cmp.isTrueWhenEqual()); + addValue(A, getState(), *NewV, /* CtxI */ nullptr, II.S, + getAnchorScope()); + return true; + } + + // From now on we only handle equalities (==, !=). + ICmpInst *ICmp = dyn_cast<ICmpInst>(&Cmp); + if (!ICmp || !ICmp->isEquality()) + return false; + + bool LHSIsNull = isa<ConstantPointerNull>(LHS); + bool RHSIsNull = isa<ConstantPointerNull>(RHS); + if (!LHSIsNull && !RHSIsNull) + return false; + + // Left is the nullptr ==/!= non-nullptr case. We'll use AANonNull on the + // non-nullptr operand and if we assume it's non-null we can conclude the + // result of the comparison. + assert((LHSIsNull || RHSIsNull) && + "Expected nullptr versus non-nullptr comparison at this point"); + + // The index is the operand that we assume is not null. + unsigned PtrIdx = LHSIsNull; + auto &PtrNonNullAA = A.getAAFor<AANonNull>( + *this, IRPosition::value(*ICmp->getOperand(PtrIdx)), + DepClassTy::REQUIRED); + if (!PtrNonNullAA.isAssumedNonNull()) + return false; + + // The new value depends on the predicate, true for != and false for ==. + Constant *NewV = ConstantInt::get(Type::getInt1Ty(Ctx), + ICmp->getPredicate() == CmpInst::ICMP_NE); + addValue(A, getState(), *NewV, /* CtxI */ nullptr, II.S, getAnchorScope()); + return true; + } + + bool handleSelectInst(Attributor &A, SelectInst &SI, ItemInfo II, + SmallVectorImpl<ItemInfo> &Worklist) { + const Instruction *CtxI = II.I.getCtxI(); + bool UsedAssumedInformation = false; + + Optional<Constant *> C = + A.getAssumedConstant(*SI.getCondition(), *this, UsedAssumedInformation); + bool NoValueYet = !C.has_value(); + if (NoValueYet || isa_and_nonnull<UndefValue>(*C)) + return true; + if (auto *CI = dyn_cast_or_null<ConstantInt>(*C)) { + if (CI->isZero()) + Worklist.push_back({{*SI.getFalseValue(), CtxI}, II.S}); + else + Worklist.push_back({{*SI.getTrueValue(), CtxI}, II.S}); + } else { + // We could not simplify the condition, assume both values. 
+ Worklist.push_back({{*SI.getTrueValue(), CtxI}, II.S});
+ Worklist.push_back({{*SI.getFalseValue(), CtxI}, II.S});
+ }
+ return true;
+ }
+
+ bool handleLoadInst(Attributor &A, LoadInst &LI, ItemInfo II,
+ SmallVectorImpl<ItemInfo> &Worklist) {
+ SmallSetVector<Value *, 4> PotentialCopies;
+ SmallSetVector<Instruction *, 4> PotentialValueOrigins;
+ bool UsedAssumedInformation = false;
+ if (!AA::getPotentiallyLoadedValues(A, LI, PotentialCopies,
+ PotentialValueOrigins, *this,
+ UsedAssumedInformation,
+ /* OnlyExact */ true)) {
+ LLVM_DEBUG(dbgs() << "[AAPotentialValues] Failed to get potentially "
+ "loaded values for load instruction "
+ << LI << "\n");
+ return false;
+ }
+
+ // Do not simplify loads that are only used in llvm.assume if we cannot also
+ // remove all stores that may feed into the load. The reason is that the
+ // assume is probably worth something as long as the stores are around.
+ InformationCache &InfoCache = A.getInfoCache();
+ if (InfoCache.isOnlyUsedByAssume(LI)) {
+ if (!llvm::all_of(PotentialValueOrigins, [&](Instruction *I) {
+ if (!I)
+ return true;
+ if (auto *SI = dyn_cast<StoreInst>(I))
+ return A.isAssumedDead(SI->getOperandUse(0), this,
+ /* LivenessAA */ nullptr,
+ UsedAssumedInformation,
+ /* CheckBBLivenessOnly */ false);
+ return A.isAssumedDead(*I, this, /* LivenessAA */ nullptr,
+ UsedAssumedInformation,
+ /* CheckBBLivenessOnly */ false);
+ })) {
+ LLVM_DEBUG(dbgs() << "[AAPotentialValues] Load is only used by assumes "
+ "and we cannot delete all the stores: "
+ << LI << "\n");
+ return false;
+ }
+ }
+
+ // Values have to be dynamically unique or we lose the fact that a
+ // single llvm::Value might represent two runtime values (e.g.,
+ // stack locations in different recursive calls).
+ const Instruction *CtxI = II.I.getCtxI();
+ bool ScopeIsLocal = (II.S & AA::Intraprocedural);
+ bool AllLocal = ScopeIsLocal;
+ bool DynamicallyUnique = llvm::all_of(PotentialCopies, [&](Value *PC) {
+ AllLocal &= AA::isValidInScope(*PC, getAnchorScope());
+ return AA::isDynamicallyUnique(A, *this, *PC);
+ });
+ if (!DynamicallyUnique) {
+ LLVM_DEBUG(dbgs() << "[AAPotentialValues] Not all potentially loaded "
+ "values are dynamically unique: "
+ << LI << "\n");
+ return false;
+ }
+
+ for (auto *PotentialCopy : PotentialCopies) {
+ if (AllLocal) {
+ Worklist.push_back({{*PotentialCopy, CtxI}, II.S});
+ } else {
+ Worklist.push_back({{*PotentialCopy, CtxI}, AA::Interprocedural});
+ }
+ }
+ if (!AllLocal && ScopeIsLocal)
+ addValue(A, getState(), LI, CtxI, AA::Intraprocedural, getAnchorScope());
+ return true;
+ }
+
+ bool handlePHINode(
+ Attributor &A, PHINode &PHI, ItemInfo II,
+ SmallVectorImpl<ItemInfo> &Worklist,
+ SmallMapVector<const Function *, LivenessInfo, 4> &LivenessAAs) {
+ auto GetLivenessInfo = [&](const Function &F) -> LivenessInfo & {
+ LivenessInfo &LI = LivenessAAs[&F];
+ if (!LI.LivenessAA)
+ LI.LivenessAA = &A.getAAFor<AAIsDead>(*this, IRPosition::function(F),
+ DepClassTy::NONE);
+ return LI;
+ };
+
+ LivenessInfo &LI = GetLivenessInfo(*PHI.getFunction());
+ for (unsigned u = 0, e = PHI.getNumIncomingValues(); u < e; u++) {
+ BasicBlock *IncomingBB = PHI.getIncomingBlock(u);
+ if (LI.LivenessAA->isEdgeDead(IncomingBB, PHI.getParent())) {
+ LI.AnyDead = true;
+ continue;
+ }
+ Worklist.push_back(
+ {{*PHI.getIncomingValue(u), IncomingBB->getTerminator()}, II.S});
+ }
+ return true;
+ }
+
+ /// Use the generic, non-optimistic InstSimplify functionality if we managed to
+ /// simplify any operand of the instruction \p I.
Return true if successful, + /// in that case Worklist will be updated. + bool handleGenericInst(Attributor &A, Instruction &I, ItemInfo II, + SmallVectorImpl<ItemInfo> &Worklist) { + bool SomeSimplified = false; + bool UsedAssumedInformation = false; + + SmallVector<Value *, 8> NewOps(I.getNumOperands()); + int Idx = 0; + for (Value *Op : I.operands()) { + const auto &SimplifiedOp = A.getAssumedSimplified( + IRPosition::value(*Op, getCallBaseContext()), *this, + UsedAssumedInformation, AA::Intraprocedural); + // If we are not sure about any operand we are not sure about the entire + // instruction, we'll wait. + if (!SimplifiedOp.has_value()) + return true; + + if (SimplifiedOp.value()) + NewOps[Idx] = SimplifiedOp.value(); + else + NewOps[Idx] = Op; + + SomeSimplified |= (NewOps[Idx] != Op); + ++Idx; + } + + // We won't bother with the InstSimplify interface if we didn't simplify any + // operand ourselves. + if (!SomeSimplified) + return false; + + InformationCache &InfoCache = A.getInfoCache(); + Function *F = I.getFunction(); + const auto *DT = + InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(*F); + const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F); + auto *AC = InfoCache.getAnalysisResultForFunction<AssumptionAnalysis>(*F); + OptimizationRemarkEmitter *ORE = nullptr; + + const DataLayout &DL = I.getModule()->getDataLayout(); + SimplifyQuery Q(DL, TLI, DT, AC, &I); + Value *NewV = simplifyInstructionWithOperands(&I, NewOps, Q, ORE); + if (!NewV || NewV == &I) + return false; + + LLVM_DEBUG(dbgs() << "Generic inst " << I << " assumed simplified to " + << *NewV << "\n"); + Worklist.push_back({{*NewV, II.I.getCtxI()}, II.S}); + return true; + } + + bool simplifyInstruction( + Attributor &A, Instruction &I, ItemInfo II, + SmallVectorImpl<ItemInfo> &Worklist, + SmallMapVector<const Function *, LivenessInfo, 4> &LivenessAAs) { + if (auto *CI = dyn_cast<CmpInst>(&I)) + if (handleCmp(A, *CI, II, Worklist)) + return true; + + switch (I.getOpcode()) { + case Instruction::Select: + return handleSelectInst(A, cast<SelectInst>(I), II, Worklist); + case Instruction::PHI: + return handlePHINode(A, cast<PHINode>(I), II, Worklist, LivenessAAs); + case Instruction::Load: + return handleLoadInst(A, cast<LoadInst>(I), II, Worklist); + default: + return handleGenericInst(A, I, II, Worklist); + }; + return false; + } + + void genericValueTraversal(Attributor &A) { + SmallMapVector<const Function *, LivenessInfo, 4> LivenessAAs; + + Value *InitialV = &getAssociatedValue(); + SmallSet<AA::ValueAndContext, 16> Visited; + SmallVector<ItemInfo, 16> Worklist; + Worklist.push_back({{*InitialV, getCtxI()}, AA::AnyScope}); + + int Iteration = 0; + do { + ItemInfo II = Worklist.pop_back_val(); + Value *V = II.I.getValue(); + assert(V); + const Instruction *CtxI = II.I.getCtxI(); + AA::ValueScope S = II.S; + + // Check if we should process the current value. To prevent endless + // recursion keep a record of the values we followed! + if (!Visited.insert(II.I).second) + continue; + + // Make sure we limit the compile time for complex expressions. + if (Iteration++ >= MaxPotentialValuesIterations) { + LLVM_DEBUG(dbgs() << "Generic value traversal reached iteration limit: " + << Iteration << "!\n"); + addValue(A, getState(), *V, CtxI, S, getAnchorScope()); + continue; + } + + // Explicitly look through calls with a "returned" attribute if we do + // not have a pointer as stripPointerCasts only works on them. 
+ Value *NewV = nullptr;
+ if (V->getType()->isPointerTy()) {
+ NewV = AA::getWithType(*V->stripPointerCasts(), *V->getType());
+ } else {
+ auto *CB = dyn_cast<CallBase>(V);
+ if (CB && CB->getCalledFunction()) {
+ for (Argument &Arg : CB->getCalledFunction()->args())
+ if (Arg.hasReturnedAttr()) {
+ NewV = CB->getArgOperand(Arg.getArgNo());
+ break;
+ }
+ }
+ }
+ if (NewV && NewV != V) {
+ Worklist.push_back({{*NewV, CtxI}, S});
+ continue;
+ }
+
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ if (simplifyInstruction(A, *I, II, Worklist, LivenessAAs))
+ continue;
+ }
+
+ if (V != InitialV || isa<Argument>(V))
+ if (recurseForValue(A, IRPosition::value(*V), II.S))
+ continue;
+
+ // If we haven't stripped anything we give up.
+ if (V == InitialV && CtxI == getCtxI()) {
+ indicatePessimisticFixpoint();
+ return;
+ }
+
+ addValue(A, getState(), *V, CtxI, S, getAnchorScope());
+ } while (!Worklist.empty());
+
+ // If we actually used liveness information, we have to record a
+ // dependence.
+ for (auto &It : LivenessAAs)
+ if (It.second.AnyDead)
+ A.recordDependence(*It.second.LivenessAA, *this, DepClassTy::OPTIONAL);
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FLOATING_ATTR(potential_values)
+ }
+};
+
+struct AAPotentialValuesArgument final : AAPotentialValuesImpl {
+ using Base = AAPotentialValuesImpl;
+ AAPotentialValuesArgument(const IRPosition &IRP, Attributor &A)
+ : Base(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(..).
+ void initialize(Attributor &A) override {
+ auto &Arg = cast<Argument>(getAssociatedValue());
+ if (Arg.hasPointeeInMemoryValueAttr())
+ indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ auto AssumedBefore = getAssumed();
+
+ unsigned CSArgNo = getCallSiteArgNo();
+
+ bool UsedAssumedInformation = false;
+ SmallVector<AA::ValueAndContext> Values;
+ auto CallSitePred = [&](AbstractCallSite ACS) {
+ const auto CSArgIRP = IRPosition::callsite_argument(ACS, CSArgNo);
+ if (CSArgIRP.getPositionKind() == IRP_INVALID)
+ return false;
+
+ if (!A.getAssumedSimplifiedValues(CSArgIRP, this, Values,
+ AA::Interprocedural,
+ UsedAssumedInformation))
+ return false;
+
+ return isValidState();
+ };
+
+ if (!A.checkForAllCallSites(CallSitePred, *this,
+ /* RequireAllCallSites */ true,
+ UsedAssumedInformation))
+ return indicatePessimisticFixpoint();
+
+ Function *Fn = getAssociatedFunction();
+ bool AnyNonLocal = false;
+ for (auto &It : Values) {
+ if (isa<Constant>(It.getValue())) {
+ addValue(A, getState(), *It.getValue(), It.getCtxI(), AA::AnyScope,
+ getAnchorScope());
+ continue;
+ }
+ if (!AA::isDynamicallyUnique(A, *this, *It.getValue()))
+ return indicatePessimisticFixpoint();
+
+ if (auto *Arg = dyn_cast<Argument>(It.getValue()))
+ if (Arg->getParent() == Fn) {
+ addValue(A, getState(), *It.getValue(), It.getCtxI(), AA::AnyScope,
+ getAnchorScope());
+ continue;
+ }
+ addValue(A, getState(), *It.getValue(), It.getCtxI(), AA::Interprocedural,
+ getAnchorScope());
+ AnyNonLocal = true;
+ }
+ if (undefIsContained())
+ unionAssumedWithUndef();
+ if (AnyNonLocal)
+ giveUpOnIntraprocedural(A);
+
+ return (AssumedBefore == getAssumed()) ?
ChangeStatus::UNCHANGED + : ChangeStatus::CHANGED; + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_ARG_ATTR(potential_values) + } +}; + +struct AAPotentialValuesReturned + : AAReturnedFromReturnedValues<AAPotentialValues, AAPotentialValuesImpl> { + using Base = + AAReturnedFromReturnedValues<AAPotentialValues, AAPotentialValuesImpl>; + AAPotentialValuesReturned(const IRPosition &IRP, Attributor &A) + : Base(IRP, A) {} + + /// See AbstractAttribute::initialize(..). + void initialize(Attributor &A) override { + if (A.hasSimplificationCallback(getIRPosition())) + indicatePessimisticFixpoint(); + else + AAPotentialValues::initialize(A); + } + + ChangeStatus manifest(Attributor &A) override { + // We queried AAValueSimplify for the returned values so they will be + // replaced if a simplified form was found. Nothing to do here. + return ChangeStatus::UNCHANGED; + } + + ChangeStatus indicatePessimisticFixpoint() override { + return AAPotentialValues::indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_FNRET_ATTR(potential_values) + } +}; + +struct AAPotentialValuesFunction : AAPotentialValuesImpl { + AAPotentialValuesFunction(const IRPosition &IRP, Attributor &A) + : AAPotentialValuesImpl(IRP, A) {} + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + llvm_unreachable("AAPotentialValues(Function|CallSite)::updateImpl will " + "not be called"); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_FN_ATTR(potential_values) + } +}; + +struct AAPotentialValuesCallSite : AAPotentialValuesFunction { + AAPotentialValuesCallSite(const IRPosition &IRP, Attributor &A) + : AAPotentialValuesFunction(IRP, A) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_CS_ATTR(potential_values) + } +}; + +struct AAPotentialValuesCallSiteReturned : AAPotentialValuesImpl { + AAPotentialValuesCallSiteReturned(const IRPosition &IRP, Attributor &A) + : AAPotentialValuesImpl(IRP, A) {} + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + auto AssumedBefore = getAssumed(); + + Function *Callee = getAssociatedFunction(); + if (!Callee) + return indicatePessimisticFixpoint(); + + bool UsedAssumedInformation = false; + auto *CB = cast<CallBase>(getCtxI()); + if (CB->isMustTailCall() && + !A.isAssumedDead(IRPosition::inst(*CB), this, nullptr, + UsedAssumedInformation)) + return indicatePessimisticFixpoint(); + + SmallVector<AA::ValueAndContext> Values; + if (!A.getAssumedSimplifiedValues(IRPosition::returned(*Callee), this, + Values, AA::Intraprocedural, + UsedAssumedInformation)) + return indicatePessimisticFixpoint(); + + Function *Caller = CB->getCaller(); + + bool AnyNonLocal = false; + for (auto &It : Values) { + Value *V = It.getValue(); + Optional<Value *> CallerV = A.translateArgumentToCallSiteContent( + V, *CB, *this, UsedAssumedInformation); + if (!CallerV.has_value()) { + // Nothing to do as long as no value was determined. + continue; + } + V = CallerV.value() ? 
CallerV.value() : V; + if (AA::isDynamicallyUnique(A, *this, *V) && + AA::isValidInScope(*V, Caller)) { + if (CallerV.value()) { + SmallVector<AA::ValueAndContext> ArgValues; + IRPosition IRP = IRPosition::value(*V); + if (auto *Arg = dyn_cast<Argument>(V)) + if (Arg->getParent() == CB->getCalledFunction()) + IRP = IRPosition::callsite_argument(*CB, Arg->getArgNo()); + if (recurseForValue(A, IRP, AA::AnyScope)) + continue; + } + addValue(A, getState(), *V, CB, AA::AnyScope, getAnchorScope()); + } else { + AnyNonLocal = true; + break; + } + } + if (AnyNonLocal) { + Values.clear(); + if (!A.getAssumedSimplifiedValues(IRPosition::returned(*Callee), this, + Values, AA::Interprocedural, + UsedAssumedInformation)) + return indicatePessimisticFixpoint(); + AnyNonLocal = false; + getState() = PotentialLLVMValuesState::getBestState(); + for (auto &It : Values) { + Value *V = It.getValue(); + if (!AA::isDynamicallyUnique(A, *this, *V)) + return indicatePessimisticFixpoint(); + if (AA::isValidInScope(*V, Caller)) { + addValue(A, getState(), *V, CB, AA::AnyScope, getAnchorScope()); + } else { + AnyNonLocal = true; + addValue(A, getState(), *V, CB, AA::Interprocedural, + getAnchorScope()); + } + } + if (AnyNonLocal) + giveUpOnIntraprocedural(A); + } + return (AssumedBefore == getAssumed()) ? ChangeStatus::UNCHANGED + : ChangeStatus::CHANGED; + } + + ChangeStatus indicatePessimisticFixpoint() override { + return AAPotentialValues::indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_CSRET_ATTR(potential_values) + } +}; + +struct AAPotentialValuesCallSiteArgument : AAPotentialValuesFloating { + AAPotentialValuesCallSiteArgument(const IRPosition &IRP, Attributor &A) + : AAPotentialValuesFloating(IRP, A) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_CSARG_ATTR(potential_values) + } +}; +} // namespace + /// ---------------------- Assumption Propagation ------------------------------ namespace { struct AAAssumptionInfoImpl : public AAAssumptionInfo { @@ -10323,6 +10749,7 @@ const char AAMemoryBehavior::ID = 0; const char AAMemoryLocation::ID = 0; const char AAValueConstantRange::ID = 0; const char AAPotentialConstantValues::ID = 0; +const char AAPotentialValues::ID = 0; const char AANoUndef::ID = 0; const char AACallEdges::ID = 0; const char AAFunctionReachability::ID = 0; @@ -10441,6 +10868,7 @@ CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAInstanceInfo) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoCapture) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAValueConstantRange) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPotentialConstantValues) +CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPotentialValues) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoUndef) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPointerInfo) diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp index 56e2df14ff38..360ec24a0509 100644 --- a/llvm/lib/Transforms/IPO/FunctionImport.cpp +++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp @@ -1147,6 +1147,14 @@ void llvm::thinLTOInternalizeModule(Module &TheModule, // Declare a callback for the internalize pass that will ask for every // candidate GlobalValue if it can be internalized or not. auto MustPreserveGV = [&](const GlobalValue &GV) -> bool { + // It may be the case that GV is on a chain of an ifunc, its alias and + // subsequent aliases. 
In this case, the summary for the value is not + // available. + if (isa<GlobalIFunc>(&GV) || + (isa<GlobalAlias>(&GV) && + isa<GlobalIFunc>(cast<GlobalAlias>(&GV)->getAliaseeObject()))) + return true; + // Lookup the linkage recorded in the summaries during global analysis. auto GS = DefinedGlobals.find(GV.getGUID()); if (GS == DefinedGlobals.end()) { @@ -1277,7 +1285,7 @@ Expected<bool> FunctionImporter::importFunctions( } } for (GlobalAlias &GA : SrcModule->aliases()) { - if (!GA.hasName()) + if (!GA.hasName() || isa<GlobalIFunc>(GA.getAliaseeObject())) continue; auto GUID = GA.getGUID(); auto Import = ImportGUIDs.count(GUID); @@ -1413,29 +1421,6 @@ static bool doImportingForModule(Module &M) { return *Result; } -namespace { - -/// Pass that performs cross-module function import provided a summary file. -class FunctionImportLegacyPass : public ModulePass { -public: - /// Pass identification, replacement for typeid - static char ID; - - explicit FunctionImportLegacyPass() : ModulePass(ID) {} - - /// Specify pass name for debug output - StringRef getPassName() const override { return "Function Importing"; } - - bool runOnModule(Module &M) override { - if (skipModule(M)) - return false; - - return doImportingForModule(M); - } -}; - -} // end anonymous namespace - PreservedAnalyses FunctionImportPass::run(Module &M, ModuleAnalysisManager &AM) { if (!doImportingForModule(M)) @@ -1443,15 +1428,3 @@ PreservedAnalyses FunctionImportPass::run(Module &M, return PreservedAnalyses::none(); } - -char FunctionImportLegacyPass::ID = 0; -INITIALIZE_PASS(FunctionImportLegacyPass, "function-import", - "Summary Based Function Import", false, false) - -namespace llvm { - -Pass *createFunctionImportPass() { - return new FunctionImportLegacyPass(); -} - -} // end namespace llvm diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp index 1ad6e2b2a1d2..ec26db8bfc0b 100644 --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -1040,7 +1040,7 @@ static bool tryToOptimizeStoreOfAllocationToGlobal(GlobalVariable *GV, CallInst *CI, const DataLayout &DL, TargetLibraryInfo *TLI) { - if (!isAllocRemovable(CI, TLI)) + if (!isRemovableAlloc(CI, TLI)) // Must be able to remove the call when we get done.. 
return false; diff --git a/llvm/lib/Transforms/IPO/IPO.cpp b/llvm/lib/Transforms/IPO/IPO.cpp index ec2b80012ed6..dfd434e61d5b 100644 --- a/llvm/lib/Transforms/IPO/IPO.cpp +++ b/llvm/lib/Transforms/IPO/IPO.cpp @@ -44,7 +44,6 @@ void llvm::initializeIPO(PassRegistry &Registry) { initializeLoopExtractorLegacyPassPass(Registry); initializeBlockExtractorLegacyPassPass(Registry); initializeSingleLoopExtractorPass(Registry); - initializeLowerTypeTestsPass(Registry); initializeMergeFunctionsLegacyPassPass(Registry); initializePartialInlinerLegacyPassPass(Registry); initializeAttributorLegacyPassPass(Registry); @@ -60,9 +59,6 @@ void llvm::initializeIPO(PassRegistry &Registry) { initializeStripNonDebugSymbolsPass(Registry); initializeBarrierNoopPass(Registry); initializeEliminateAvailableExternallyLegacyPassPass(Registry); - initializeSampleProfileLoaderLegacyPassPass(Registry); - initializeFunctionImportLegacyPassPass(Registry); - initializeWholeProgramDevirtPass(Registry); } void LLVMInitializeIPO(LLVMPassRegistryRef R) { diff --git a/llvm/lib/Transforms/IPO/Internalize.cpp b/llvm/lib/Transforms/IPO/Internalize.cpp index 5aa5b905f06c..85b1a8303d33 100644 --- a/llvm/lib/Transforms/IPO/Internalize.cpp +++ b/llvm/lib/Transforms/IPO/Internalize.cpp @@ -28,6 +28,7 @@ #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/GlobPattern.h" #include "llvm/Support/LineIterator.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" @@ -40,13 +41,13 @@ STATISTIC(NumAliases, "Number of aliases internalized"); STATISTIC(NumFunctions, "Number of functions internalized"); STATISTIC(NumGlobals, "Number of global vars internalized"); -// APIFile - A file which contains a list of symbols that should not be marked -// external. +// APIFile - A file which contains a list of symbol glob patterns that should +// not be marked external. static cl::opt<std::string> APIFile("internalize-public-api-file", cl::value_desc("filename"), cl::desc("A file containing list of symbol names to preserve")); -// APIList - A list of symbols that should not be marked internal. +// APIList - A list of symbol glob patterns that should not be marked internal. static cl::list<std::string> APIList("internalize-public-api-list", cl::value_desc("list"), cl::desc("A list of symbol names to preserve"), cl::CommaSeparated); @@ -59,29 +60,44 @@ public: PreserveAPIList() { if (!APIFile.empty()) LoadFile(APIFile); - ExternalNames.insert(APIList.begin(), APIList.end()); + for (StringRef Pattern : APIList) + addGlob(Pattern); } bool operator()(const GlobalValue &GV) { - return ExternalNames.count(GV.getName()); + return llvm::any_of( + ExternalNames, [&](GlobPattern &GP) { return GP.match(GV.getName()); }); } private: // Contains the set of symbols loaded from file - StringSet<> ExternalNames; + SmallVector<GlobPattern> ExternalNames; + + void addGlob(StringRef Pattern) { + auto GlobOrErr = GlobPattern::create(Pattern); + if (!GlobOrErr) { + errs() << "WARNING: when loading pattern: '" + << toString(GlobOrErr.takeError()) << "' ignoring"; + return; + } + ExternalNames.emplace_back(std::move(*GlobOrErr)); + } void LoadFile(StringRef Filename) { // Load the APIFile... - ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = + ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr = MemoryBuffer::getFile(Filename); - if (!Buf) { + if (!BufOrErr) { errs() << "WARNING: Internalize couldn't load file '" << Filename << "'! 
Continuing as if it's empty.\n"; return; // Just continue as if the file were empty } - for (line_iterator I(*Buf->get(), true), E; I != E; ++I) - ExternalNames.insert(*I); + Buf = std::move(*BufOrErr); + for (line_iterator I(*Buf, true), E; I != E; ++I) + addGlob(*I); } + + std::shared_ptr<MemoryBuffer> Buf; }; } // end anonymous namespace diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp index d5f1d291f41f..6bf25df101fa 100644 --- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp +++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp @@ -528,50 +528,8 @@ public: // arguments. For testing purposes only. static bool runForTesting(Module &M); }; - -struct LowerTypeTests : public ModulePass { - static char ID; - - bool UseCommandLine = false; - - ModuleSummaryIndex *ExportSummary; - const ModuleSummaryIndex *ImportSummary; - bool DropTypeTests; - - LowerTypeTests() : ModulePass(ID), UseCommandLine(true) { - initializeLowerTypeTestsPass(*PassRegistry::getPassRegistry()); - } - - LowerTypeTests(ModuleSummaryIndex *ExportSummary, - const ModuleSummaryIndex *ImportSummary, bool DropTypeTests) - : ModulePass(ID), ExportSummary(ExportSummary), - ImportSummary(ImportSummary), - DropTypeTests(DropTypeTests || ClDropTypeTests) { - initializeLowerTypeTestsPass(*PassRegistry::getPassRegistry()); - } - - bool runOnModule(Module &M) override { - if (UseCommandLine) - return LowerTypeTestsModule::runForTesting(M); - return LowerTypeTestsModule(M, ExportSummary, ImportSummary, DropTypeTests) - .lower(); - } -}; - } // end anonymous namespace -char LowerTypeTests::ID = 0; - -INITIALIZE_PASS(LowerTypeTests, "lowertypetests", "Lower type metadata", false, - false) - -ModulePass * -llvm::createLowerTypeTestsPass(ModuleSummaryIndex *ExportSummary, - const ModuleSummaryIndex *ImportSummary, - bool DropTypeTests) { - return new LowerTypeTests(ExportSummary, ImportSummary, DropTypeTests); -} - /// Build a bit set for TypeId using the object layouts in /// GlobalLayout. 
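The Internalize change above turns plain symbol names into glob patterns. A self-contained sketch of the same matching scheme, using only the GlobPattern calls the patch itself uses (create, match, toString of the error); the sample patterns are ours:

```cpp
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/GlobPattern.h"
#include "llvm/Support/raw_ostream.h"
#include <vector>

using namespace llvm;

int main() {
  std::vector<GlobPattern> Preserved;
  for (StringRef Pat : {"main", "api_*", "*_keep"}) {
    Expected<GlobPattern> G = GlobPattern::create(Pat);
    if (!G) { // malformed pattern: warn and skip, as the pass now does
      errs() << "bad pattern '" << Pat << "': " << toString(G.takeError())
             << "\n";
      continue;
    }
    Preserved.push_back(std::move(*G));
  }
  auto MustPreserve = [&](StringRef Name) {
    return any_of(Preserved, [&](GlobPattern &GP) { return GP.match(Name); });
  };
  outs() << MustPreserve("api_open") << " " << MustPreserve("helper") << "\n";
  // prints: 1 0
}
```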
BitSetInfo LowerTypeTestsModule::buildBitSet( diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index 8e0ca8c6c997..0b42fc151991 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -4808,7 +4808,7 @@ void OpenMPOpt::registerAAs(bool IsModulePass) { if (auto *LI = dyn_cast<LoadInst>(&I)) { bool UsedAssumedInformation = false; A.getAssumedSimplified(IRPosition::value(*LI), /* AA */ nullptr, - UsedAssumedInformation); + UsedAssumedInformation, AA::Interprocedural); } else if (auto *SI = dyn_cast<StoreInst>(&I)) { A.getOrCreateAAFor<AAIsDead>(IRPosition::value(*SI)); } diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index 8eef82675e86..f1b6f2bb7de4 100644 --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -91,14 +91,6 @@ cl::opt<bool> EnableDFAJumpThreading("enable-dfa-jump-thread", cl::desc("Enable DFA jump threading."), cl::init(false), cl::Hidden); -static cl::opt<bool> - EnablePrepareForThinLTO("prepare-for-thinlto", cl::init(false), cl::Hidden, - cl::desc("Enable preparation for ThinLTO.")); - -static cl::opt<bool> - EnablePerformThinLTO("perform-thinlto", cl::init(false), cl::Hidden, - cl::desc("Enable performing ThinLTO.")); - cl::opt<bool> EnableHotColdSplit("hot-cold-split", cl::desc("Enable hot-cold splitting pass")); @@ -192,15 +184,6 @@ PassManagerBuilder::PassManagerBuilder() { VerifyInput = false; VerifyOutput = false; MergeFunctions = false; - PrepareForLTO = false; - EnablePGOInstrGen = false; - EnablePGOCSInstrGen = false; - EnablePGOCSInstrUse = false; - PGOInstrGen = ""; - PGOInstrUse = ""; - PGOSampleUse = ""; - PrepareForThinLTO = EnablePrepareForThinLTO; - PerformThinLTO = EnablePerformThinLTO; DivergentTarget = false; CallGraphProfile = true; } @@ -390,7 +373,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses( MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, /*AllowSpeculation=*/false)); // Rotate Loop - disable header duplication at -Oz - MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, PrepareForLTO)); + MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, false)); // TODO: Investigate promotion cap for O1. MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, /*AllowSpeculation=*/true)); @@ -470,10 +453,6 @@ void PassManagerBuilder::addFunctionSimplificationPasses( // Clean up after everything. MPM.add(createInstructionCombiningPass()); addExtensionsToPM(EP_Peephole, MPM); - - if (EnableCHR && OptLevel >= 3 && - (!PGOInstrUse.empty() || !PGOSampleUse.empty() || EnablePGOCSInstrGen)) - MPM.add(createControlHeightReductionLegacyPass()); } /// FIXME: Should LTO cause any differences to this set of passes? @@ -598,15 +577,6 @@ void PassManagerBuilder::populateModulePassManager( legacy::PassManagerBase &MPM) { MPM.add(createAnnotation2MetadataLegacyPass()); - if (!PGOSampleUse.empty()) { - MPM.add(createPruneEHPass()); - // In ThinLTO mode, when flattened profile is used, all the available - // profile information will be annotated in PreLink phase so there is - // no need to load the profile again in PostLink. - if (!(FlattenedProfileUsed && PerformThinLTO)) - MPM.add(createSampleProfileLoaderPass(PGOSampleUse)); - } - // Allow forcing function attributes as a debugging and tuning aid. 
MPM.add(createForceFunctionAttrsLegacyPass()); @@ -628,26 +598,8 @@ void PassManagerBuilder::populateModulePassManager( else if (GlobalExtensionsNotEmpty() || !Extensions.empty()) MPM.add(createBarrierNoopPass()); - if (PerformThinLTO) { - MPM.add(createLowerTypeTestsPass(nullptr, nullptr, true)); - // Drop available_externally and unreferenced globals. This is necessary - // with ThinLTO in order to avoid leaving undefined references to dead - // globals in the object file. - MPM.add(createEliminateAvailableExternallyPass()); - MPM.add(createGlobalDCEPass()); - } - addExtensionsToPM(EP_EnabledOnOptLevel0, MPM); - if (PrepareForLTO || PrepareForThinLTO) { - MPM.add(createCanonicalizeAliasesPass()); - // Rename anon globals to be able to export them in the summary. - // This has to be done after we add the extensions to the pass manager - // as there could be passes (e.g. Adddress sanitizer) which introduce - // new unnamed globals. - MPM.add(createNameAnonGlobalPass()); - } - MPM.add(createAnnotationRemarksLegacyPass()); return; } @@ -658,25 +610,6 @@ void PassManagerBuilder::populateModulePassManager( addInitialAliasAnalysisPasses(MPM); - // For ThinLTO there are two passes of indirect call promotion. The - // first is during the compile phase when PerformThinLTO=false and - // intra-module indirect call targets are promoted. The second is during - // the ThinLTO backend when PerformThinLTO=true, when we promote imported - // inter-module indirect calls. For that we perform indirect call promotion - // earlier in the pass pipeline, here before globalopt. Otherwise imported - // available_externally functions look unreferenced and are removed. - if (PerformThinLTO) { - MPM.add(createLowerTypeTestsPass(nullptr, nullptr, true)); - } - - // For SamplePGO in ThinLTO compile phase, we do not want to unroll loops - // as it will change the CFG too much to make the 2nd profile annotation - // in backend more difficult. - bool PrepareForThinLTOUsingPGOSampleProfile = - PrepareForThinLTO && !PGOSampleUse.empty(); - if (PrepareForThinLTOUsingPGOSampleProfile) - DisableUnrollLoops = true; - // Infer attributes about declarations if possible. MPM.add(createInferFunctionAttrsLegacyPass()); @@ -744,7 +677,7 @@ void PassManagerBuilder::populateModulePassManager( if (RunPartialInlining) MPM.add(createPartialInliningPass()); - if (OptLevel > 1 && !PrepareForLTO && !PrepareForThinLTO) + if (OptLevel > 1) // Remove avail extern fns and globals definitions if we aren't // compiling an object file for later LTO. For LTO we want to preserve // these so they are eligible for inlining at link-time. Note if they @@ -756,9 +689,6 @@ void PassManagerBuilder::populateModulePassManager( // and saves running remaining passes on the eliminated functions. MPM.add(createEliminateAvailableExternallyPass()); - if (EnableOrderFileInstrumentation) - MPM.add(createInstrOrderFilePass()); - MPM.add(createReversePostOrderFunctionAttrsPass()); // The inliner performs some kind of dead code elimination as it goes, @@ -772,24 +702,6 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createGlobalDCEPass()); } - // If we are planning to perform ThinLTO later, let's not bloat the code with - // unrolling/vectorization/... now. We'll first run the inliner + CGSCC passes - // during ThinLTO and perform the rest of the optimizations afterward. - if (PrepareForThinLTO) { - // Ensure we perform any last passes, but do so before renaming anonymous - // globals in case the passes add any. 
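The deleted PrepareForThinLTO / PerformThinLTO plumbing gets no replacement inside the legacy PassManagerBuilder; the equivalent under the new pass manager is to ask PassBuilder for the ThinLTO phase pipeline. A minimal sketch, assuming only the standard PassBuilder entry points (none of this appears in the patch):

```cpp
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Passes/PassBuilder.h"

using namespace llvm;

// New-PM counterpart of the removed -prepare-for-thinlto path.
void runThinLTOPrelink(Module &M) {
  LoopAnalysisManager LAM;
  FunctionAnalysisManager FAM;
  CGSCCAnalysisManager CGAM;
  ModuleAnalysisManager MAM;
  PassBuilder PB;
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
  ModulePassManager MPM =
      PB.buildThinLTOPreLinkDefaultPipeline(OptimizationLevel::O2);
  MPM.run(M, MAM);
}
```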
- addExtensionsToPM(EP_OptimizerLast, MPM); - MPM.add(createCanonicalizeAliasesPass()); - // Rename anon globals to be able to export them in the summary. - MPM.add(createNameAnonGlobalPass()); - return; - } - - if (PerformThinLTO) - // Optimize globals now when performing ThinLTO, this enables more - // optimizations later. - MPM.add(createGlobalOptimizerPass()); - // Scheduling LoopVersioningLICM when inlining is over, because after that // we may see more accurate aliasing. Reason to run this late is that too // early versioning may prevent further inlining due to increase of code @@ -834,7 +746,7 @@ void PassManagerBuilder::populateModulePassManager( // Re-rotate loops in all our loop nests. These may have fallout out of // rotated form due to GVN or other transformations, and the vectorizer relies // on the rotated form. Disable header duplication at -Oz. - MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, PrepareForLTO)); + MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, false)); // Distribute loops to allow partial vectorization. I.e. isolate dependences // into separate loop that would otherwise inhibit vectorization. This is @@ -856,7 +768,7 @@ void PassManagerBuilder::populateModulePassManager( // See comment in the new PM for justification of scheduling splitting at // this stage (\ref buildModuleSimplificationPipeline). - if (EnableHotColdSplit && !(PrepareForLTO || PrepareForThinLTO)) + if (EnableHotColdSplit) MPM.add(createHotColdSplittingPass()); if (EnableIROutliner) @@ -865,10 +777,6 @@ void PassManagerBuilder::populateModulePassManager( if (MergeFunctions) MPM.add(createMergeFunctionsPass()); - // Add Module flag "CG Profile" based on Branch Frequency Information. - if (CallGraphProfile) - MPM.add(createCGProfileLegacyPass()); - // LoopSink pass sinks instructions hoisted by LICM, which serves as a // canonicalization pass that enables other optimizations. 
As a result, // LoopSink pass needs to be a very late IR pass to avoid undoing LICM @@ -889,12 +797,6 @@ void PassManagerBuilder::populateModulePassManager( addExtensionsToPM(EP_OptimizerLast, MPM); - if (PrepareForLTO) { - MPM.add(createCanonicalizeAliasesPass()); - // Rename anon globals to be able to handle them in the summary - MPM.add(createNameAnonGlobalPass()); - } - MPM.add(createAnnotationRemarksLegacyPass()); } diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index 55fee213cd5f..f76b886e810a 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -546,53 +546,6 @@ private: return AnnotatedPassName.c_str(); } }; - -class SampleProfileLoaderLegacyPass : public ModulePass { -public: - // Class identification, replacement for typeinfo - static char ID; - - SampleProfileLoaderLegacyPass( - StringRef Name = SampleProfileFile, - ThinOrFullLTOPhase LTOPhase = ThinOrFullLTOPhase::None) - : ModulePass(ID), SampleLoader( - Name, SampleProfileRemappingFile, LTOPhase, - [&](Function &F) -> AssumptionCache & { - return ACT->getAssumptionCache(F); - }, - [&](Function &F) -> TargetTransformInfo & { - return TTIWP->getTTI(F); - }, - [&](Function &F) -> TargetLibraryInfo & { - return TLIWP->getTLI(F); - }) { - initializeSampleProfileLoaderLegacyPassPass( - *PassRegistry::getPassRegistry()); - } - - void dump() { SampleLoader.dump(); } - - bool doInitialization(Module &M) override { - return SampleLoader.doInitialization(M); - } - - StringRef getPassName() const override { return "Sample profile pass"; } - bool runOnModule(Module &M) override; - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<AssumptionCacheTracker>(); - AU.addRequired<TargetTransformInfoWrapperPass>(); - AU.addRequired<TargetLibraryInfoWrapperPass>(); - AU.addRequired<ProfileSummaryInfoWrapperPass>(); - } - -private: - SampleProfileLoader SampleLoader; - AssumptionCacheTracker *ACT = nullptr; - TargetTransformInfoWrapperPass *TTIWP = nullptr; - TargetLibraryInfoWrapperPass *TLIWP = nullptr; -}; - } // end anonymous namespace ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) { @@ -734,8 +687,8 @@ SampleProfileLoader::findIndirectCallFunctionSamples( auto FSCompare = [](const FunctionSamples *L, const FunctionSamples *R) { assert(L && R && "Expect non-null FunctionSamples"); - if (L->getEntrySamples() != R->getEntrySamples()) - return L->getEntrySamples() > R->getEntrySamples(); + if (L->getHeadSamplesEstimate() != R->getHeadSamplesEstimate()) + return L->getHeadSamplesEstimate() > R->getHeadSamplesEstimate(); return FunctionSamples::getGUID(L->getName()) < FunctionSamples::getGUID(R->getName()); }; @@ -750,7 +703,7 @@ SampleProfileLoader::findIndirectCallFunctionSamples( // as that already includes both inlined callee and non-inlined ones.. 
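The SampleProfile hunks from here on are a mechanical rename: getEntrySamples() becomes getHeadSamplesEstimate(). A sketch of the distinction the new name is meant to surface, assuming the renamed accessor keeps the old behavior of estimating the entry count from body samples, as opposed to getHeadSamples(), the directly measured head count:

```cpp
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::sampleprof;

int main() {
  FunctionSamples FS;
  (void)FS.addHeadSamples(7); // measured at the function head
  (void)FS.addBodySamples(/*LineOffset=*/0, /*Discriminator=*/0, /*Num=*/40);
  outs() << FS.getHeadSamples() << "\n";         // 7, the real head count
  outs() << FS.getHeadSamplesEstimate() << "\n"; // estimate from entry samples
}
```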
Sum = 0; for (const auto *const FS : CalleeSamples) { - Sum += FS->getEntrySamples(); + Sum += FS->getHeadSamplesEstimate(); R.push_back(FS); } llvm::sort(R, FSCompare); @@ -771,7 +724,7 @@ SampleProfileLoader::findIndirectCallFunctionSamples( if (M->empty()) return R; for (const auto &NameFS : *M) { - Sum += NameFS.second.getEntrySamples(); + Sum += NameFS.second.getHeadSamplesEstimate(); R.push_back(&NameFS.second); } llvm::sort(R, FSCompare); @@ -1090,7 +1043,7 @@ void SampleProfileLoader::findExternalInlineCandidate( bool PreInline = UsePreInlinerDecision && CalleeSample->getContext().hasAttribute(ContextShouldBeInlined); - if (!PreInline && CalleeSample->getEntrySamples() < Threshold) + if (!PreInline && CalleeSample->getHeadSamplesEstimate() < Threshold) continue; StringRef Name = CalleeSample->getFuncName(); @@ -1171,7 +1124,8 @@ bool SampleProfileLoader::inlineHotFunctions( assert((!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) && "GUIDToFuncNameMap has to be populated"); AllCandidates.push_back(CB); - if (FS->getEntrySamples() > 0 || FunctionSamples::ProfileIsCS) + if (FS->getHeadSamplesEstimate() > 0 || + FunctionSamples::ProfileIsCS) LocalNotInlinedCallSites.try_emplace(CB, FS); if (callsiteIsHot(FS, PSI, ProfAccForSymsInList)) Hot = true; @@ -1211,7 +1165,7 @@ bool SampleProfileLoader::inlineHotFunctions( if (!callsiteIsHot(FS, PSI, ProfAccForSymsInList)) continue; - Candidate = {I, FS, FS->getEntrySamples(), 1.0}; + Candidate = {I, FS, FS->getHeadSamplesEstimate(), 1.0}; if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum)) { LocalNotInlinedCallSites.erase(I); LocalChanged = true; @@ -1325,7 +1279,7 @@ bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate, Factor = Probe->Factor; uint64_t CallsiteCount = - CalleeSamples ? CalleeSamples->getEntrySamples() * Factor : 0; + CalleeSamples ? CalleeSamples->getHeadSamplesEstimate() * Factor : 0; *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor}; return true; } @@ -1481,7 +1435,7 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority( continue; } uint64_t EntryCountDistributed = - FS->getEntrySamples() * Candidate.CallsiteDistribution; + FS->getHeadSamplesEstimate() * Candidate.CallsiteDistribution; // In addition to regular inline cost check, we also need to make sure // ICP isn't introducing excessive speculative checks even if individual // target looks beneficial to promote and inline. That means we should @@ -1568,7 +1522,7 @@ void SampleProfileLoader::promoteMergeNotInlinedContextSamples( ++NumCSNotInlined; const FunctionSamples *FS = Pair.getSecond(); - if (FS->getTotalSamples() == 0 && FS->getEntrySamples() == 0) { + if (FS->getTotalSamples() == 0 && FS->getHeadSamplesEstimate() == 0) { continue; } @@ -1586,7 +1540,7 @@ void SampleProfileLoader::promoteMergeNotInlinedContextSamples( // Use entry samples as head samples during the merge, as inlinees // don't have head samples. const_cast<FunctionSamples *>(FS)->addHeadSamples( - FS->getEntrySamples()); + FS->getHeadSamplesEstimate()); // Note that we have to do the merge right after processing function. 
// This allows OutlineFS's profile to be used for annotation during @@ -1599,7 +1553,7 @@ void SampleProfileLoader::promoteMergeNotInlinedContextSamples( } else { auto pair = notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0}); - pair.first->second.entryCount += FS->getEntrySamples(); + pair.first->second.entryCount += FS->getHeadSamplesEstimate(); } } } @@ -1663,7 +1617,7 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) { if (const FunctionSamplesMap *M = FS->findFunctionSamplesMapAt(CallSite)) { for (const auto &NameFS : *M) - Sum += NameFS.second.getEntrySamples(); + Sum += NameFS.second.getHeadSamplesEstimate(); } } if (Sum) @@ -1825,17 +1779,6 @@ bool SampleProfileLoader::emitAnnotations(Function &F) { return Changed; } -char SampleProfileLoaderLegacyPass::ID = 0; - -INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile", - "Sample Profile loader", false, false) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) -INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile", - "Sample Profile loader", false, false) - std::unique_ptr<ProfiledCallGraph> SampleProfileLoader::buildProfiledCallGraph(CallGraph &CG) { std::unique_ptr<ProfiledCallGraph> ProfiledCG; @@ -2073,14 +2016,6 @@ bool SampleProfileLoader::doInitialization(Module &M, return true; } -ModulePass *llvm::createSampleProfileLoaderPass() { - return new SampleProfileLoaderLegacyPass(); -} - -ModulePass *llvm::createSampleProfileLoaderPass(StringRef Name) { - return new SampleProfileLoaderLegacyPass(Name); -} - bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM, ProfileSummaryInfo *_PSI, CallGraph *CG) { GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap); @@ -2141,15 +2076,6 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM, return retval; } -bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) { - ACT = &getAnalysis<AssumptionCacheTracker>(); - TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>(); - TLIWP = &getAnalysis<TargetLibraryInfoWrapperPass>(); - ProfileSummaryInfo *PSI = - &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); - return SampleLoader.runOnModule(M, nullptr, PSI, nullptr); -} - bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) { LLVM_DEBUG(dbgs() << "\n\nProcessing Function " << F.getName() << "\n"); DILocation2SampleMap.clear(); diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp index 898a213d0849..ad00c116ce0a 100644 --- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -747,78 +747,8 @@ struct DevirtIndex { void run(); }; - -struct WholeProgramDevirt : public ModulePass { - static char ID; - - bool UseCommandLine = false; - - ModuleSummaryIndex *ExportSummary = nullptr; - const ModuleSummaryIndex *ImportSummary = nullptr; - - WholeProgramDevirt() : ModulePass(ID), UseCommandLine(true) { - initializeWholeProgramDevirtPass(*PassRegistry::getPassRegistry()); - } - - WholeProgramDevirt(ModuleSummaryIndex *ExportSummary, - const ModuleSummaryIndex *ImportSummary) - : ModulePass(ID), ExportSummary(ExportSummary), - ImportSummary(ImportSummary) { - initializeWholeProgramDevirtPass(*PassRegistry::getPassRegistry()); - } - - bool runOnModule(Module &M) override { - if 
(skipModule(M)) - return false; - - // In the new pass manager, we can request the optimization - // remark emitter pass on a per-function-basis, which the - // OREGetter will do for us. - // In the old pass manager, this is harder, so we just build - // an optimization remark emitter on the fly, when we need it. - std::unique_ptr<OptimizationRemarkEmitter> ORE; - auto OREGetter = [&](Function *F) -> OptimizationRemarkEmitter & { - ORE = std::make_unique<OptimizationRemarkEmitter>(F); - return *ORE; - }; - - auto LookupDomTree = [this](Function &F) -> DominatorTree & { - return this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree(); - }; - - if (UseCommandLine) - return DevirtModule::runForTesting(M, LegacyAARGetter(*this), OREGetter, - LookupDomTree); - - return DevirtModule(M, LegacyAARGetter(*this), OREGetter, LookupDomTree, - ExportSummary, ImportSummary) - .run(); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<AssumptionCacheTracker>(); - AU.addRequired<TargetLibraryInfoWrapperPass>(); - AU.addRequired<DominatorTreeWrapperPass>(); - } -}; - } // end anonymous namespace -INITIALIZE_PASS_BEGIN(WholeProgramDevirt, "wholeprogramdevirt", - "Whole program devirtualization", false, false) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_END(WholeProgramDevirt, "wholeprogramdevirt", - "Whole program devirtualization", false, false) -char WholeProgramDevirt::ID = 0; - -ModulePass * -llvm::createWholeProgramDevirtPass(ModuleSummaryIndex *ExportSummary, - const ModuleSummaryIndex *ImportSummary) { - return new WholeProgramDevirt(ExportSummary, ImportSummary); -} - PreservedAnalyses WholeProgramDevirtPass::run(Module &M, ModuleAnalysisManager &AM) { auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 535a7736454c..4a459ec6c550 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1966,12 +1966,14 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) { return BinaryOperator::CreateAdd(X, ConstantExpr::getSub(C, C2)); } - // If there's no chance any bit will need to borrow from an adjacent bit: - // sub C, X --> xor X, C const APInt *Op0C; - if (match(Op0, m_APInt(Op0C)) && - (~computeKnownBits(Op1, 0, &I).Zero).isSubsetOf(*Op0C)) - return BinaryOperator::CreateXor(Op1, Op0); + if (match(Op0, m_APInt(Op0C)) && Op0C->isMask()) { + // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known + // zero. 
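The visitSub() hunk narrows the old "no borrow possible" xor fold to mask constants. The identity it relies on is easy to check exhaustively in plain C++ (this check is ours, not part of the patch):

```cpp
// When C is a low-bit mask (2^n - 1) and X's bits outside the mask are known
// zero, subtraction cannot borrow, so C - X == C ^ X.
#include <cassert>
#include <cstdint>

int main() {
  const uint8_t C = 0x0f;              // 2^4 - 1, satisfies isMask()
  for (unsigned X = 0; X <= 0x0f; ++X) // high bits of X are zero
    assert(uint8_t(C - X) == uint8_t(C ^ X));
}
```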
+ KnownBits RHSKnown = computeKnownBits(Op1, 0, &I); + if ((*Op0C | RHSKnown.Zero).isAllOnes()) + return BinaryOperator::CreateXor(Op1, Op0); + } { Value *Y; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index a8f2cd79830a..8253c575bc37 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -2664,8 +2664,8 @@ Value *InstCombinerImpl::foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, // Inverted form (example): // (icmp slt (X | Y), 0) & (icmp sgt (X & Y), -1) -> (icmp slt (X ^ Y), 0) bool TrueIfSignedL, TrueIfSignedR; - if (InstCombiner::isSignBitCheck(PredL, *LHSC, TrueIfSignedL) && - InstCombiner::isSignBitCheck(PredR, *RHSC, TrueIfSignedR) && + if (isSignBitCheck(PredL, *LHSC, TrueIfSignedL) && + isSignBitCheck(PredR, *RHSC, TrueIfSignedR) && (RHS->hasOneUse() || LHS->hasOneUse())) { Value *X, *Y; if (IsAnd) { @@ -3202,25 +3202,38 @@ Value *InstCombinerImpl::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS, // TODO: This can be generalized to compares of non-signbits using // decomposeBitTestICmp(). It could be enhanced more by using (something like) // foldLogOpOfMaskedICmps(). - if ((LHS->hasOneUse() || RHS->hasOneUse()) && + const APInt *LC, *RC; + if (match(LHS1, m_APInt(LC)) && match(RHS1, m_APInt(RC)) && LHS0->getType() == RHS0->getType() && - LHS0->getType()->isIntOrIntVectorTy()) { + LHS0->getType()->isIntOrIntVectorTy() && + (LHS->hasOneUse() || RHS->hasOneUse())) { + // Convert xor of signbit tests to signbit test of xor'd values: // (X > -1) ^ (Y > -1) --> (X ^ Y) < 0 // (X < 0) ^ (Y < 0) --> (X ^ Y) < 0 - if ((PredL == CmpInst::ICMP_SGT && match(LHS1, m_AllOnes()) && - PredR == CmpInst::ICMP_SGT && match(RHS1, m_AllOnes())) || - (PredL == CmpInst::ICMP_SLT && match(LHS1, m_Zero()) && - PredR == CmpInst::ICMP_SLT && match(RHS1, m_Zero()))) - return Builder.CreateIsNeg(Builder.CreateXor(LHS0, RHS0)); - // (X > -1) ^ (Y < 0) --> (X ^ Y) > -1 // (X < 0) ^ (Y > -1) --> (X ^ Y) > -1 - if ((PredL == CmpInst::ICMP_SGT && match(LHS1, m_AllOnes()) && - PredR == CmpInst::ICMP_SLT && match(RHS1, m_Zero())) || - (PredL == CmpInst::ICMP_SLT && match(LHS1, m_Zero()) && - PredR == CmpInst::ICMP_SGT && match(RHS1, m_AllOnes()))) - return Builder.CreateIsNotNeg(Builder.CreateXor(LHS0, RHS0)); - + bool TrueIfSignedL, TrueIfSignedR; + if (isSignBitCheck(PredL, *LC, TrueIfSignedL) && + isSignBitCheck(PredR, *RC, TrueIfSignedR)) { + Value *XorLR = Builder.CreateXor(LHS0, RHS0); + return TrueIfSignedL == TrueIfSignedR ? Builder.CreateIsNeg(XorLR) : + Builder.CreateIsNotNeg(XorLR); + } + + // (X > C) ^ (X < C + 2) --> X != C + 1 + // (X < C + 2) ^ (X > C) --> X != C + 1 + // Considering the correctness of this pattern, we should avoid that C is + // non-negative and C + 2 is negative, although it will be matched by other + // patterns. 
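A plain C++ spot-check of the new range fold described just above, exhaustive over i8 with C = 5 (our test harness, not patch code):

```cpp
// (X >s C) ^ (X <s C + 2)  ==  (X != C + 1), valid when C is negative or
// C + 2 is non-negative, i.e. the two bounds do not straddle the sign wrap.
#include <cassert>
#include <cstdint>

int main() {
  const int8_t C = 5; // C + 2 is non-negative, so the fold applies
  for (int V = -128; V <= 127; ++V) {
    int8_t X = int8_t(V);
    bool Xor = (X > C) ^ (X < int8_t(C + 2));
    assert(Xor == (X != int8_t(C + 1)));
  }
}
```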
+ const APInt *C1, *C2; + if ((PredL == CmpInst::ICMP_SGT && match(LHS1, m_APInt(C1)) && + PredR == CmpInst::ICMP_SLT && match(RHS1, m_APInt(C2))) || + (PredL == CmpInst::ICMP_SLT && match(LHS1, m_APInt(C2)) && + PredR == CmpInst::ICMP_SGT && match(RHS1, m_APInt(C1)))) + if (LHS0 == RHS0 && *C1 + 2 == *C2 && + (C1->isNegative() || C2->isNonNegative())) + return Builder.CreateICmpNE(LHS0, + ConstantInt::get(LHS0->getType(), *C1 + 1)); } // Instead of trying to imitate the folds for and/or, decompose this 'xor' diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index edfdf70c2b97..bc01d2ef7fe2 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1140,8 +1140,8 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { if (Value *V = simplifyCall(&CI, SQ.getWithInstruction(&CI))) return replaceInstUsesWith(CI, V); - if (isFreeCall(&CI, &TLI)) - return visitFree(CI); + if (Value *FreedOp = getFreedOperand(&CI, &TLI)) + return visitFree(CI, FreedOp); // If the caller function (i.e. us, the function that contains this CallInst) // is nounwind, mark the call as nounwind, even if the callee isn't. @@ -1539,8 +1539,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { Type *Ty = II->getType(); unsigned BitWidth = Ty->getScalarSizeInBits(); Constant *ShAmtC; - if (match(II->getArgOperand(2), m_ImmConstant(ShAmtC)) && - !ShAmtC->containsConstantExpression()) { + if (match(II->getArgOperand(2), m_ImmConstant(ShAmtC))) { // Canonicalize a shift amount constant operand to modulo the bit-width. Constant *WidthC = ConstantInt::get(Ty, BitWidth); Constant *ModuloC = @@ -2885,21 +2884,21 @@ bool InstCombinerImpl::annotateAnyAllocSite(CallBase &Call, // of the respective allocator declaration with generic attributes. bool Changed = false; - if (isAllocationFn(&Call, TLI)) { - uint64_t Size; - ObjectSizeOpts Opts; - if (getObjectSize(&Call, Size, DL, TLI, Opts) && Size > 0) { - // TODO: We really should just emit deref_or_null here and then - // let the generic inference code combine that with nonnull. - if (Call.hasRetAttr(Attribute::NonNull)) { - Changed = !Call.hasRetAttr(Attribute::Dereferenceable); - Call.addRetAttr( - Attribute::getWithDereferenceableBytes(Call.getContext(), Size)); - } else { - Changed = !Call.hasRetAttr(Attribute::DereferenceableOrNull); - Call.addRetAttr(Attribute::getWithDereferenceableOrNullBytes( - Call.getContext(), Size)); - } + if (!Call.getType()->isPointerTy()) + return Changed; + + Optional<APInt> Size = getAllocSize(&Call, TLI); + if (Size && *Size != 0) { + // TODO: We really should just emit deref_or_null here and then + // let the generic inference code combine that with nonnull. 
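Pulling the annotateAnyAllocSite() hunk together outside diff markup (a condensed sketch, not a verbatim copy; the hunk itself continues below): any pointer-returning call with a computable allocation size gets a dereferenceable attribute, with the or-null variant chosen when nonnull is not already known.

```cpp
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/InstrTypes.h"

using namespace llvm;

void annotateAllocSite(CallBase &Call, const TargetLibraryInfo &TLI) {
  if (!Call.getType()->isPointerTy())
    return;
  Optional<APInt> Size = getAllocSize(&Call, &TLI);
  if (!Size || *Size == 0)
    return;
  if (Call.hasRetAttr(Attribute::NonNull))
    Call.addRetAttr(Attribute::getWithDereferenceableBytes(
        Call.getContext(), Size->getLimitedValue()));
  else
    Call.addRetAttr(Attribute::getWithDereferenceableOrNullBytes(
        Call.getContext(), Size->getLimitedValue()));
}
```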
+ if (Call.hasRetAttr(Attribute::NonNull)) { + Changed = !Call.hasRetAttr(Attribute::Dereferenceable); + Call.addRetAttr(Attribute::getWithDereferenceableBytes( + Call.getContext(), Size->getLimitedValue())); + } else { + Changed = !Call.hasRetAttr(Attribute::DereferenceableOrNull); + Call.addRetAttr(Attribute::getWithDereferenceableOrNullBytes( + Call.getContext(), Size->getLimitedValue())); } } @@ -3079,8 +3078,7 @@ Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) { Call, Builder.CreateBitOrPointerCast(ReturnedArg, CallTy)); } - if (isAllocationFn(&Call, &TLI) && - isAllocRemovable(&cast<CallBase>(Call), &TLI)) + if (isRemovableAlloc(&Call, &TLI)) return visitAllocSite(Call); // Handle intrinsics which can be used in both call and invoke context. @@ -3242,15 +3240,16 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) { // the call because there is no place to put the cast instruction (without // breaking the critical edge). Bail out in this case. if (!Caller->use_empty()) { - if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) - for (User *U : II->users()) + BasicBlock *PhisNotSupportedBlock = nullptr; + if (auto *II = dyn_cast<InvokeInst>(Caller)) + PhisNotSupportedBlock = II->getNormalDest(); + if (auto *CB = dyn_cast<CallBrInst>(Caller)) + PhisNotSupportedBlock = CB->getDefaultDest(); + if (PhisNotSupportedBlock) + for (User *U : Caller->users()) if (PHINode *PN = dyn_cast<PHINode>(U)) - if (PN->getParent() == II->getNormalDest() || - PN->getParent() == II->getUnwindDest()) + if (PN->getParent() == PhisNotSupportedBlock) return false; - // FIXME: Be conservative for callbr to avoid a quadratic search. - if (isa<CallBrInst>(Caller)) - return false; } } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 9f6d36b85522..158d2e8289e0 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -2002,9 +2002,12 @@ Instruction *InstCombinerImpl::foldICmpMulConstant(ICmpInst &Cmp, Constant::getNullValue(Mul->getType())); } + if (MulC->isZero() || !(Mul->hasNoSignedWrap() || Mul->hasNoUnsignedWrap())) + return nullptr; + // If the multiply does not wrap, try to divide the compare constant by the // multiplication factor. - if (Cmp.isEquality() && !MulC->isZero()) { + if (Cmp.isEquality()) { // (mul nsw X, MulC) == C --> X == C /s MulC if (Mul->hasNoSignedWrap() && C.srem(*MulC).isZero()) { Constant *NewC = ConstantInt::get(Mul->getType(), C.sdiv(*MulC)); @@ -2017,7 +2020,40 @@ Instruction *InstCombinerImpl::foldICmpMulConstant(ICmpInst &Cmp, } } - return nullptr; + Constant *NewC = nullptr; + + // FIXME: Add assert that Pred is not equal to ICMP_SGE, ICMP_SLE, + // ICMP_UGE, ICMP_ULE. + + if (Mul->hasNoSignedWrap()) { + if (MulC->isNegative()) { + // MININT / -1 --> overflow. 
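The foldICmpMulConstant() extension below this point rewrites relational compares of a non-wrapping multiply by dividing the constant with directed rounding. The arithmetic behind the SLT case is simple to verify in plain C++ (our harness; the positive-C ceil expression stands in for APIntOps::RoundingSDiv(C, M, UP)):

```cpp
// With no signed wrap and a positive multiplier M:
//   X * M <s C   iff   X <s RoundingSDiv(C, M, UP)
#include <cassert>

int main() {
  const long M = 3, C = 10;
  const long Ceil = (C + M - 1) / M; // ceil(10/3) = 4, valid for positive C, M
  for (long X = -1000; X <= 1000; ++X)
    assert((X * M < C) == (X < Ceil)); // no wrap in this range, so "nsw" holds
}
```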
+ if (C.isMinSignedValue() && MulC->isAllOnes()) + return nullptr; + Pred = ICmpInst::getSwappedPredicate(Pred); + } + if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGE) + NewC = ConstantInt::get( + Mul->getType(), + APIntOps::RoundingSDiv(C, *MulC, APInt::Rounding::UP)); + if (Pred == ICmpInst::ICMP_SLE || Pred == ICmpInst::ICMP_SGT) + NewC = ConstantInt::get( + Mul->getType(), + APIntOps::RoundingSDiv(C, *MulC, APInt::Rounding::DOWN)); + } + + if (Mul->hasNoUnsignedWrap()) { + if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_UGE) + NewC = ConstantInt::get( + Mul->getType(), + APIntOps::RoundingUDiv(C, *MulC, APInt::Rounding::UP)); + if (Pred == ICmpInst::ICMP_ULE || Pred == ICmpInst::ICMP_UGT) + NewC = ConstantInt::get( + Mul->getType(), + APIntOps::RoundingUDiv(C, *MulC, APInt::Rounding::DOWN)); + } + + return NewC ? new ICmpInst(Pred, Mul->getOperand(0), NewC) : nullptr; } /// Fold icmp (shl 1, Y), C. @@ -2235,13 +2271,22 @@ Instruction *InstCombinerImpl::foldICmpShrConstant(ICmpInst &Cmp, bool IsAShr = Shr->getOpcode() == Instruction::AShr; const APInt *ShiftValC; - if (match(Shr->getOperand(0), m_APInt(ShiftValC))) { + if (match(X, m_APInt(ShiftValC))) { if (Cmp.isEquality()) return foldICmpShrConstConst(Cmp, Shr->getOperand(1), C, *ShiftValC); + // (ShiftValC >> Y) >s -1 --> Y != 0 with ShiftValC < 0 + // (ShiftValC >> Y) <s 0 --> Y == 0 with ShiftValC < 0 + bool TrueIfSigned; + if (!IsAShr && ShiftValC->isNegative() && + isSignBitCheck(Pred, C, TrueIfSigned)) + return new ICmpInst(TrueIfSigned ? CmpInst::ICMP_EQ : CmpInst::ICMP_NE, + Shr->getOperand(1), + ConstantInt::getNullValue(X->getType())); + // If the shifted constant is a power-of-2, test the shift amount directly: - // (ShiftValC >> X) >u C --> X <u (LZ(C) - LZ(ShiftValC)) - // (ShiftValC >> X) <u C --> X >=u (LZ(C-1) - LZ(ShiftValC)) + // (ShiftValC >> Y) >u C --> X <u (LZ(C) - LZ(ShiftValC)) + // (ShiftValC >> Y) <u C --> X >=u (LZ(C-1) - LZ(ShiftValC)) if (!IsAShr && ShiftValC->isPowerOf2() && (Pred == CmpInst::ICMP_UGT || Pred == CmpInst::ICMP_ULT)) { bool IsUGT = Pred == CmpInst::ICMP_UGT; @@ -2972,7 +3017,7 @@ Instruction *InstCombinerImpl::foldICmpBitCast(ICmpInst &Cmp) { const APInt *C; bool TrueIfSigned; if (match(Op1, m_APInt(C)) && Bitcast->hasOneUse() && - InstCombiner::isSignBitCheck(Pred, *C, TrueIfSigned)) { + isSignBitCheck(Pred, *C, TrueIfSigned)) { if (match(BCSrcOp, m_FPExt(m_Value(X))) || match(BCSrcOp, m_FPTrunc(m_Value(X)))) { // (bitcast (fpext/fptrunc X)) to iX) < 0 --> (bitcast X to iY) < 0 diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 271154bb3f5a..827b25533513 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -152,7 +152,7 @@ public: Instruction *visitGEPOfBitcast(BitCastInst *BCI, GetElementPtrInst &GEP); Instruction *visitAllocaInst(AllocaInst &AI); Instruction *visitAllocSite(Instruction &FI); - Instruction *visitFree(CallInst &FI); + Instruction *visitFree(CallInst &FI, Value *FreedOp); Instruction *visitLoadInst(LoadInst &LI); Instruction *visitStoreInst(StoreInst &SI); Instruction *visitAtomicRMWInst(AtomicRMWInst &SI); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index f4e2d1239f0f..13c98b935adf 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -566,6 
+566,13 @@ static bool canEvaluateShifted(Value *V, unsigned NumBits, bool IsLeftShift, return false; return true; } + case Instruction::Mul: { + const APInt *MulConst; + // We can fold (shr (mul X, -(1 << C)), C) -> (and (neg X), C`) + return !IsLeftShift && match(I->getOperand(1), m_APInt(MulConst)) && + MulConst->isNegatedPowerOf2() && + MulConst->countTrailingZeros() == NumBits; + } } } @@ -680,6 +687,17 @@ static Value *getShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, isLeftShift, IC, DL)); return PN; } + case Instruction::Mul: { + assert(!isLeftShift && "Unexpected shift direction!"); + auto *Neg = BinaryOperator::CreateNeg(I->getOperand(0)); + IC.InsertNewInstWith(Neg, *I); + unsigned TypeWidth = I->getType()->getScalarSizeInBits(); + APInt Mask = APInt::getLowBitsSet(TypeWidth, TypeWidth - NumBits); + auto *And = BinaryOperator::CreateAnd(Neg, + ConstantInt::get(I->getType(), Mask)); + And->takeName(I); + return IC.InsertNewInstWith(And, *I); + } } } diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 75520a0c8d5f..71c763de43b4 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -994,6 +994,24 @@ Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) { return SelectInst::Create(X, TVal, FVal); } +static Constant *constantFoldOperationIntoSelectOperand( + Instruction &I, SelectInst *SI, Value *SO) { + auto *ConstSO = dyn_cast<Constant>(SO); + if (!ConstSO) + return nullptr; + + SmallVector<Constant *> ConstOps; + for (Value *Op : I.operands()) { + if (Op == SI) + ConstOps.push_back(ConstSO); + else if (auto *C = dyn_cast<Constant>(Op)) + ConstOps.push_back(C); + else + llvm_unreachable("Operands should be select or constant"); + } + return ConstantFoldInstOperands(&I, ConstOps, I.getModule()->getDataLayout()); +} + static Value *foldOperationIntoSelectOperand(Instruction &I, Value *SO, InstCombiner::BuilderTy &Builder) { if (auto *Cast = dyn_cast<CastInst>(&I)) @@ -1101,8 +1119,17 @@ Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI, } } - Value *NewTV = foldOperationIntoSelectOperand(Op, TV, Builder); - Value *NewFV = foldOperationIntoSelectOperand(Op, FV, Builder); + // Make sure that one of the select arms constant folds successfully. + Value *NewTV = constantFoldOperationIntoSelectOperand(Op, SI, TV); + Value *NewFV = constantFoldOperationIntoSelectOperand(Op, SI, FV); + if (!NewTV && !NewFV) + return nullptr; + + // Create an instruction for the arm that did not fold. 
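The canEvaluateShifted()/getShiftedValue() pair a few lines up adds the fold lshr(mul X, -(1 << C)), C -> and(neg X, mask). Checked exhaustively over i8 in plain C++ (our harness, not patch code; the select-folding hunk resumes below):

```cpp
// lshr(X * -(1 << C), C) == (-X) & ((1 << (W - C)) - 1), here W = 8, C = 2.
#include <cassert>
#include <cstdint>

int main() {
  const unsigned C = 2;
  for (unsigned V = 0; V < 256; ++V) {
    uint8_t X = uint8_t(V);
    uint8_t Mul = uint8_t(X * uint8_t(-(1 << C))); // multiply by -(2^C), wraps
    uint8_t Shr = Mul >> C;                        // logical shift right
    uint8_t RHS = uint8_t(-X) & uint8_t((1u << (8 - C)) - 1);
    assert(Shr == RHS);
  }
}
```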
+ if (!NewTV) + NewTV = foldOperationIntoSelectOperand(Op, TV, Builder); + if (!NewFV) + NewFV = foldOperationIntoSelectOperand(Op, FV, Builder); return SelectInst::Create(SI->getCondition(), NewTV, NewFV, "", nullptr, SI); } @@ -2774,13 +2801,14 @@ static bool isAllocSiteRemovable(Instruction *AI, continue; } - if (isFreeCall(I, &TLI) && getAllocationFamily(I, &TLI) == Family) { + if (getFreedOperand(cast<CallBase>(I), &TLI) == PI && + getAllocationFamily(I, &TLI) == Family) { assert(Family); Users.emplace_back(I); continue; } - if (isReallocLikeFn(I, &TLI) && + if (getReallocatedOperand(cast<CallBase>(I), &TLI) == PI && getAllocationFamily(I, &TLI) == Family) { assert(Family); Users.emplace_back(I); @@ -2805,7 +2833,7 @@ static bool isAllocSiteRemovable(Instruction *AI, } Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) { - assert(isa<AllocaInst>(MI) || isAllocRemovable(&cast<CallBase>(MI), &TLI)); + assert(isa<AllocaInst>(MI) || isRemovableAlloc(&cast<CallBase>(MI), &TLI)); // If we have a malloc call which is only used in any amount of comparisons to // null and free calls, delete the calls and replace the comparisons with true @@ -3007,9 +3035,7 @@ static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI, return &FI; } -Instruction *InstCombinerImpl::visitFree(CallInst &FI) { - Value *Op = FI.getArgOperand(0); - +Instruction *InstCombinerImpl::visitFree(CallInst &FI, Value *Op) { // free undef -> unreachable. if (isa<UndefValue>(Op)) { // Leave a marker since we can't modify the CFG here. @@ -3024,12 +3050,10 @@ Instruction *InstCombinerImpl::visitFree(CallInst &FI) { // If we had free(realloc(...)) with no intervening uses, then eliminate the // realloc() entirely. - if (CallInst *CI = dyn_cast<CallInst>(Op)) { - if (CI->hasOneUse() && isReallocLikeFn(CI, &TLI)) { - return eraseInstFromFunction( - *replaceInstUsesWith(*CI, CI->getOperand(0))); - } - } + CallInst *CI = dyn_cast<CallInst>(Op); + if (CI && CI->hasOneUse()) + if (Value *ReallocatedOp = getReallocatedOperand(CI, &TLI)) + return eraseInstFromFunction(*replaceInstUsesWith(*CI, ReallocatedOp)); // If we optimize for code size, try to move the call to free before the null // test so that simplify cfg can remove the empty block and dead code diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 4fed4bd18fb1..cf2754b1dd60 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -108,6 +108,7 @@ static const uint64_t kAArch64_ShadowOffset64 = 1ULL << 36; static const uint64_t kRISCV64_ShadowOffset64 = 0xd55550000; static const uint64_t kFreeBSD_ShadowOffset32 = 1ULL << 30; static const uint64_t kFreeBSD_ShadowOffset64 = 1ULL << 46; +static const uint64_t kFreeBSDAArch64_ShadowOffset64 = 1ULL << 47; static const uint64_t kFreeBSDKasan_ShadowOffset64 = 0xdffff7c000000000; static const uint64_t kNetBSD_ShadowOffset32 = 1ULL << 30; static const uint64_t kNetBSD_ShadowOffset64 = 1ULL << 46; @@ -523,6 +524,8 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize, Mapping.Offset = kPPC64_ShadowOffset64; else if (IsSystemZ) Mapping.Offset = kSystemZ_ShadowOffset64; + else if (IsFreeBSD && IsAArch64) + Mapping.Offset = kFreeBSDAArch64_ShadowOffset64; else if (IsFreeBSD && !IsMIPS64) { if (IsKasan) Mapping.Offset = kFreeBSDKasan_ShadowOffset64; diff --git a/llvm/lib/Transforms/Instrumentation/CGProfile.cpp 
b/llvm/lib/Transforms/Instrumentation/CGProfile.cpp index 57c491436b93..27107f46ed92 100644 --- a/llvm/lib/Transforms/Instrumentation/CGProfile.cpp +++ b/llvm/lib/Transforms/Instrumentation/CGProfile.cpp @@ -101,42 +101,6 @@ static bool runCGProfilePass( return addModuleFlags(M, Counts); } -namespace { -struct CGProfileLegacyPass final : public ModulePass { - static char ID; - CGProfileLegacyPass() : ModulePass(ID) { - initializeCGProfileLegacyPassPass(*PassRegistry::getPassRegistry()); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - AU.addRequired<LazyBlockFrequencyInfoPass>(); - AU.addRequired<TargetTransformInfoWrapperPass>(); - } - - bool runOnModule(Module &M) override { - auto GetBFI = [this](Function &F) -> BlockFrequencyInfo & { - return this->getAnalysis<LazyBlockFrequencyInfoPass>(F).getBFI(); - }; - auto GetTTI = [this](Function &F) -> TargetTransformInfo & { - return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); - }; - - return runCGProfilePass(M, GetBFI, GetTTI, true); - } -}; - -} // namespace - -char CGProfileLegacyPass::ID = 0; - -INITIALIZE_PASS(CGProfileLegacyPass, "cg-profile", "Call Graph Profile", false, - false) - -ModulePass *llvm::createCGProfileLegacyPass() { - return new CGProfileLegacyPass(); -} - PreservedAnalyses CGProfilePass::run(Module &M, ModuleAnalysisManager &MAM) { FunctionAnalysisManager &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); diff --git a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp index e5c0705b916e..adc007dacae4 100644 --- a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp +++ b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp @@ -103,47 +103,6 @@ static void parseCHRFilterFiles() { } namespace { -class ControlHeightReductionLegacyPass : public FunctionPass { -public: - static char ID; - - ControlHeightReductionLegacyPass() : FunctionPass(ID) { - initializeControlHeightReductionLegacyPassPass( - *PassRegistry::getPassRegistry()); - parseCHRFilterFiles(); - } - - bool runOnFunction(Function &F) override; - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<BlockFrequencyInfoWrapperPass>(); - AU.addRequired<DominatorTreeWrapperPass>(); - AU.addRequired<ProfileSummaryInfoWrapperPass>(); - AU.addRequired<RegionInfoPass>(); - AU.addPreserved<GlobalsAAWrapperPass>(); - } -}; -} // end anonymous namespace - -char ControlHeightReductionLegacyPass::ID = 0; - -INITIALIZE_PASS_BEGIN(ControlHeightReductionLegacyPass, - "chr", - "Reduce control height in the hot paths", - false, false) -INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(RegionInfoPass) -INITIALIZE_PASS_END(ControlHeightReductionLegacyPass, - "chr", - "Reduce control height in the hot paths", - false, false) - -FunctionPass *llvm::createControlHeightReductionLegacyPass() { - return new ControlHeightReductionLegacyPass(); -} - -namespace { struct CHRStats { CHRStats() = default; @@ -2083,18 +2042,6 @@ bool CHR::run() { return Changed; } -bool ControlHeightReductionLegacyPass::runOnFunction(Function &F) { - BlockFrequencyInfo &BFI = - getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(); - DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - ProfileSummaryInfo &PSI = - 
getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); - RegionInfo &RI = getAnalysis<RegionInfoPass>().getRegionInfo(); - std::unique_ptr<OptimizationRemarkEmitter> OwnedORE = - std::make_unique<OptimizationRemarkEmitter>(&F); - return CHR(F, BFI, DT, PSI, RI, *OwnedORE).run(); -} - namespace llvm { ControlHeightReductionPass::ControlHeightReductionPass() { diff --git a/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp b/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp index 2091881c29fe..d7561c193aa3 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp @@ -163,42 +163,11 @@ public: } }; // End of InstrOrderFile struct - -class InstrOrderFileLegacyPass : public ModulePass { -public: - static char ID; - - InstrOrderFileLegacyPass() : ModulePass(ID) { - initializeInstrOrderFileLegacyPassPass( - *PassRegistry::getPassRegistry()); - } - - bool runOnModule(Module &M) override; -}; - } // End anonymous namespace -bool InstrOrderFileLegacyPass::runOnModule(Module &M) { - if (skipModule(M)) - return false; - - return InstrOrderFile().run(M); -} - PreservedAnalyses InstrOrderFilePass::run(Module &M, ModuleAnalysisManager &AM) { if (InstrOrderFile().run(M)) return PreservedAnalyses::none(); return PreservedAnalyses::all(); } - -INITIALIZE_PASS_BEGIN(InstrOrderFileLegacyPass, "instrorderfile", - "Instrumentation for Order File", false, false) -INITIALIZE_PASS_END(InstrOrderFileLegacyPass, "instrorderfile", - "Instrumentation for Order File", false, false) - -char InstrOrderFileLegacyPass::ID = 0; - -ModulePass *llvm::createInstrOrderFilePass() { - return new InstrOrderFileLegacyPass(); -} diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index 3572cb3b50e2..5b7aa304b987 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -147,35 +147,6 @@ cl::opt<bool> SkipRetExitBlock( "skip-ret-exit-block", cl::init(true), cl::desc("Suppress counter promotion if exit blocks contain ret.")); -class InstrProfilingLegacyPass : public ModulePass { - InstrProfiling InstrProf; - -public: - static char ID; - - InstrProfilingLegacyPass() : ModulePass(ID) {} - InstrProfilingLegacyPass(const InstrProfOptions &Options, bool IsCS = false) - : ModulePass(ID), InstrProf(Options, IsCS) { - initializeInstrProfilingLegacyPassPass(*PassRegistry::getPassRegistry()); - } - - StringRef getPassName() const override { - return "Frontend instrumentation-based coverage lowering"; - } - - bool runOnModule(Module &M) override { - auto GetTLI = [this](Function &F) -> TargetLibraryInfo & { - return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); - }; - return InstrProf.run(M, GetTLI); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - AU.addRequired<TargetLibraryInfoWrapperPass>(); - } -}; - /// /// A helper class to promote one counter RMW operation in the loop /// into register update. 
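The counter-promotion helper this comment introduces survives the legacy-pass deletion. As a plain illustration of the transformation it performs, hoisting a profile counter's per-iteration RMW into a register with a single write-back (our toy model, not the pass's actual output):

```cpp
#include <cassert>
#include <cstdint>

uint64_t Counter; // stands in for an __profc_* counter slot

void hot(unsigned N) {
  uint64_t Local = Counter; // promoted: load once before the loop
  for (unsigned I = 0; I < N; ++I)
    ++Local;                // was: a load/add/store of Counter per iteration
  Counter = Local;          // single write-back at the loop exit
}

int main() {
  hot(1000);
  assert(Counter == 1000);
}
```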
@@ -439,21 +410,6 @@ PreservedAnalyses InstrProfiling::run(Module &M, ModuleAnalysisManager &AM) { return PreservedAnalyses::none(); } -char InstrProfilingLegacyPass::ID = 0; -INITIALIZE_PASS_BEGIN(InstrProfilingLegacyPass, "instrprof", - "Frontend instrumentation-based coverage lowering.", - false, false) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_END(InstrProfilingLegacyPass, "instrprof", - "Frontend instrumentation-based coverage lowering.", false, - false) - -ModulePass * -llvm::createInstrProfilingLegacyPass(const InstrProfOptions &Options, - bool IsCS) { - return new InstrProfilingLegacyPass(Options, IsCS); -} - bool InstrProfiling::lowerIntrinsics(Function *F) { bool MadeChange = false; PromotionCandidates.clear(); diff --git a/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp b/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp index 9ff0e632bd7f..bd575b6cf3b0 100644 --- a/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp @@ -94,11 +94,6 @@ void llvm::initializeInstrumentation(PassRegistry &Registry) { initializeMemProfilerLegacyPassPass(Registry); initializeModuleMemProfilerLegacyPassPass(Registry); initializeBoundsCheckingLegacyPassPass(Registry); - initializeControlHeightReductionLegacyPassPass(Registry); - initializeCGProfileLegacyPassPass(Registry); - initializeInstrOrderFileLegacyPassPass(Registry); - initializeInstrProfilingLegacyPassPass(Registry); - initializeModuleSanitizerCoverageLegacyPassPass(Registry); initializeDataFlowSanitizerLegacyPassPass(Registry); } diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 4d72f6c3d1a9..4606bd5de6c3 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -417,6 +417,14 @@ static const MemoryMapParams Linux_AArch64_MemoryMapParams = { 0x01000000000, // OriginBase }; +// aarch64 FreeBSD +static const MemoryMapParams FreeBSD_AArch64_MemoryMapParams = { + 0x1800000000000, // AndMask + 0x0400000000000, // XorMask + 0x0200000000000, // ShadowBase + 0x0700000000000, // OriginBase +}; + // i386 FreeBSD static const MemoryMapParams FreeBSD_I386_MemoryMapParams = { 0x000180000000, // AndMask @@ -466,6 +474,11 @@ static const PlatformMemoryMapParams Linux_ARM_MemoryMapParams = { &Linux_AArch64_MemoryMapParams, }; +static const PlatformMemoryMapParams FreeBSD_ARM_MemoryMapParams = { + nullptr, + &FreeBSD_AArch64_MemoryMapParams, +}; + static const PlatformMemoryMapParams FreeBSD_X86_MemoryMapParams = { &FreeBSD_I386_MemoryMapParams, &FreeBSD_X86_64_MemoryMapParams, @@ -894,6 +907,9 @@ void MemorySanitizer::initializeModule(Module &M) { switch (TargetTriple.getOS()) { case Triple::FreeBSD: switch (TargetTriple.getArch()) { + case Triple::aarch64: + MapParams = FreeBSD_ARM_MemoryMapParams.bits64; + break; case Triple::x86_64: MapParams = FreeBSD_X86_MemoryMapParams.bits64; break; diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 3a29cd70e42e..c4512d0222cd 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -210,12 +210,11 @@ cl::opt<bool> // Command line option to enable/disable the warning about a hash mismatch in // the profile data for Comdat functions, which often turns out to be false // positive due to 
the pre-instrumentation inline. -static cl::opt<bool> - NoPGOWarnMismatchComdat("no-pgo-warn-mismatch-comdat", cl::init(true), - cl::Hidden, - cl::desc("The option is used to turn on/off " - "warnings about hash mismatch for comdat " - "functions.")); +static cl::opt<bool> NoPGOWarnMismatchComdatWeak( + "no-pgo-warn-mismatch-comdat-weak", cl::init(true), cl::Hidden, + cl::desc("The option is used to turn on/off " + "warnings about hash mismatch for comdat " + "or weak functions.")); // Command line option to enable/disable select instruction instrumentation. static cl::opt<bool> @@ -287,6 +286,11 @@ static cl::opt<unsigned> PGOVerifyBFICutoff( cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose " "profile count value is below.")); +static cl::opt<std::string> PGOTraceFuncHash( + "pgo-trace-func-hash", cl::init("-"), cl::Hidden, + cl::value_desc("function name"), + cl::desc("Trace the hash of the function with this name.")); + namespace llvm { // Command line option to turn on CFG dot dump after profile annotation. // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts @@ -630,6 +634,10 @@ void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() { << ", High32 CRC = " << JCH.getCRC()); } LLVM_DEBUG(dbgs() << ", Hash = " << FunctionHash << "\n";); + + if (PGOTraceFuncHash != "-" && F.getName().contains(PGOTraceFuncHash)) + dbgs() << "Funcname=" << F.getName() << ", Hash=" << FunctionHash + << " in building " << F.getParent()->getSourceFileName() << "\n"; } // Check if we can safely rename this Comdat function. @@ -832,8 +840,6 @@ static void instrumentOneFunc( auto CFGHash = ConstantInt::get(Type::getInt64Ty(M->getContext()), FuncInfo.FunctionHash); if (PGOFunctionEntryCoverage) { - assert(!IsCS && - "entry coverge does not support context-sensitive instrumentation"); auto &EntryBB = F.getEntryBlock(); IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt()); // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>, @@ -1216,8 +1222,9 @@ static void annotateFunctionWithHashMismatch(Function &F, bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, bool &AllMinusOnes) { auto &Ctx = M->getContext(); - Expected<InstrProfRecord> Result = - PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash); + uint64_t MismatchedFuncSum = 0; + Expected<InstrProfRecord> Result = PGOReader->getInstrProfRecord( + FuncInfo.FuncName, FuncInfo.FunctionHash, &MismatchedFuncSum); if (Error E = Result.takeError()) { handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { auto Err = IPE.get(); @@ -1233,10 +1240,11 @@ bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++; SkipWarning = NoPGOWarnMismatch || - (NoPGOWarnMismatchComdat && - (F.hasComdat() || + (NoPGOWarnMismatchComdatWeak && + (F.hasComdat() || F.getLinkage() == GlobalValue::WeakAnyLinkage || F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); - LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")"); + LLVM_DEBUG(dbgs() << "hash mismatch (hash= " << FuncInfo.FunctionHash + << " skip=" << SkipWarning << ")"); // Emit function metadata indicating PGO profile mismatch. 
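The readCounters() hunk below threads a MismatchedFuncSum out-parameter through getInstrProfRecord() and reports failures through the usual Expected error path. A minimal standalone sketch of that error-handling pattern, assuming only llvm/Support/Error.h (the lookup function and message are invented for illustration):

```cpp
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

Expected<int> lookup(bool Ok) {
  if (Ok)
    return 42;
  return createStringError(inconvertibleErrorCode(), "hash_mismatch");
}

int main() {
  Expected<int> R = lookup(false);
  if (Error E = R.takeError()) {
    // Dispatch on the concrete error type, as readCounters() does for
    // InstrProfError.
    handleAllErrors(std::move(E), [&](StringError &SE) {
      errs() << "lookup failed: " << SE.getMessage() << "\n";
    });
    return 1;
  }
  outs() << *R << "\n";
}
```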
annotateFunctionWithHashMismatch(F, M->getContext()); } @@ -1245,9 +1253,11 @@ bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, if (SkipWarning) return; - std::string Msg = IPE.message() + std::string(" ") + F.getName().str() + - std::string(" Hash = ") + - std::to_string(FuncInfo.FunctionHash); + std::string Msg = + IPE.message() + std::string(" ") + F.getName().str() + + std::string(" Hash = ") + std::to_string(FuncInfo.FunctionHash) + + std::string(" up to ") + std::to_string(MismatchedFuncSum) + + std::string(" count discarded"); Ctx.diagnose( DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); diff --git a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp index d9d11cc90d3d..3ca476e74953 100644 --- a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp +++ b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp @@ -278,53 +278,6 @@ private: const SpecialCaseList *Allowlist; const SpecialCaseList *Blocklist; }; - -class ModuleSanitizerCoverageLegacyPass : public ModulePass { -public: - ModuleSanitizerCoverageLegacyPass( - const SanitizerCoverageOptions &Options = SanitizerCoverageOptions(), - const std::vector<std::string> &AllowlistFiles = - std::vector<std::string>(), - const std::vector<std::string> &BlocklistFiles = - std::vector<std::string>()) - : ModulePass(ID), Options(Options) { - if (AllowlistFiles.size() > 0) - Allowlist = SpecialCaseList::createOrDie(AllowlistFiles, - *vfs::getRealFileSystem()); - if (BlocklistFiles.size() > 0) - Blocklist = SpecialCaseList::createOrDie(BlocklistFiles, - *vfs::getRealFileSystem()); - initializeModuleSanitizerCoverageLegacyPassPass( - *PassRegistry::getPassRegistry()); - } - bool runOnModule(Module &M) override { - ModuleSanitizerCoverage ModuleSancov(Options, Allowlist.get(), - Blocklist.get()); - auto DTCallback = [this](Function &F) -> const DominatorTree * { - return &this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree(); - }; - auto PDTCallback = [this](Function &F) -> const PostDominatorTree * { - return &this->getAnalysis<PostDominatorTreeWrapperPass>(F) - .getPostDomTree(); - }; - return ModuleSancov.instrumentModule(M, DTCallback, PDTCallback); - } - - static char ID; // Pass identification, replacement for typeid - StringRef getPassName() const override { return "ModuleSanitizerCoverage"; } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<DominatorTreeWrapperPass>(); - AU.addRequired<PostDominatorTreeWrapperPass>(); - } - -private: - SanitizerCoverageOptions Options; - - std::unique_ptr<SpecialCaseList> Allowlist; - std::unique_ptr<SpecialCaseList> Blocklist; -}; - } // namespace PreservedAnalyses ModuleSanitizerCoveragePass::run(Module &M, @@ -1075,20 +1028,3 @@ ModuleSanitizerCoverage::getSectionEnd(const std::string &Section) const { return "\1section$end$__DATA$__" + Section; return "__stop___" + Section; } - -char ModuleSanitizerCoverageLegacyPass::ID = 0; -INITIALIZE_PASS_BEGIN(ModuleSanitizerCoverageLegacyPass, "sancov", - "Pass for instrumenting coverage on functions", false, - false) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) -INITIALIZE_PASS_END(ModuleSanitizerCoverageLegacyPass, "sancov", - "Pass for instrumenting coverage on functions", false, - false) -ModulePass *llvm::createModuleSanitizerCoverageLegacyPassPass( - const SanitizerCoverageOptions &Options, - const std::vector<std::string> 
&AllowlistFiles, - const std::vector<std::string> &BlocklistFiles) { - return new ModuleSanitizerCoverageLegacyPass(Options, AllowlistFiles, - BlocklistFiles); -} diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index 4c42869dbd58..3f0dad7ee769 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -776,6 +776,11 @@ struct DSEState { // fall back to CFG scan starting from all non-unreachable roots. bool AnyUnreachableExit; + // Whether or not we should iterate on removing dead stores at the end of the + // function due to removing a store causing a previously captured pointer to + // no longer be captured. + bool ShouldIterateEndOfFunctionDSE; + // Class contains self-reference, make sure it's not copied/moved. DSEState(const DSEState &) = delete; DSEState &operator=(const DSEState &) = delete; @@ -1103,9 +1108,8 @@ struct DSEState { return {std::make_pair(MemoryLocation(Ptr, Len), false)}; if (auto *CB = dyn_cast<CallBase>(I)) { - if (isFreeCall(I, &TLI)) - return {std::make_pair(MemoryLocation::getAfter(CB->getArgOperand(0)), - true)}; + if (Value *FreedOp = getFreedOperand(CB, &TLI)) + return {std::make_pair(MemoryLocation::getAfter(FreedOp), true)}; } return None; @@ -1114,9 +1118,9 @@ struct DSEState { /// Returns true if \p I is a memory terminator instruction like /// llvm.lifetime.end or free. bool isMemTerminatorInst(Instruction *I) const { - IntrinsicInst *II = dyn_cast<IntrinsicInst>(I); - return (II && II->getIntrinsicID() == Intrinsic::lifetime_end) || - isFreeCall(I, &TLI); + auto *CB = dyn_cast<CallBase>(I); + return CB && (CB->getIntrinsicID() == Intrinsic::lifetime_end || + getFreedOperand(CB, &TLI) != nullptr); } /// Returns true if \p MaybeTerm is a memory terminator for \p Loc from @@ -1598,6 +1602,14 @@ struct DSEState { if (MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst)) { if (MemoryDef *MD = dyn_cast<MemoryDef>(MA)) { SkipStores.insert(MD); + if (auto *SI = dyn_cast<StoreInst>(MD->getMemoryInst())) { + if (SI->getValueOperand()->getType()->isPointerTy()) { + const Value *UO = getUnderlyingObject(SI->getValueOperand()); + if (CapturedBeforeReturn.erase(UO)) + ShouldIterateEndOfFunctionDSE = true; + InvisibleToCallerAfterRet.erase(UO); + } + } } Updater.removeMemoryAccess(MA); @@ -1671,33 +1683,36 @@ struct DSEState { LLVM_DEBUG( dbgs() << "Trying to eliminate MemoryDefs at the end of the function\n"); - for (MemoryDef *Def : llvm::reverse(MemDefs)) { - if (SkipStores.contains(Def)) - continue; + do { + ShouldIterateEndOfFunctionDSE = false; + for (MemoryDef *Def : llvm::reverse(MemDefs)) { + if (SkipStores.contains(Def)) + continue; - Instruction *DefI = Def->getMemoryInst(); - auto DefLoc = getLocForWrite(DefI); - if (!DefLoc || !isRemovable(DefI)) - continue; + Instruction *DefI = Def->getMemoryInst(); + auto DefLoc = getLocForWrite(DefI); + if (!DefLoc || !isRemovable(DefI)) + continue; - // NOTE: Currently eliminating writes at the end of a function is limited - // to MemoryDefs with a single underlying object, to save compile-time. In - // practice it appears the case with multiple underlying objects is very - // uncommon. If it turns out to be important, we can use - // getUnderlyingObjects here instead. 
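
The DSE hunk continues below with the end-of-function scan rewritten as a do/while. The new ShouldIterateEndOfFunctionDSE flag (declared in the DSEState hunk further up) makes the walk iterate to a fixpoint: deleting a store can erase the only capture of a pointer, which in turn can make other stores to that object removable. A reduced, self-contained model of that control flow, with hypothetical names:

    #include <vector>

    // 'tryRemove' stands in for the real per-MemoryDef elimination; it returns
    // true when deleting a store may have un-captured a pointer, the condition
    // that sets ShouldIterateEndOfFunctionDSE in this patch.
    template <typename T, typename Fn>
    bool eliminateUntilFixpoint(std::vector<T *> &Defs, Fn tryRemove) {
      bool MadeChange = false;
      bool Rescan;
      do {
        Rescan = false;
        for (T *Def : Defs)
          if (tryRemove(Def)) {
            MadeChange = true;
            Rescan = true; // more stores may have become dead; scan again
          }
      } while (Rescan);
      return MadeChange;
    }
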
-      const Value *UO = getUnderlyingObject(DefLoc->Ptr);
-      if (!isInvisibleToCallerAfterRet(UO))
-        continue;
+        // NOTE: Currently eliminating writes at the end of a function is
+        // limited to MemoryDefs with a single underlying object, to save
+        // compile-time. In practice it appears the case with multiple
+        // underlying objects is very uncommon. If it turns out to be important,
+        // we can use getUnderlyingObjects here instead.
+        const Value *UO = getUnderlyingObject(DefLoc->Ptr);
+        if (!isInvisibleToCallerAfterRet(UO))
+          continue;
 
-      if (isWriteAtEndOfFunction(Def)) {
-        // See through pointer-to-pointer bitcasts
-        LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end "
-                             "of the function\n");
-        deleteDeadInstruction(DefI);
-        ++NumFastStores;
-        MadeChange = true;
+        if (isWriteAtEndOfFunction(Def)) {
+          // See through pointer-to-pointer bitcasts
+          LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end "
+                               "of the function\n");
+          deleteDeadInstruction(DefI);
+          ++NumFastStores;
+          MadeChange = true;
+        }
       }
-    }
+    } while (ShouldIterateEndOfFunctionDSE);
 
     return MadeChange;
   }
diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index a9ca0bdc8f7b..9698ed97379e 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -1738,7 +1738,7 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
   // through *explicit* control flow. We have to eliminate the possibility of
   // implicit exits (see below) before we know it's truly exact.
   const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
-  if (isa<SCEVCouldNotCompute>(ExactBTC) || !isSafeToExpand(ExactBTC, *SE))
+  if (isa<SCEVCouldNotCompute>(ExactBTC) || !Rewriter.isSafeToExpand(ExactBTC))
     return false;
 
   assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
@@ -1769,7 +1769,8 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
       return true;
 
     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
-    if (isa<SCEVCouldNotCompute>(ExitCount) || !isSafeToExpand(ExitCount, *SE))
+    if (isa<SCEVCouldNotCompute>(ExitCount) ||
+        !Rewriter.isSafeToExpand(ExitCount))
       return true;
 
     assert(SE->isLoopInvariant(ExitCount, L) &&
diff --git a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index b54cf5e7cb20..328615011ceb 100644
--- a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -1451,7 +1451,7 @@ bool LoopConstrainer::run() {
     return false;
   }
 
-  if (!isSafeToExpandAt(ExitPreLoopAtSCEV, InsertPt, SE)) {
+  if (!Expander.isSafeToExpandAt(ExitPreLoopAtSCEV, InsertPt)) {
     LLVM_DEBUG(dbgs() << "irce: could not prove that it is safe to expand the"
                       << " preloop exit limit " << *ExitPreLoopAtSCEV
                       << " at block " << InsertPt->getParent()->getName()
@@ -1478,7 +1478,7 @@ bool LoopConstrainer::run() {
     return false;
   }
 
-  if (!isSafeToExpandAt(ExitMainLoopAtSCEV, InsertPt, SE)) {
+  if (!Expander.isSafeToExpandAt(ExitMainLoopAtSCEV, InsertPt)) {
     LLVM_DEBUG(dbgs() << "irce: could not prove that it is safe to expand the"
                       << " main loop exit limit " << *ExitMainLoopAtSCEV
                       << " at block " << InsertPt->getParent()->getName()
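
This run of files (IndVarSimplify and IRCE above; LoopDataPrefetch, LoopIdiomRecognize, LoopPredication, and LoopStrengthReduce below) makes the same mechanical API change: the free functions isSafeToExpand(S, SE) and isSafeToExpandAt(S, InsertPt, SE) become SCEVExpander member functions, so the safety check can account for how the particular expander is configured. An illustrative helper, not pass code, showing the call-site pattern:

    #include "llvm/Analysis/ScalarEvolution.h"
    #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
    using namespace llvm;

    // Returns whether the loop's backedge-taken count is expandable.
    static bool safeToExpandBTC(ScalarEvolution &SE, Loop *L,
                                SCEVExpander &Rewriter) {
      const SCEV *BTC = SE.getBackedgeTakenCount(L);
      // Before this change: free function, blind to the expander's setup.
      //   return !isa<SCEVCouldNotCompute>(BTC) && isSafeToExpand(BTC, SE);
      // After: member function, so configuration applied to Rewriter, for
      // example disableCanonicalMode() or enableLSRMode() in the LSR hunk
      // below, influences the answer.
      return !isa<SCEVCouldNotCompute>(BTC) && Rewriter.isSafeToExpand(BTC);
    }
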
diff --git a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
index 9590fbbb1994..fd2eaee8b47d 100644
--- a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
@@ -388,15 +388,15 @@ bool LoopDataPrefetch::runOnLoop(Loop *L) {
       if (!isStrideLargeEnough(P.LSCEVAddRec, TargetMinStride))
         continue;
 
+      BasicBlock *BB = P.InsertPt->getParent();
+      SCEVExpander SCEVE(*SE, BB->getModule()->getDataLayout(), "prefaddr");
       const SCEV *NextLSCEV = SE->getAddExpr(P.LSCEVAddRec, SE->getMulExpr(
         SE->getConstant(P.LSCEVAddRec->getType(), ItersAhead),
         P.LSCEVAddRec->getStepRecurrence(*SE)));
-      if (!isSafeToExpand(NextLSCEV, *SE))
+      if (!SCEVE.isSafeToExpand(NextLSCEV))
         continue;
 
-      BasicBlock *BB = P.InsertPt->getParent();
       Type *I8Ptr = Type::getInt8PtrTy(BB->getContext(), 0/*PtrAddrSpace*/);
-      SCEVExpander SCEVE(*SE, BB->getModule()->getDataLayout(), "prefaddr");
       Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, P.InsertPt);
 
       IRBuilder<> Builder(P.InsertPt);
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index d908c151d9f2..3ed022f65d9a 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -1129,7 +1129,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
 
   // TODO: ideally we should still be able to generate memset if SCEV expander
   // is taught to generate the dependencies at the latest point.
-  if (!isSafeToExpand(Start, *SE))
+  if (!Expander.isSafeToExpand(Start))
     return Changed;
 
   // Okay, we have a strided store "p[i]" of a splattable value. We can turn
@@ -1163,7 +1163,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
 
   // TODO: ideally we should still be able to generate memset if SCEV expander
   // is taught to generate the dependencies at the latest point.
-  if (!isSafeToExpand(NumBytesS, *SE))
+  if (!Expander.isSafeToExpand(NumBytesS))
     return Changed;
 
   Value *NumBytes =
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 1d3023d04463..18daa4295224 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -288,7 +288,6 @@ static void populateWorklist(Loop &L, LoopVector &LoopList) {
     Vec = &CurrentLoop->getSubLoops();
   }
   LoopList.push_back(CurrentLoop);
-  return;
 }
 
 namespace {
diff --git a/llvm/lib/Transforms/Scalar/LoopPredication.cpp b/llvm/lib/Transforms/Scalar/LoopPredication.cpp
index d0ee5b47a8ca..b327d38d2a84 100644
--- a/llvm/lib/Transforms/Scalar/LoopPredication.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopPredication.cpp
@@ -275,7 +275,8 @@ class LoopPredication {
   /// which is that an expression *can be made* invariant via SCEVExpander.
   /// Thus, this version is only suitable for finding an insert point to be be
   /// passed to SCEVExpander!
-  Instruction *findInsertPt(Instruction *User, ArrayRef<const SCEV*> Ops);
+  Instruction *findInsertPt(const SCEVExpander &Expander, Instruction *User,
+                            ArrayRef<const SCEV *> Ops);
 
   /// Return true if the value is known to produce a single fixed value across
   /// all iterations on which it executes.
Note that this does not imply @@ -418,13 +419,14 @@ Value *LoopPredication::expandCheck(SCEVExpander &Expander, return Builder.getFalse(); } - Value *LHSV = Expander.expandCodeFor(LHS, Ty, findInsertPt(Guard, {LHS})); - Value *RHSV = Expander.expandCodeFor(RHS, Ty, findInsertPt(Guard, {RHS})); + Value *LHSV = + Expander.expandCodeFor(LHS, Ty, findInsertPt(Expander, Guard, {LHS})); + Value *RHSV = + Expander.expandCodeFor(RHS, Ty, findInsertPt(Expander, Guard, {RHS})); IRBuilder<> Builder(findInsertPt(Guard, {LHSV, RHSV})); return Builder.CreateICmp(Pred, LHSV, RHSV); } - // Returns true if its safe to truncate the IV to RangeCheckType. // When the IV type is wider than the range operand type, we can still do loop // predication, by generating SCEVs for the range and latch that are of the @@ -516,14 +518,15 @@ Instruction *LoopPredication::findInsertPt(Instruction *Use, return Preheader->getTerminator(); } -Instruction *LoopPredication::findInsertPt(Instruction *Use, - ArrayRef<const SCEV*> Ops) { +Instruction *LoopPredication::findInsertPt(const SCEVExpander &Expander, + Instruction *Use, + ArrayRef<const SCEV *> Ops) { // Subtlety: SCEV considers things to be invariant if the value produced is // the same across iterations. This is not the same as being able to // evaluate outside the loop, which is what we actually need here. for (const SCEV *Op : Ops) if (!SE->isLoopInvariant(Op, L) || - !isSafeToExpandAt(Op, Preheader->getTerminator(), *SE)) + !Expander.isSafeToExpandAt(Op, Preheader->getTerminator())) return Use; return Preheader->getTerminator(); } @@ -589,8 +592,8 @@ Optional<Value *> LoopPredication::widenICmpRangeCheckIncrementingLoop( LLVM_DEBUG(dbgs() << "Can't expand limit check!\n"); return None; } - if (!isSafeToExpandAt(LatchStart, Guard, *SE) || - !isSafeToExpandAt(LatchLimit, Guard, *SE)) { + if (!Expander.isSafeToExpandAt(LatchStart, Guard) || + !Expander.isSafeToExpandAt(LatchLimit, Guard)) { LLVM_DEBUG(dbgs() << "Can't expand limit check!\n"); return None; } @@ -632,8 +635,8 @@ Optional<Value *> LoopPredication::widenICmpRangeCheckDecrementingLoop( LLVM_DEBUG(dbgs() << "Can't expand limit check!\n"); return None; } - if (!isSafeToExpandAt(LatchStart, Guard, *SE) || - !isSafeToExpandAt(LatchLimit, Guard, *SE)) { + if (!Expander.isSafeToExpandAt(LatchStart, Guard) || + !Expander.isSafeToExpandAt(LatchLimit, Guard)) { LLVM_DEBUG(dbgs() << "Can't expand limit check!\n"); return None; } @@ -1159,7 +1162,7 @@ bool LoopPredication::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { const SCEV *MinEC = getMinAnalyzeableBackedgeTakenCount(*SE, *DT, L); if (isa<SCEVCouldNotCompute>(MinEC) || MinEC->getType()->isPointerTy() || !SE->isLoopInvariant(MinEC, L) || - !isSafeToExpandAt(MinEC, WidenableBR, *SE)) + !Rewriter.isSafeToExpandAt(MinEC, WidenableBR)) return ChangedLoop; // Subtlety: We need to avoid inserting additional uses of the WC. 
We know @@ -1198,7 +1201,7 @@ bool LoopPredication::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); if (isa<SCEVCouldNotCompute>(ExitCount) || ExitCount->getType()->isPointerTy() || - !isSafeToExpandAt(ExitCount, WidenableBR, *SE)) + !Rewriter.isSafeToExpandAt(ExitCount, WidenableBR)) continue; const bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB)); diff --git a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp index b7e0e32780b4..083f87436acd 100644 --- a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp +++ b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp @@ -576,6 +576,18 @@ public: return false; } + // TODO: Tokens may breach LCSSA form by default. However, the transform for + // dead exit blocks requires LCSSA form to be maintained for all values, + // tokens included, otherwise it may break use-def dominance (see PR56243). + if (!DeadExitBlocks.empty() && !L.isLCSSAForm(DT, /*IgnoreTokens*/ false)) { + assert(L.isLCSSAForm(DT, /*IgnoreTokens*/ true) && + "LCSSA broken not by tokens?"); + LLVM_DEBUG(dbgs() << "Give up constant terminator folding in loop " + << Header->getName() + << ": tokens uses potentially break LCSSA form.\n"); + return false; + } + SE.forgetTopmostLoop(&L); // Dump analysis results. LLVM_DEBUG(dump()); diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 4ef7809c6681..a3434f8bc46d 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -1950,6 +1950,7 @@ class LSRInstance { Loop *const L; MemorySSAUpdater *MSSAU; TTI::AddressingModeKind AMK; + mutable SCEVExpander Rewriter; bool Changed = false; /// This is the insert position that the current loop's induction variable @@ -1998,7 +1999,7 @@ class LSRInstance { SmallVectorImpl<ChainUsers> &ChainUsersVec); void FinalizeChain(IVChain &Chain); void CollectChains(); - void GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, + void GenerateIVChain(const IVChain &Chain, SmallVectorImpl<WeakTrackingVH> &DeadInsts); void CollectInterestingTypesAndFactors(); @@ -2068,22 +2069,19 @@ class LSRInstance { void Solve(SmallVectorImpl<const Formula *> &Solution) const; BasicBlock::iterator - HoistInsertPosition(BasicBlock::iterator IP, - const SmallVectorImpl<Instruction *> &Inputs) const; - BasicBlock::iterator - AdjustInsertPositionForExpand(BasicBlock::iterator IP, - const LSRFixup &LF, - const LSRUse &LU, - SCEVExpander &Rewriter) const; + HoistInsertPosition(BasicBlock::iterator IP, + const SmallVectorImpl<Instruction *> &Inputs) const; + BasicBlock::iterator AdjustInsertPositionForExpand(BasicBlock::iterator IP, + const LSRFixup &LF, + const LSRUse &LU) const; Value *Expand(const LSRUse &LU, const LSRFixup &LF, const Formula &F, - BasicBlock::iterator IP, SCEVExpander &Rewriter, + BasicBlock::iterator IP, SmallVectorImpl<WeakTrackingVH> &DeadInsts) const; void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF, - const Formula &F, SCEVExpander &Rewriter, + const Formula &F, SmallVectorImpl<WeakTrackingVH> &DeadInsts) const; void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F, - SCEVExpander &Rewriter, SmallVectorImpl<WeakTrackingVH> &DeadInsts) const; void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution); @@ -3183,7 +3181,7 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, /// Generate an add or 
subtract for each IVInc in a chain to materialize the IV /// user's operand from the previous IV user's operand. -void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, +void LSRInstance::GenerateIVChain(const IVChain &Chain, SmallVectorImpl<WeakTrackingVH> &DeadInsts) { // Find the new IVOperand for the head of the chain. It may have been replaced // by LSR. @@ -3335,7 +3333,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { // x == y --> x - y == 0 const SCEV *N = SE.getSCEV(NV); - if (SE.isLoopInvariant(N, L) && isSafeToExpand(N, SE) && + if (SE.isLoopInvariant(N, L) && Rewriter.isSafeToExpand(N) && (!NV->getType()->isPointerTy() || SE.getPointerBase(N) == SE.getPointerBase(S))) { // S is normalized, so normalize N before folding it into S @@ -3343,6 +3341,21 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { N = normalizeForPostIncUse(N, TmpPostIncLoops, SE); Kind = LSRUse::ICmpZero; S = SE.getMinusSCEV(N, S); + } else if (L->isLoopInvariant(NV) && + (!isa<Instruction>(NV) || + DT.dominates(cast<Instruction>(NV), L->getHeader())) && + !NV->getType()->isPointerTy()) { + // If we can't generally expand the expression (e.g. it contains + // a divide), but it is already at a loop invariant point before the + // loop, wrap it in an unknown (to prevent the expander from trying + // to re-expand in a potentially unsafe way.) The restriction to + // integer types is required because the unknown hides the base, and + // SCEV can't compute the difference of two unknown pointers. + N = SE.getUnknown(NV); + N = normalizeForPostIncUse(N, TmpPostIncLoops, SE); + Kind = LSRUse::ICmpZero; + S = SE.getMinusSCEV(N, S); + assert(!isa<SCEVCouldNotCompute>(S)); } // -1 and the negations of all interesting strides (except the negation @@ -3385,10 +3398,10 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { /// Insert a formula for the given expression into the given use, separating out /// loop-variant portions from loop-invariant and loop-computable portions. -void -LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) { +void LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, + size_t LUIdx) { // Mark uses whose expressions cannot be expanded. - if (!isSafeToExpand(S, SE, /*CanonicalMode*/ false)) + if (!Rewriter.isSafeToExpand(S)) LU.RigidFormula = true; Formula F; @@ -5206,11 +5219,8 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP, /// Determine an input position which will be dominated by the operands and /// which will dominate the result. -BasicBlock::iterator -LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP, - const LSRFixup &LF, - const LSRUse &LU, - SCEVExpander &Rewriter) const { +BasicBlock::iterator LSRInstance::AdjustInsertPositionForExpand( + BasicBlock::iterator LowestIP, const LSRFixup &LF, const LSRUse &LU) const { // Collect some instructions which must be dominated by the // expanding replacement. These must be dominated by any operands that // will be required in the expansion. @@ -5273,14 +5283,13 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP, /// is called "expanding"). Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF, const Formula &F, BasicBlock::iterator IP, - SCEVExpander &Rewriter, SmallVectorImpl<WeakTrackingVH> &DeadInsts) const { if (LU.RigidFormula) return LF.OperandValToReplace; // Determine an input position which will be dominated by the operands and // which will dominate the result. 
- IP = AdjustInsertPositionForExpand(IP, LF, LU, Rewriter); + IP = AdjustInsertPositionForExpand(IP, LF, LU); Rewriter.setInsertPoint(&*IP); // Inform the Rewriter if we have a post-increment use, so that it can @@ -5452,7 +5461,7 @@ Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF, /// to be expanded in multiple places. void LSRInstance::RewriteForPHI( PHINode *PN, const LSRUse &LU, const LSRFixup &LF, const Formula &F, - SCEVExpander &Rewriter, SmallVectorImpl<WeakTrackingVH> &DeadInsts) const { + SmallVectorImpl<WeakTrackingVH> &DeadInsts) const { DenseMap<BasicBlock *, Value *> Inserted; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingValue(i) == LF.OperandValToReplace) { @@ -5507,8 +5516,8 @@ void LSRInstance::RewriteForPHI( if (!Pair.second) PN->setIncomingValue(i, Pair.first->second); else { - Value *FullV = Expand(LU, LF, F, BB->getTerminator()->getIterator(), - Rewriter, DeadInsts); + Value *FullV = + Expand(LU, LF, F, BB->getTerminator()->getIterator(), DeadInsts); // If this is reuse-by-noop-cast, insert the noop cast. Type *OpTy = LF.OperandValToReplace->getType(); @@ -5567,15 +5576,14 @@ void LSRInstance::RewriteForPHI( /// is called "expanding"), and update the UserInst to reference the newly /// expanded value. void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF, - const Formula &F, SCEVExpander &Rewriter, + const Formula &F, SmallVectorImpl<WeakTrackingVH> &DeadInsts) const { // First, find an insertion point that dominates UserInst. For PHI nodes, // find the nearest block which dominates all the relevant uses. if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) { - RewriteForPHI(PN, LU, LF, F, Rewriter, DeadInsts); + RewriteForPHI(PN, LU, LF, F, DeadInsts); } else { - Value *FullV = - Expand(LU, LF, F, LF.UserInst->getIterator(), Rewriter, DeadInsts); + Value *FullV = Expand(LU, LF, F, LF.UserInst->getIterator(), DeadInsts); // If this is reuse-by-noop-cast, insert the noop cast. Type *OpTy = LF.OperandValToReplace->getType(); @@ -5609,13 +5617,6 @@ void LSRInstance::ImplementSolution( // we can remove them after we are done working. SmallVector<WeakTrackingVH, 16> DeadInsts; - SCEVExpander Rewriter(SE, L->getHeader()->getModule()->getDataLayout(), "lsr", - false); -#ifndef NDEBUG - Rewriter.setDebugType(DEBUG_TYPE); -#endif - Rewriter.disableCanonicalMode(); - Rewriter.enableLSRMode(); Rewriter.setIVIncInsertPos(L, IVIncInsertPos); // Mark phi nodes that terminate chains so the expander tries to reuse them. @@ -5627,12 +5628,12 @@ void LSRInstance::ImplementSolution( // Expand the new value definitions and update the users. for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) for (const LSRFixup &Fixup : Uses[LUIdx].Fixups) { - Rewrite(Uses[LUIdx], Fixup, *Solution[LUIdx], Rewriter, DeadInsts); + Rewrite(Uses[LUIdx], Fixup, *Solution[LUIdx], DeadInsts); Changed = true; } for (const IVChain &Chain : IVChainVec) { - GenerateIVChain(Chain, Rewriter, DeadInsts); + GenerateIVChain(Chain, DeadInsts); Changed = true; } @@ -5697,8 +5698,10 @@ LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU) : IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), TLI(TLI), TTI(TTI), L(L), - MSSAU(MSSAU), AMK(PreferredAddresingMode.getNumOccurrences() > 0 ? - PreferredAddresingMode : TTI.getPreferredAddressingMode(L, &SE)) { + MSSAU(MSSAU), AMK(PreferredAddresingMode.getNumOccurrences() > 0 + ? 
PreferredAddresingMode + : TTI.getPreferredAddressingMode(L, &SE)), + Rewriter(SE, L->getHeader()->getModule()->getDataLayout(), "lsr", false) { // If LoopSimplify form is not available, stay out of trouble. if (!L->isLoopSimplifyForm()) return; @@ -5733,6 +5736,14 @@ LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, L->getHeader()->printAsOperand(dbgs(), /*PrintType=*/false); dbgs() << ":\n"); + // Configure SCEVExpander already now, so the correct mode is used for + // isSafeToExpand() checks. +#ifndef NDEBUG + Rewriter.setDebugType(DEBUG_TYPE); +#endif + Rewriter.disableCanonicalMode(); + Rewriter.enableLSRMode(); + // First, perform some low-level loop optimizations. OptimizeShadowIV(); OptimizeLoopTermCond(); diff --git a/llvm/lib/Transforms/Scalar/Reassociate.cpp b/llvm/lib/Transforms/Scalar/Reassociate.cpp index 75f0896d4845..240fb5e60687 100644 --- a/llvm/lib/Transforms/Scalar/Reassociate.cpp +++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp @@ -142,12 +142,21 @@ XorOpnd::XorOpnd(Value *V) { isOr = true; } +/// Return true if I is an instruction with the FastMathFlags that are needed +/// for general reassociation set. This is not the same as testing +/// Instruction::isAssociative() because it includes operations like fsub. +/// (This routine is only intended to be called for floating-point operations.) +static bool hasFPAssociativeFlags(Instruction *I) { + assert(I && I->getType()->isFPOrFPVectorTy() && "Should only check FP ops"); + return I->hasAllowReassoc() && I->hasNoSignedZeros(); +} + /// Return true if V is an instruction of the specified opcode and if it /// only has one use. static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) { auto *I = dyn_cast<Instruction>(V); if (I && I->hasOneUse() && I->getOpcode() == Opcode) - if (!isa<FPMathOperator>(I) || I->isFast()) + if (!isa<FPMathOperator>(I) || hasFPAssociativeFlags(I)) return cast<BinaryOperator>(I); return nullptr; } @@ -157,7 +166,7 @@ static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode1, auto *I = dyn_cast<Instruction>(V); if (I && I->hasOneUse() && (I->getOpcode() == Opcode1 || I->getOpcode() == Opcode2)) - if (!isa<FPMathOperator>(I) || I->isFast()) + if (!isa<FPMathOperator>(I) || hasFPAssociativeFlags(I)) return cast<BinaryOperator>(I); return nullptr; } @@ -449,7 +458,8 @@ using RepeatedValue = std::pair<Value*, APInt>; /// of the expression) if it can turn them into binary operators of the right /// type and thus make the expression bigger. static bool LinearizeExprTree(Instruction *I, - SmallVectorImpl<RepeatedValue> &Ops) { + SmallVectorImpl<RepeatedValue> &Ops, + ReassociatePass::OrderedSet &ToRedo) { assert((isa<UnaryOperator>(I) || isa<BinaryOperator>(I)) && "Expected a UnaryOperator or BinaryOperator!"); LLVM_DEBUG(dbgs() << "LINEARIZE: " << *I << '\n'); @@ -572,23 +582,32 @@ static bool LinearizeExprTree(Instruction *I, assert((!isa<Instruction>(Op) || cast<Instruction>(Op)->getOpcode() != Opcode || (isa<FPMathOperator>(Op) && - !cast<Instruction>(Op)->isFast())) && + !hasFPAssociativeFlags(cast<Instruction>(Op)))) && "Should have been handled above!"); assert(Op->hasOneUse() && "Has uses outside the expression tree!"); // If this is a multiply expression, turn any internal negations into - // multiplies by -1 so they can be reassociated. 
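
The hasFPAssociativeFlags helper introduced above relaxes Reassociate's gate from Instruction::isFast(), which demands every fast-math flag, to the two flags reassociation actually relies on: reassoc and nsz. A self-contained model of the predicate change (the real code queries llvm::FastMathFlags on an Instruction):

    // Mirrors the seven fast-math flags carried by an FP instruction.
    struct FMFModel {
      bool AllowReassoc, NoNaNs, NoInfs, NoSignedZeros, AllowReciprocal,
          AllowContract, ApproxFunc;
    };

    // Old gate, Instruction::isFast(): every flag must be set.
    bool isFastModel(const FMFModel &F) {
      return F.AllowReassoc && F.NoNaNs && F.NoInfs && F.NoSignedZeros &&
             F.AllowReciprocal && F.AllowContract && F.ApproxFunc;
    }

    // New gate: only what reassociation needs. (Per the comment above, plain
    // Instruction::isAssociative() is not used because operations like fsub
    // must also be covered.)
    bool hasFPAssociativeFlagsModel(const FMFModel &F) {
      return F.AllowReassoc && F.NoSignedZeros;
    }
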
- if (Instruction *Tmp = dyn_cast<Instruction>(Op)) - if ((Opcode == Instruction::Mul && match(Tmp, m_Neg(m_Value()))) || - (Opcode == Instruction::FMul && match(Tmp, m_FNeg(m_Value())))) { - LLVM_DEBUG(dbgs() - << "MORPH LEAF: " << *Op << " (" << Weight << ") TO "); - Tmp = LowerNegateToMultiply(Tmp); - LLVM_DEBUG(dbgs() << *Tmp << '\n'); - Worklist.push_back(std::make_pair(Tmp, Weight)); - Changed = true; - continue; + // multiplies by -1 so they can be reassociated. Add any users of the + // newly created multiplication by -1 to the redo list, so any + // reassociation opportunities that are exposed will be reassociated + // further. + Instruction *Neg; + if (((Opcode == Instruction::Mul && match(Op, m_Neg(m_Value()))) || + (Opcode == Instruction::FMul && match(Op, m_FNeg(m_Value())))) && + match(Op, m_Instruction(Neg))) { + LLVM_DEBUG(dbgs() + << "MORPH LEAF: " << *Op << " (" << Weight << ") TO "); + Instruction *Mul = LowerNegateToMultiply(Neg); + LLVM_DEBUG(dbgs() << *Mul << '\n'); + Worklist.push_back(std::make_pair(Mul, Weight)); + for (User *U : Mul->users()) { + if (BinaryOperator *UserBO = dyn_cast<BinaryOperator>(U)) + ToRedo.insert(UserBO); } + ToRedo.insert(Neg); + Changed = true; + continue; + } // Failed to morph into an expression of the right type. This really is // a leaf. @@ -1141,7 +1160,7 @@ Value *ReassociatePass::RemoveFactorFromExpression(Value *V, Value *Factor) { return nullptr; SmallVector<RepeatedValue, 8> Tree; - MadeChange |= LinearizeExprTree(BO, Tree); + MadeChange |= LinearizeExprTree(BO, Tree, RedoInsts); SmallVector<ValueEntry, 8> Factors; Factors.reserve(Tree.size()); for (unsigned i = 0, e = Tree.size(); i != e; ++i) { @@ -2206,8 +2225,9 @@ void ReassociatePass::OptimizeInst(Instruction *I) { if (Instruction *Res = canonicalizeNegFPConstants(I)) I = Res; - // Don't optimize floating-point instructions unless they are 'fast'. - if (I->getType()->isFPOrFPVectorTy() && !I->isFast()) + // Don't optimize floating-point instructions unless they have the + // appropriate FastMathFlags for reassociation enabled. + if (I->getType()->isFPOrFPVectorTy() && !hasFPAssociativeFlags(I)) return; // Do not reassociate boolean (i1) expressions. We want to preserve the @@ -2320,7 +2340,7 @@ void ReassociatePass::ReassociateExpression(BinaryOperator *I) { // First, walk the expression tree, linearizing the tree, collecting the // operand information. SmallVector<RepeatedValue, 8> Tree; - MadeChange |= LinearizeExprTree(I, Tree); + MadeChange |= LinearizeExprTree(I, Tree, RedoInsts); SmallVector<ValueEntry, 8> Ops; Ops.reserve(Tree.size()); for (const RepeatedValue &E : Tree) diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index 51e4a5773f3e..baf407c5037b 100644 --- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -1702,10 +1702,20 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */ auto &Context = Call->getContext(); auto &DL = Call->getModule()->getDataLayout(); auto GetBaseAndOffset = [&](Value *Derived) { - assert(PointerToBase.count(Derived)); + Value *Base = nullptr; + // Optimizations in unreachable code might substitute the real pointer + // with undef, poison or null-derived constant. Return null base for + // them to be consistent with the handling in the main algorithm in + // findBaseDefiningValue. 
+ if (isa<Constant>(Derived)) + Base = + ConstantPointerNull::get(cast<PointerType>(Derived->getType())); + else { + assert(PointerToBase.count(Derived)); + Base = PointerToBase.find(Derived)->second; + } unsigned AddressSpace = Derived->getType()->getPointerAddressSpace(); unsigned IntPtrSize = DL.getPointerSizeInBits(AddressSpace); - Value *Base = PointerToBase.find(Derived)->second; Value *Base_int = Builder.CreatePtrToInt( Base, Type::getIntNTy(Context, IntPtrSize)); Value *Derived_int = Builder.CreatePtrToInt( diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp index 008ddfc72740..5ab9e25577d8 100644 --- a/llvm/lib/Transforms/Scalar/Scalar.cpp +++ b/llvm/lib/Transforms/Scalar/Scalar.cpp @@ -111,8 +111,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeLoopLoadEliminationPass(Registry); initializeLoopSimplifyCFGLegacyPassPass(Registry); initializeLoopVersioningLegacyPassPass(Registry); - initializeEntryExitInstrumenterPass(Registry); - initializePostInlineEntryExitInstrumenterPass(Registry); } void LLVMAddLoopSimplifyCFGPass(LLVMPassManagerRef PM) { diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp index f6525ad7de9b..0b797abefe20 100644 --- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -68,11 +68,6 @@ static cl::opt<bool> cl::desc("Allow relaxed uniform region checks"), cl::init(true)); -static cl::opt<unsigned> - ReorderNodeSize("structurizecfg-node-reorder-size", - cl::desc("Limit region size for reordering nodes"), - cl::init(100), cl::Hidden); - // Definition of the complex types used in this pass. using BBValuePair = std::pair<BasicBlock *, Value *>; @@ -267,8 +262,6 @@ class StructurizeCFG { void orderNodes(); - void reorderNodes(); - void analyzeLoops(RegionNode *N); Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert); @@ -427,57 +420,6 @@ void StructurizeCFG::orderNodes() { } } -/// Change the node ordering to decrease the range of live values, especially -/// the values that capture the control flow path for branches. We do this -/// by moving blocks with a single predecessor and successor to appear after -/// predecessor. The motivation is to move some loop exit blocks into a loop. -/// In cases where a loop has a large number of exit blocks, this reduces the -/// amount of values needed across the loop boundary. -void StructurizeCFG::reorderNodes() { - SmallVector<RegionNode *, 8> NewOrder; - DenseMap<BasicBlock *, unsigned> MoveTo; - BitVector Moved(Order.size()); - - // The benefits of reordering nodes occurs for large regions. - if (Order.size() <= ReorderNodeSize) - return; - - // The algorithm works with two passes over Order. The first pass identifies - // the blocks to move and the position to move them to. The second pass - // creates the new order based upon this information. We move blocks with - // a single predecessor and successor. If there are multiple candidates then - // maintain the original order. - BBSet Seen; - for (int I = Order.size() - 1; I >= 0; --I) { - auto *BB = Order[I]->getEntry(); - Seen.insert(BB); - auto *Pred = BB->getSinglePredecessor(); - auto *Succ = BB->getSingleSuccessor(); - // Consider only those basic blocks that have a predecessor in Order and a - // successor that exits the region. The region may contain subregions that - // have been structurized and are not included in Order. 
- if (Pred && Succ && Seen.count(Pred) && Succ == ParentRegion->getExit() && - !MoveTo.count(Pred)) { - MoveTo[Pred] = I; - Moved.set(I); - } - } - - // If no blocks have been moved then the original order is good. - if (!Moved.count()) - return; - - for (size_t I = 0, E = Order.size(); I < E; ++I) { - auto *BB = Order[I]->getEntry(); - if (MoveTo.count(BB)) - NewOrder.push_back(Order[MoveTo[BB]]); - if (!Moved[I]) - NewOrder.push_back(Order[I]); - } - - Order.assign(NewOrder); -} - /// Determine the end of the loops void StructurizeCFG::analyzeLoops(RegionNode *N) { if (N->isSubRegion()) { @@ -1139,7 +1081,6 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT) { ParentRegion = R; orderNodes(); - reorderNodes(); collectInfos(); createFlow(); insertConditions(false); diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index 079b2fc973b9..e3cb5f359e34 100644 --- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -80,7 +80,7 @@ void llvm::detachDeadBlocks( // contained within it must dominate their uses, that all uses will // eventually be removed (they are themselves dead). if (!I.use_empty()) - I.replaceAllUsesWith(UndefValue::get(I.getType())); + I.replaceAllUsesWith(PoisonValue::get(I.getType())); BB->getInstList().pop_back(); } new UnreachableInst(BB->getContext(), BB); @@ -188,8 +188,10 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, // Don't break self-loops. if (PredBB == BB) return false; - // Don't break unwinding instructions. - if (PredBB->getTerminator()->isExceptionalTerminator()) + + // Don't break unwinding instructions or terminators with other side-effects. + Instruction *PTI = PredBB->getTerminator(); + if (PTI->isExceptionalTerminator() || PTI->mayHaveSideEffects()) return false; // Can't merge if there are multiple distinct successors. @@ -202,7 +204,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, BasicBlock *NewSucc = nullptr; unsigned FallThruPath; if (PredecessorWithTwoSuccessors) { - if (!(PredBB_BI = dyn_cast<BranchInst>(PredBB->getTerminator()))) + if (!(PredBB_BI = dyn_cast<BranchInst>(PTI))) return false; BranchInst *BB_JmpI = dyn_cast<BranchInst>(BB->getTerminator()); if (!BB_JmpI || !BB_JmpI->isUnconditional()) @@ -256,7 +258,6 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, Updates.push_back({DominatorTree::Delete, PredBB, BB}); } - Instruction *PTI = PredBB->getTerminator(); Instruction *STI = BB->getTerminator(); Instruction *Start = &*BB->begin(); // If there's nothing to move, mark the starting instruction as the last @@ -1141,7 +1142,7 @@ SplitBlockPredecessorsImpl(BasicBlock *BB, ArrayRef<BasicBlock *> Preds, if (Preds.empty()) { // Insert dummy values as the incoming value. for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I) - cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB); + cast<PHINode>(I)->addIncoming(PoisonValue::get(I->getType()), NewBB); } // Update DominatorTree, LoopInfo, and LCCSA analysis information. 
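
Two related hardening changes sit in the BasicBlockUtils hunks above: dead-block cleanup now stubs out surviving uses with poison rather than undef (values computed in unreachable code can never be observed along any executed path, so the stronger placeholder is safe, in line with LLVM's wider undef-to-poison migration), and MergeBlockIntoPredecessor now refuses predecessors whose terminator mayHaveSideEffects(), not only exceptional terminators. A small fragment restating the first change; it compiles against the LLVM headers, and DeadBB is a placeholder name:

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Instruction.h"
    using namespace llvm;

    // Uses of instructions in a block known to be unreachable are replaced
    // with poison, as in the detachDeadBlocks hunk above.
    static void stubOutDeadBlockUses(BasicBlock &DeadBB) {
      for (Instruction &I : DeadBB)
        if (!I.use_empty())
          I.replaceAllUsesWith(PoisonValue::get(I.getType()));
    }
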
diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp index c4a58f36c171..e25ec74a0572 100644 --- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp @@ -270,9 +270,6 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, bool Changed = false; - if(!isLibFreeFunction(&F, TheLibFunc) && !isReallocLikeFn(&F, &TLI)) - Changed |= setDoesNotFreeMemory(F); - if (F.getParent() != nullptr && F.getParent()->getRtLibUseGOT()) Changed |= setNonLazyBind(F); @@ -285,14 +282,14 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setOnlyAccessesArgMemory(F); Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_strchr: case LibFunc_strrchr: Changed |= setOnlyAccessesArgMemory(F); Changed |= setOnlyReadsMemory(F); Changed |= setDoesNotThrow(F); Changed |= setWillReturn(F); - return Changed; + break; case LibFunc_strtol: case LibFunc_strtod: case LibFunc_strtof: @@ -304,7 +301,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_strcat: case LibFunc_strncat: Changed |= setOnlyAccessesArgMemory(F); @@ -315,7 +312,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setOnlyReadsMemory(F, 1); Changed |= setDoesNotAlias(F, 0); Changed |= setDoesNotAlias(F, 1); - return Changed; + break; case LibFunc_strcpy: case LibFunc_strncpy: Changed |= setReturnedArg(F, 0); @@ -330,14 +327,14 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setOnlyReadsMemory(F, 1); Changed |= setDoesNotAlias(F, 0); Changed |= setDoesNotAlias(F, 1); - return Changed; + break; case LibFunc_strxfrm: Changed |= setDoesNotThrow(F); Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_strcmp: // 0,1 case LibFunc_strspn: // 0,1 case LibFunc_strncmp: // 0,1 @@ -348,7 +345,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setOnlyReadsMemory(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_strcoll: case LibFunc_strcasecmp: // 0,1 case LibFunc_strncasecmp: // @@ -359,7 +356,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_strstr: case LibFunc_strpbrk: Changed |= setOnlyAccessesArgMemory(F); @@ -367,26 +364,26 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotThrow(F); Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_strtok: case LibFunc_strtok_r: Changed |= setDoesNotThrow(F); Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_scanf: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_setbuf: case LibFunc_setvbuf: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_strndup: Changed |= setArgNoUndef(F, 1); LLVM_FALLTHROUGH; @@ -398,7 +395,7 @@ bool 
llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_stat: case LibFunc_statvfs: Changed |= setRetAndArgsNoUndef(F); @@ -406,7 +403,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_sscanf: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); @@ -414,7 +411,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_sprintf: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); @@ -423,7 +420,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setOnlyWritesMemory(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_snprintf: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); @@ -432,7 +429,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setOnlyWritesMemory(F, 0); Changed |= setDoesNotCapture(F, 2); Changed |= setOnlyReadsMemory(F, 2); - return Changed; + break; case LibFunc_setitimer: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); @@ -440,13 +437,13 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotCapture(F, 1); Changed |= setDoesNotCapture(F, 2); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_system: // May throw; "system" is a valid pthread cancellation point. Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_aligned_alloc: Changed |= setAlignedAllocParam(F, 0); Changed |= setAllocSize(F, 1, None); @@ -464,7 +461,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); Changed |= setWillReturn(F); - return Changed; + break; case LibFunc_memcmp: Changed |= setOnlyAccessesArgMemory(F); Changed |= setOnlyReadsMemory(F); @@ -472,21 +469,21 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_memchr: case LibFunc_memrchr: Changed |= setDoesNotThrow(F); Changed |= setOnlyAccessesArgMemory(F); Changed |= setOnlyReadsMemory(F); Changed |= setWillReturn(F); - return Changed; + break; case LibFunc_modf: case LibFunc_modff: case LibFunc_modfl: Changed |= setDoesNotThrow(F); Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_memcpy: Changed |= setDoesNotThrow(F); Changed |= setOnlyAccessesArgMemory(F); @@ -497,7 +494,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotAlias(F, 1); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_memmove: Changed |= setDoesNotThrow(F); Changed |= setOnlyAccessesArgMemory(F); @@ -506,7 +503,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setOnlyWritesMemory(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_mempcpy: case LibFunc_memccpy: Changed |= setWillReturn(F); @@ 
-519,7 +516,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotAlias(F, 1); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_memalign: Changed |= setAllocFamily(F, "malloc"); Changed |= setAllocKind(F, AllocFnKind::Alloc | AllocFnKind::Aligned | @@ -531,19 +528,19 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); Changed |= setWillReturn(F); - return Changed; + break; case LibFunc_mkdir: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_mktime: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_realloc: case LibFunc_reallocf: case LibFunc_vec_realloc: @@ -559,17 +556,17 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); Changed |= setArgNoUndef(F, 1); - return Changed; + break; case LibFunc_read: // May throw; "read" is a valid pthread cancellation point. Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_rewind: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_rmdir: case LibFunc_remove: case LibFunc_realpath: @@ -577,7 +574,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_rename: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); @@ -585,20 +582,20 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_readlink: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_write: // May throw; "write" is a valid pthread cancellation point. Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_bcopy: Changed |= setDoesNotThrow(F); Changed |= setOnlyAccessesArgMemory(F); @@ -607,7 +604,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyWritesMemory(F, 1); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_bcmp: Changed |= setDoesNotThrow(F); Changed |= setOnlyAccessesArgMemory(F); @@ -615,14 +612,14 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_bzero: Changed |= setDoesNotThrow(F); Changed |= setOnlyAccessesArgMemory(F); Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyWritesMemory(F, 0); - return Changed; + break; case LibFunc_calloc: case LibFunc_vec_calloc: Changed |= setAllocFamily(F, TheLibFunc == LibFunc_vec_calloc ? 
"vec_malloc" @@ -634,21 +631,21 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); Changed |= setWillReturn(F); - return Changed; + break; case LibFunc_chmod: case LibFunc_chown: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_ctermid: case LibFunc_clearerr: case LibFunc_closedir: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_atoi: case LibFunc_atol: case LibFunc_atof: @@ -657,13 +654,13 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setOnlyReadsMemory(F); Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_access: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_fopen: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); @@ -672,19 +669,19 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_fdopen: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_feof: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_free: case LibFunc_vec_free: Changed |= setAllocFamily(F, TheLibFunc == LibFunc_vec_free ? 
"vec_malloc" @@ -696,7 +693,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotThrow(F); Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_fseek: case LibFunc_ftell: case LibFunc_fgetc: @@ -713,45 +710,45 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_ferror: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F); - return Changed; + break; case LibFunc_fputc: case LibFunc_fputc_unlocked: case LibFunc_fstat: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_frexp: case LibFunc_frexpf: case LibFunc_frexpl: Changed |= setDoesNotThrow(F); Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_fstatvfs: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_fgets: case LibFunc_fgets_unlocked: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 2); - return Changed; + break; case LibFunc_fread: case LibFunc_fread_unlocked: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 3); - return Changed; + break; case LibFunc_fwrite: case LibFunc_fwrite_unlocked: Changed |= setRetAndArgsNoUndef(F); @@ -759,7 +756,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 3); // FIXME: readonly #1? 
- return Changed; + break; case LibFunc_fputs: case LibFunc_fputs_unlocked: Changed |= setRetAndArgsNoUndef(F); @@ -767,7 +764,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_fscanf: case LibFunc_fprintf: Changed |= setRetAndArgsNoUndef(F); @@ -775,73 +772,73 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_fgetpos: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_getc: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_getlogin_r: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_getc_unlocked: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_getenv: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setOnlyReadsMemory(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_gets: case LibFunc_getchar: case LibFunc_getchar_unlocked: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); - return Changed; + break; case LibFunc_getitimer: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_getpwnam: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_ungetc: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_uname: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_unlink: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_unsetenv: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_utime: case LibFunc_utimes: Changed |= setRetAndArgsNoUndef(F); @@ -850,13 +847,13 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_putc: case LibFunc_putc_unlocked: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_puts: case LibFunc_printf: case LibFunc_perror: @@ -864,23 +861,23 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_pread: // May throw; "pread" is a valid pthread cancellation point. 
Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_pwrite: // May throw; "pwrite" is a valid pthread cancellation point. Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_putchar: case LibFunc_putchar_unlocked: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); - return Changed; + break; case LibFunc_popen: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); @@ -889,18 +886,18 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_pclose: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_vscanf: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_vsscanf: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); @@ -908,20 +905,20 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_vfscanf: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_vprintf: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_vfprintf: case LibFunc_vsprintf: Changed |= setRetAndArgsNoUndef(F); @@ -929,63 +926,63 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_vsnprintf: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 2); Changed |= setOnlyReadsMemory(F, 2); - return Changed; + break; case LibFunc_open: // May throw; "open" is a valid pthread cancellation point. 
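A note on the long run of "- return Changed; + break;" edits through this switch: inferNonMandatoryLibFuncAttrs used to return out of each case, and after this patch every case falls through to a single shared epilogue (added after the default case further down). A minimal sketch of the resulting shape, reusing the helper names that appear in the hunk:

bool inferAttrsSketch(Function &F, LibFunc TheLibFunc,
                      const TargetLibraryInfo &TLI) {
  bool Changed = false;
  switch (TheLibFunc) {
  case LibFunc_ferror:
    Changed |= setRetAndArgsNoUndef(F);
    Changed |= setDoesNotThrow(F);
    break; // was: return Changed;
  default:
    break; // was: return false;
  }
  // Shared epilogue: must run after AllocKind inference so free-like and
  // realloc-like functions are reliably identified.
  if (!isLibFreeFunction(&F, TheLibFunc) && !isReallocLikeFn(&F, &TLI))
    Changed |= setDoesNotFreeMemory(F);
  return Changed;
}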
Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_opendir: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_tmpfile: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); - return Changed; + break; case LibFunc_times: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_htonl: case LibFunc_htons: case LibFunc_ntohl: case LibFunc_ntohs: Changed |= setDoesNotThrow(F); Changed |= setDoesNotAccessMemory(F); - return Changed; + break; case LibFunc_lstat: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_lchown: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_qsort: // May throw; places call through function pointer. // Cannot give undef pointer/size Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotCapture(F, 3); - return Changed; + break; case LibFunc_dunder_strndup: Changed |= setArgNoUndef(F, 1); LLVM_FALLTHROUGH; @@ -995,28 +992,28 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_dunder_strtok_r: Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_under_IO_getc: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_under_IO_putc: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_dunder_isoc99_scanf: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_stat64: case LibFunc_lstat64: case LibFunc_statvfs64: @@ -1025,7 +1022,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_dunder_isoc99_sscanf: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); @@ -1033,7 +1030,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_fopen64: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); @@ -1042,30 +1039,30 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_fseeko64: case LibFunc_ftello64: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_tmpfile64: Changed |= setRetAndArgsNoUndef(F); Changed 
|= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); - return Changed; + break; case LibFunc_fstat64: case LibFunc_fstatvfs64: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_open64: // May throw; "open" is a valid pthread cancellation point. Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_gettimeofday: // Currently some platforms have the restrict keyword on the arguments to // gettimeofday. To be conservative, do not add noalias to gettimeofday's @@ -1074,7 +1071,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_memset_pattern4: case LibFunc_memset_pattern8: case LibFunc_memset_pattern16: @@ -1089,18 +1086,18 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setOnlyAccessesArgMemory(F); Changed |= setOnlyWritesMemory(F, 0); Changed |= setDoesNotThrow(F); - return Changed; + break; // int __nvvm_reflect(const char *) case LibFunc_nvvm_reflect: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotAccessMemory(F); Changed |= setDoesNotThrow(F); - return Changed; + break; case LibFunc_ldexp: case LibFunc_ldexpf: case LibFunc_ldexpl: Changed |= setWillReturn(F); - return Changed; + break; case LibFunc_abs: case LibFunc_acos: case LibFunc_acosf: @@ -1227,12 +1224,17 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotFreeMemory(F); Changed |= setOnlyWritesMemory(F); Changed |= setWillReturn(F); - return Changed; + break; default: // FIXME: It'd be really nice to cover all the library functions we're // aware of here. - return false; + break; } + // We have to do this step after AllocKind has been inferred on functions so + // we can reliably identify free-like and realloc-like functions. + if (!isLibFreeFunction(&F, TheLibFunc) && !isReallocLikeFn(&F, &TLI)) + Changed |= setDoesNotFreeMemory(F); + return Changed; } static void setArgExtAttr(Function &F, unsigned ArgNo, diff --git a/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp b/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp index f229d4bf14e9..9101a1e41f7b 100644 --- a/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp +++ b/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp @@ -65,23 +65,6 @@ static bool canonicalizeAliases(Module &M) { canonicalizeAlias(&GA, Changed); return Changed; } - -// Legacy pass that canonicalizes aliases. 
-class CanonicalizeAliasesLegacyPass : public ModulePass { - -public: - /// Pass identification, replacement for typeid - static char ID; - - /// Specify pass name for debug output - StringRef getPassName() const override { return "Canonicalize Aliases"; } - - explicit CanonicalizeAliasesLegacyPass() : ModulePass(ID) {} - - bool runOnModule(Module &M) override { return canonicalizeAliases(M); } -}; -char CanonicalizeAliasesLegacyPass::ID = 0; - } // anonymous namespace PreservedAnalyses CanonicalizeAliasesPass::run(Module &M, @@ -91,14 +74,3 @@ PreservedAnalyses CanonicalizeAliasesPass::run(Module &M, return PreservedAnalyses::none(); } - -INITIALIZE_PASS_BEGIN(CanonicalizeAliasesLegacyPass, "canonicalize-aliases", - "Canonicalize aliases", false, false) -INITIALIZE_PASS_END(CanonicalizeAliasesLegacyPass, "canonicalize-aliases", - "Canonicalize aliases", false, false) - -namespace llvm { -ModulePass *createCanonicalizeAliasesPass() { - return new CanonicalizeAliasesLegacyPass(); -} -} // namespace llvm diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp index 8f053cd56e0e..1d348213bfdb 100644 --- a/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -206,9 +206,20 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, }; // Avoid cloning types, compile units, and (other) subprograms. - for (DISubprogram *ISP : DIFinder->subprograms()) - if (ISP != SPClonedWithinModule) + SmallPtrSet<const DISubprogram *, 16> MappedToSelfSPs; + for (DISubprogram *ISP : DIFinder->subprograms()) { + if (ISP != SPClonedWithinModule) { mapToSelfIfNew(ISP); + MappedToSelfSPs.insert(ISP); + } + } + + // If a subprogram isn't going to be cloned skip its lexical blocks as well. + for (DIScope *S : DIFinder->scopes()) { + auto *LScope = dyn_cast<DILocalScope>(S); + if (LScope && MappedToSelfSPs.count(LScope->getSubprogram())) + mapToSelfIfNew(S); + } for (DICompileUnit *CU : DIFinder->compile_units()) mapToSelfIfNew(CU); @@ -723,14 +734,14 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, } // If the loops above have made these phi nodes have 0 or 1 operand, - // replace them with undef or the input value. We must do this for + // replace them with poison or the input value. We must do this for // correctness, because 0-operand phis are not valid. 
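The CloneFunctionInto change in the hunk above is easier to read as a two-pass scheme; a condensed sketch with the same identifiers:

// Pass 1: subprograms that stay outside the clone map to themselves.
SmallPtrSet<const DISubprogram *, 16> MappedToSelfSPs;
for (DISubprogram *ISP : DIFinder->subprograms())
  if (ISP != SPClonedWithinModule) {
    mapToSelfIfNew(ISP);
    MappedToSelfSPs.insert(ISP);
  }
// Pass 2: lexical blocks inherit the decision of their parent subprogram,
// so scopes of an un-cloned subprogram are not duplicated either.
for (DIScope *S : DIFinder->scopes())
  if (auto *LScope = dyn_cast<DILocalScope>(S))
    if (MappedToSelfSPs.count(LScope->getSubprogram()))
      mapToSelfIfNew(S);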
PN = cast<PHINode>(NewBB->begin()); if (PN->getNumIncomingValues() == 0) { BasicBlock::iterator I = NewBB->begin(); BasicBlock::const_iterator OldI = OldBB->begin(); while ((PN = dyn_cast<PHINode>(I++))) { - Value *NV = UndefValue::get(PN->getType()); + Value *NV = PoisonValue::get(PN->getType()); PN->replaceAllUsesWith(NV); assert(VMap[&*OldI] == PN && "VMap mismatch"); VMap[&*OldI] = NV; diff --git a/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp b/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp index e3e8f63383df..60f910bceab8 100644 --- a/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp +++ b/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp @@ -117,65 +117,6 @@ static bool runOnFunction(Function &F, bool PostInlining) { return Changed; } -namespace { -struct EntryExitInstrumenter : public FunctionPass { - static char ID; - EntryExitInstrumenter() : FunctionPass(ID) { - initializeEntryExitInstrumenterPass(*PassRegistry::getPassRegistry()); - } - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addPreserved<GlobalsAAWrapperPass>(); - AU.addPreserved<DominatorTreeWrapperPass>(); - } - bool runOnFunction(Function &F) override { return ::runOnFunction(F, false); } -}; -char EntryExitInstrumenter::ID = 0; - -struct PostInlineEntryExitInstrumenter : public FunctionPass { - static char ID; - PostInlineEntryExitInstrumenter() : FunctionPass(ID) { - initializePostInlineEntryExitInstrumenterPass( - *PassRegistry::getPassRegistry()); - } - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addPreserved<GlobalsAAWrapperPass>(); - AU.addPreserved<DominatorTreeWrapperPass>(); - } - bool runOnFunction(Function &F) override { return ::runOnFunction(F, true); } -}; -char PostInlineEntryExitInstrumenter::ID = 0; -} - -INITIALIZE_PASS_BEGIN( - EntryExitInstrumenter, "ee-instrument", - "Instrument function entry/exit with calls to e.g. mcount() (pre inlining)", - false, false) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_END( - EntryExitInstrumenter, "ee-instrument", - "Instrument function entry/exit with calls to e.g. mcount() (pre inlining)", - false, false) - -INITIALIZE_PASS_BEGIN( - PostInlineEntryExitInstrumenter, "post-inline-ee-instrument", - "Instrument function entry/exit with calls to e.g. mcount() " - "(post inlining)", - false, false) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_END( - PostInlineEntryExitInstrumenter, "post-inline-ee-instrument", - "Instrument function entry/exit with calls to e.g. mcount() " - "(post inlining)", - false, false) - -FunctionPass *llvm::createEntryExitInstrumenterPass() { - return new EntryExitInstrumenter(); -} - -FunctionPass *llvm::createPostInlineEntryExitInstrumenterPass() { - return new PostInlineEntryExitInstrumenter(); -} - PreservedAnalyses llvm::EntryExitInstrumenterPass::run(Function &F, FunctionAnalysisManager &AM) { runOnFunction(F, PostInlining); diff --git a/llvm/lib/Transforms/Utils/Evaluator.cpp b/llvm/lib/Transforms/Utils/Evaluator.cpp index 7b8d8553bac2..7509fde6df9d 100644 --- a/llvm/lib/Transforms/Utils/Evaluator.cpp +++ b/llvm/lib/Transforms/Utils/Evaluator.cpp @@ -301,9 +301,9 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB, LLVM_DEBUG(dbgs() << "Evaluating Instruction: " << *CurInst << "\n"); if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) { - if (!SI->isSimple()) { - LLVM_DEBUG(dbgs() << "Store is not simple! Can not evaluate.\n"); - return false; // no volatile/atomic accesses. 
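EntryExitInstrumenter above, like CanonicalizeAliases and NameAnonGlobals elsewhere in this commit, loses its legacy-PM wrapper and INITIALIZE_PASS boilerplate; the pass survives only under the new pass manager. A sketch of equivalent usage (assuming the usual PassBuilder/analysis-manager wiring, and that the pass constructor takes the PostInlining flag as it does here):

FunctionPassManager FPM;
FPM.addPass(EntryExitInstrumenterPass(/*PostInlining=*/false));
FPM.run(F, FAM); // FAM: a FunctionAnalysisManager registered via PassBuilder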
+ if (SI->isVolatile()) { + LLVM_DEBUG(dbgs() << "Store is volatile! Can not evaluate.\n"); + return false; // no volatile accesses. } Constant *Ptr = getVal(SI->getOperand(1)); Constant *FoldedPtr = ConstantFoldConstant(Ptr, DL, TLI); @@ -337,10 +337,10 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB, if (!Res.first->second.write(Val, Offset, DL)) return false; } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) { - if (!LI->isSimple()) { + if (LI->isVolatile()) { LLVM_DEBUG( - dbgs() << "Found a Load! Not a simple load, can not evaluate.\n"); - return false; // no volatile/atomic accesses. + dbgs() << "Found a Load! Volatile load, can not evaluate.\n"); + return false; // no volatile accesses. } Constant *Ptr = getVal(LI->getOperand(0)); diff --git a/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp b/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp index 193806d9cc87..8e6d4626c9fd 100644 --- a/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp +++ b/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp @@ -35,6 +35,13 @@ bool FunctionImportGlobalProcessing::doImportAsDefinition( bool FunctionImportGlobalProcessing::shouldPromoteLocalToGlobal( const GlobalValue *SGV, ValueInfo VI) { assert(SGV->hasLocalLinkage()); + + // Ifuncs and ifunc aliases do not have a summary. + if (isa<GlobalIFunc>(SGV) || + (isa<GlobalAlias>(SGV) && + isa<GlobalIFunc>(cast<GlobalAlias>(SGV)->getAliaseeObject()))) + return false; + // Both the imported references and the original local variable must // be promoted. if (!isPerformingImport() && !isModuleExporting()) diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp index 2fb00f95b749..00387ec426bf 100644 --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -2194,9 +2194,11 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, CI->setTailCallKind(ChildTCK); InlinedMustTailCalls |= CI->isMustTailCall(); - // Calls inlined through a 'nounwind' call site should be marked - // 'nounwind'. - if (MarkNoUnwind) + // Call sites inlined through a 'nounwind' call site should be + // 'nounwind' as well. However, avoid marking call sites explicitly + // where possible. This helps expose more opportunities for CSE after + // inlining, commonly when the callee is an intrinsic. + if (MarkNoUnwind && !CI->doesNotThrow()) CI->setDoesNotThrow(); } } @@ -2625,7 +2627,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, } else if (!CB.use_empty()) { // No returns, but something is using the return value of the call. Just // nuke the result. - CB.replaceAllUsesWith(UndefValue::get(CB.getType())); + CB.replaceAllUsesWith(PoisonValue::get(CB.getType())); } // Since we are now done with the Call/Invoke, we can delete it.
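On the InlineFunction.cpp hunk that just ended: the new !CI->doesNotThrow() guard means calls that are already nounwind (notably intrinsic calls, which inherit the attribute from their declaration) are left byte-for-byte identical to their twins elsewhere in the caller, which is what keeps them CSE-able after inlining. The guarded update, in isolation:

// Only touch call sites whose attributes actually change.
if (MarkNoUnwind && !CI->doesNotThrow())
  CI->setDoesNotThrow();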
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index b203259db1c6..2f1d0c2f9012 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -439,6 +439,10 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I, return true; } + if (auto *CB = dyn_cast<CallBase>(I)) + if (isRemovableAlloc(CB, TLI)) + return true; + if (!I->willReturn()) return false; @@ -489,16 +493,13 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I, } } - if (isAllocationFn(I, TLI) && isAllocRemovable(cast<CallBase>(I), TLI)) - return true; - - if (CallInst *CI = isFreeCall(I, TLI)) - if (Constant *C = dyn_cast<Constant>(CI->getArgOperand(0))) - return C->isNullValue() || isa<UndefValue>(C); - - if (auto *Call = dyn_cast<CallBase>(I)) + if (auto *Call = dyn_cast<CallBase>(I)) { + if (Value *FreedOp = getFreedOperand(Call, TLI)) + if (Constant *C = dyn_cast<Constant>(FreedOp)) + return C->isNullValue() || isa<UndefValue>(C); if (isMathLibCallNoop(Call, TLI)) return true; + } // Non-volatile atomic loads from constants can be removed. if (auto *LI = dyn_cast<LoadInst>(I)) @@ -637,7 +638,7 @@ bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN, // won't prove fruitful. if (!Visited.insert(I).second) { // Break the cycle and delete the instruction and its operands. - I->replaceAllUsesWith(UndefValue::get(I->getType())); + I->replaceAllUsesWith(PoisonValue::get(I->getType())); (void)RecursivelyDeleteTriviallyDeadInstructions(I, TLI, MSSAU); return true; } @@ -750,8 +751,8 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, // If BB has single-entry PHI nodes, fold them. while (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) { Value *NewVal = PN->getIncomingValue(0); - // Replace self referencing PHI with undef, it must be dead. - if (NewVal == PN) NewVal = UndefValue::get(PN->getType()); + // Replace self referencing PHI with poison, it must be dead. + if (NewVal == PN) NewVal = PoisonValue::get(PN->getType()); PN->replaceAllUsesWith(NewVal); PN->eraseFromParent(); } @@ -2105,7 +2106,7 @@ llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) { // Delete the next to last instruction. Instruction *Inst = &*--EndInst->getIterator(); if (!Inst->use_empty() && !Inst->getType()->isTokenTy()) - Inst->replaceAllUsesWith(UndefValue::get(Inst->getType())); + Inst->replaceAllUsesWith(PoisonValue::get(Inst->getType())); if (Inst->isEHPad() || Inst->getType()->isTokenTy()) { EndInst = Inst; continue; @@ -2144,7 +2145,7 @@ unsigned llvm::changeToUnreachable(Instruction *I, bool PreserveLCSSA, BasicBlock::iterator BBI = I->getIterator(), BBE = BB->end(); while (BBI != BBE) { if (!BBI->use_empty()) - BBI->replaceAllUsesWith(UndefValue::get(BBI->getType())); + BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType())); BB->getInstList().erase(BBI++); ++NumInstrsRemoved; } diff --git a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp index 0f33559c7e70..597c88ad13df 100644 --- a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp @@ -622,7 +622,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // We only need to split loop exit edges. 
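In the wouldInstructionBeTriviallyDead hunk above, the old isFreeCall/isAllocationFn queries are replaced by the newer allocator APIs. Untangled from the diff, the logic reads (note that the removable-allocation check deliberately sits before the willReturn bail-out):

// An unused, removable allocation (e.g. a dead malloc) is trivially dead.
if (auto *CB = dyn_cast<CallBase>(I))
  if (isRemovableAlloc(CB, TLI))
    return true;
// ... (willReturn and other checks elided) ...
// So is freeing a pointer known to be null or undef.
if (auto *Call = dyn_cast<CallBase>(I)) {
  if (Value *FreedOp = getFreedOperand(Call, TLI))
    if (Constant *C = dyn_cast<Constant>(FreedOp))
      return C->isNullValue() || isa<UndefValue>(C);
  if (isMathLibCallNoop(Call, TLI))
    return true;
}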
Loop *PredLoop = LI->getLoopFor(ExitPred); if (!PredLoop || PredLoop->contains(Exit) || - ExitPred->getTerminator()->isIndirectTerminator()) + isa<IndirectBrInst>(ExitPred->getTerminator())) continue; SplitLatchEdge |= L->getLoopLatch() == ExitPred; BasicBlock *ExitSplit = SplitCriticalEdge( diff --git a/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/llvm/lib/Transforms/Utils/LoopSimplify.cpp index 55d5c733733b..2ff8a3f7b228 100644 --- a/llvm/lib/Transforms/Utils/LoopSimplify.cpp +++ b/llvm/lib/Transforms/Utils/LoopSimplify.cpp @@ -127,7 +127,7 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT, // If the loop is branched to from an indirect terminator, we won't // be able to fully transform the loop, because it prohibits // edge splitting. - if (P->getTerminator()->isIndirectTerminator()) + if (isa<IndirectBrInst>(P->getTerminator())) return nullptr; // Keep track of it. @@ -256,7 +256,7 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader, if (PN->getIncomingValue(i) != PN || !L->contains(PN->getIncomingBlock(i))) { // We can't split indirect control flow edges. - if (PN->getIncomingBlock(i)->getTerminator()->isIndirectTerminator()) + if (isa<IndirectBrInst>(PN->getIncomingBlock(i)->getTerminator())) return nullptr; OuterLoopPreds.push_back(PN->getIncomingBlock(i)); } @@ -375,7 +375,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, std::vector<BasicBlock*> BackedgeBlocks; for (BasicBlock *P : predecessors(Header)) { // Indirect edges cannot be split, so we must fail if we find one. - if (P->getTerminator()->isIndirectTerminator()) + if (isa<IndirectBrInst>(P->getTerminator())) return nullptr; if (P != Preheader) BackedgeBlocks.push_back(P); diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index 82f993b4ceab..349063dd5e89 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -602,10 +602,10 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, // loop will be already eliminated and we have less work to do but according // to API doc of User::dropAllReferences only valid operation after dropping // references, is deletion. So let's substitute all usages of - // instruction from the loop with undef value of corresponding type first. + // instruction from the loop with poison value of corresponding type first. for (auto *Block : L->blocks()) for (Instruction &I : *Block) { - auto *Undef = UndefValue::get(I.getType()); + auto *Poison = PoisonValue::get(I.getType()); for (Use &U : llvm::make_early_inc_range(I.uses())) { if (auto *Usr = dyn_cast<Instruction>(U.getUser())) if (L->contains(Usr->getParent())) @@ -615,7 +615,7 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, if (DT) assert(!DT->isReachableFromEntry(U) && "Unexpected user in reachable block"); - U.set(Undef); + U.set(Poison); } auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I); if (!DVI) @@ -1357,7 +1357,7 @@ int llvm::rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop()); if (isa<SCEVCouldNotCompute>(ExitValue) || !SE->isLoopInvariant(ExitValue, L) || - !isSafeToExpand(ExitValue, *SE)) { + !Rewriter.isSafeToExpand(ExitValue)) { // TODO: This should probably be sunk into SCEV in some way; maybe a // getSCEVForExit(SCEV*, L, ExitingBB)? It can be generalized for // most SCEV expressions and other recurrence types (e.g. 
shift @@ -1370,7 +1370,7 @@ int llvm::rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE); if (isa<SCEVCouldNotCompute>(ExitValue) || !SE->isLoopInvariant(ExitValue, L) || - !isSafeToExpand(ExitValue, *SE)) + !Rewriter.isSafeToExpand(ExitValue)) continue; } diff --git a/llvm/lib/Transforms/Utils/LowerAtomic.cpp b/llvm/lib/Transforms/Utils/LowerAtomic.cpp index 9914a5ca6c5e..2247b8107739 100644 --- a/llvm/lib/Transforms/Utils/LowerAtomic.cpp +++ b/llvm/lib/Transforms/Utils/LowerAtomic.cpp @@ -31,7 +31,7 @@ bool llvm::lowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) { Value *Res = Builder.CreateSelect(Equal, Val, Orig); Builder.CreateStore(Res, Ptr); - Res = Builder.CreateInsertValue(UndefValue::get(CXI->getType()), Orig, 0); + Res = Builder.CreateInsertValue(PoisonValue::get(CXI->getType()), Orig, 0); Res = Builder.CreateInsertValue(Res, Equal, 1); CXI->replaceAllUsesWith(Res); diff --git a/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp b/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp index deaee467531d..d4ab4504064f 100644 --- a/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp +++ b/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp @@ -81,26 +81,6 @@ bool llvm::nameUnamedGlobals(Module &M) { return Changed; } -namespace { - -// Legacy pass that provides a name to every anon globals. -class NameAnonGlobalLegacyPass : public ModulePass { - -public: - /// Pass identification, replacement for typeid - static char ID; - - /// Specify pass name for debug output - StringRef getPassName() const override { return "Name Anon Globals"; } - - explicit NameAnonGlobalLegacyPass() : ModulePass(ID) {} - - bool runOnModule(Module &M) override { return nameUnamedGlobals(M); } -}; -char NameAnonGlobalLegacyPass::ID = 0; - -} // anonymous namespace - PreservedAnalyses NameAnonGlobalPass::run(Module &M, ModuleAnalysisManager &AM) { if (!nameUnamedGlobals(M)) @@ -108,14 +88,3 @@ PreservedAnalyses NameAnonGlobalPass::run(Module &M, return PreservedAnalyses::none(); } - -INITIALIZE_PASS_BEGIN(NameAnonGlobalLegacyPass, "name-anon-globals", - "Provide a name to nameless globals", false, false) -INITIALIZE_PASS_END(NameAnonGlobalLegacyPass, "name-anon-globals", - "Provide a name to nameless globals", false, false) - -namespace llvm { -ModulePass *createNameAnonGlobalPass() { - return new NameAnonGlobalLegacyPass(); -} -} diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp index 0c8bf3827256..372cd74ea01d 100644 --- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -2568,9 +2568,7 @@ namespace { // only needed when the expression includes some subexpression that is not IV // derived. // -// Currently, we only allow division by a nonzero constant here. If this is -// inadequate, we could easily allow division by SCEVUnknown by using -// ValueTracking to check isKnownNonZero(). +// Currently, we only allow division by a value provably non-zero here. // // We cannot generally expand recurrences unless the step dominates the loop // header. 
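Several hunks in this commit (deleteDeadLoop above, lowerAtomicCmpXchgInst, and more below) swap UndefValue for PoisonValue when the value is a pure placeholder: either every use is already dead or every field is overwritten immediately, and poison is the stronger, more foldable "no value". The cmpxchg lowering is the clearest instance:

// cmpxchg yields a {old value, success} pair; seed it with poison since
// both fields are inserted right away.
Value *Res =
    Builder.CreateInsertValue(PoisonValue::get(CXI->getType()), Orig, 0);
Res = Builder.CreateInsertValue(Res, Equal, 1);
CXI->replaceAllUsesWith(Res);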
The expander handles the special case of affine recurrences by @@ -2588,8 +2586,7 @@ struct SCEVFindUnsafe { bool follow(const SCEV *S) { if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) { - const SCEVConstant *SC = dyn_cast<SCEVConstant>(D->getRHS()); - if (!SC || SC->getValue()->isZero()) { + if (!SE.isKnownNonZero(D->getRHS())) { IsUnsafe = true; return false; } @@ -2613,18 +2610,17 @@ struct SCEVFindUnsafe { } bool isDone() const { return IsUnsafe; } }; -} +} // namespace -namespace llvm { -bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE, bool CanonicalMode) { +bool SCEVExpander::isSafeToExpand(const SCEV *S) const { SCEVFindUnsafe Search(SE, CanonicalMode); visitAll(S, Search); return !Search.IsUnsafe; } -bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint, - ScalarEvolution &SE) { - if (!isSafeToExpand(S, SE)) +bool SCEVExpander::isSafeToExpandAt(const SCEV *S, + const Instruction *InsertionPoint) const { + if (!isSafeToExpand(S)) return false; // We have to prove that the expanded site of S dominates InsertionPoint. // This is easy when not in the same block, but hard when S is an instruction @@ -2674,4 +2670,3 @@ void SCEVExpanderCleaner::cleanup() { I->eraseFromParent(); } } -} diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 4b5ade99767b..1806081678a8 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -4851,7 +4851,7 @@ static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU) { PN.moveBefore(InsertPt); // Also, add a dummy incoming value for the original BB itself, // so that the PHI is well-formed until we drop said predecessor. - PN.addIncoming(UndefValue::get(PN.getType()), BB); + PN.addIncoming(PoisonValue::get(PN.getType()), BB); } } diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp index af15e0c31b75..0ab79a32f526 100644 --- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -593,7 +593,7 @@ bool SimplifyIndvar::eliminateTrunc(TruncInst *TI) { } // Trunc no longer needed. - TI->replaceAllUsesWith(UndefValue::get(TI->getType())); + TI->replaceAllUsesWith(PoisonValue::get(TI->getType())); DeadInsts.emplace_back(TI); return true; } @@ -660,7 +660,7 @@ bool SimplifyIndvar::replaceIVUserWithLoopInvariant(Instruction *I) { auto *IP = GetLoopInvariantInsertPosition(L, I); - if (!isSafeToExpandAt(S, IP, *SE)) { + if (!Rewriter.isSafeToExpandAt(S, IP)) { LLVM_DEBUG(dbgs() << "INDVARS: Can not replace IV user: " << *I << " with non-speculable loop invariant: " << *S << '\n'); return false; @@ -679,20 +679,30 @@ bool SimplifyIndvar::replaceIVUserWithLoopInvariant(Instruction *I) { /// Eliminate redundant type cast between integer and float. bool SimplifyIndvar::replaceFloatIVWithIntegerIV(Instruction *UseInst) { - if (UseInst->getOpcode() != CastInst::SIToFP) + if (UseInst->getOpcode() != CastInst::SIToFP && + UseInst->getOpcode() != CastInst::UIToFP) return false; Value *IVOperand = UseInst->getOperand(0); // Get the symbolic expression for this instruction. 
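Two things happen in the ScalarEvolutionExpander hunk above: the unsafe-division test is relaxed from "non-zero constant" to SE.isKnownNonZero, and isSafeToExpand/isSafeToExpandAt become SCEVExpander members so the check honors the expander's own CanonicalMode rather than a caller-supplied default. Call sites migrate roughly like this (a sketch; Rewriter is the SCEVExpander in scope):

// Before: if (isSafeToExpandAt(S, InsertPt, *SE)) ...
if (Rewriter.isSafeToExpandAt(S, InsertPt)) {
  Value *NewV = Rewriter.expandCodeFor(S, Ty, InsertPt);
  // ... use NewV ...
}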
- ConstantRange IVRange = SE->getSignedRange(SE->getSCEV(IVOperand)); + const SCEV *IV = SE->getSCEV(IVOperand); + unsigned MaskBits; + if (UseInst->getOpcode() == CastInst::SIToFP) + MaskBits = SE->getSignedRange(IV).getMinSignedBits(); + else + MaskBits = SE->getUnsignedRange(IV).getActiveBits(); unsigned DestNumSigBits = UseInst->getType()->getFPMantissaWidth(); - if (IVRange.getActiveBits() <= DestNumSigBits) { + if (MaskBits <= DestNumSigBits) { for (User *U : UseInst->users()) { - // Match for fptosi of sitofp and with same type. - auto *CI = dyn_cast<FPToSIInst>(U); + // Match for fptosi/fptoui of sitofp and with same type. + auto *CI = dyn_cast<CastInst>(U); if (!CI || IVOperand->getType() != CI->getType()) continue; + CastInst::CastOps Opcode = CI->getOpcode(); + if (Opcode != CastInst::FPToSI && Opcode != CastInst::FPToUI) + continue; + CI->replaceAllUsesWith(IVOperand); DeadInsts.push_back(CI); LLVM_DEBUG(dbgs() << "INDVARS: Replace IV user: " << *CI @@ -1015,7 +1025,7 @@ class WidenIV { SmallPtrSet<Instruction *,16> Widened; - enum ExtendKind { ZeroExtended, SignExtended, Unknown }; + enum class ExtendKind { Zero, Sign, Unknown }; // A map tracking the kind of extension used to widen each narrow IV // and narrow IV user. @@ -1172,7 +1182,7 @@ WidenIV::WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv, HasGuards(HasGuards), UsePostIncrementRanges(UsePostIncrementRanges), DeadInsts(DI) { assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV"); - ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended; + ExtendKindMap[OrigPhi] = WI.IsSigned ? ExtendKind::Sign : ExtendKind::Zero; } Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType, @@ -1225,7 +1235,7 @@ Instruction *WidenIV::cloneBitwiseIVUser(WidenIV::NarrowIVDefUse DU) { // about the narrow operand yet so must insert a [sz]ext. It is probably loop // invariant and will be folded or hoisted. If it actually comes from a // widened IV, it should be removed during a future call to widenIVUse. - bool IsSigned = getExtendKind(NarrowDef) == SignExtended; + bool IsSigned = getExtendKind(NarrowDef) == ExtendKind::Sign; Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) ? WideDef : createExtendInst(NarrowUse->getOperand(0), WideType, @@ -1290,7 +1300,7 @@ Instruction *WidenIV::cloneArithmeticIVUser(WidenIV::NarrowIVDefUse DU, return WideUse == WideAR; }; - bool SignExtend = getExtendKind(NarrowDef) == SignExtended; + bool SignExtend = getExtendKind(NarrowDef) == ExtendKind::Sign; if (!GuessNonIVOperand(SignExtend)) { SignExtend = !SignExtend; if (!GuessNonIVOperand(SignExtend)) @@ -1350,7 +1360,7 @@ WidenIV::getExtendedOperandRecurrence(WidenIV::NarrowIVDefUse DU) { // Only Add/Sub/Mul instructions supported yet. if (OpCode != Instruction::Add && OpCode != Instruction::Sub && OpCode != Instruction::Mul) - return {nullptr, Unknown}; + return {nullptr, ExtendKind::Unknown}; // One operand (NarrowDef) has already been extended to WideDef. Now determine // if extending the other will lead to a recurrence. 
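The replaceFloatIVWithIntegerIV hunk ending above extends the sitofp/fptosi round-trip elimination to the unsigned uitofp/fptoui pair. Its governing check, pulled out of the diff (a sketch; IsSigned stands for the sitofp-versus-uitofp distinction):

const SCEV *IV = SE->getSCEV(IVOperand);
unsigned MaskBits = IsSigned ? SE->getSignedRange(IV).getMinSignedBits()
                             : SE->getUnsignedRange(IV).getActiveBits();
// If every significant bit of the IV survives the trip through the FP
// mantissa, then (u|s)itofp %iv ; fpto(u|s)i  ==>  %iv.
if (MaskBits <= (unsigned)UseInst->getType()->getFPMantissaWidth())
  CI->replaceAllUsesWith(IVOperand);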
@@ -1362,14 +1372,14 @@ WidenIV::getExtendedOperandRecurrence(WidenIV::NarrowIVDefUse DU) { const OverflowingBinaryOperator *OBO = cast<OverflowingBinaryOperator>(DU.NarrowUse); ExtendKind ExtKind = getExtendKind(DU.NarrowDef); - if (ExtKind == SignExtended && OBO->hasNoSignedWrap()) + if (ExtKind == ExtendKind::Sign && OBO->hasNoSignedWrap()) ExtendOperExpr = SE->getSignExtendExpr( SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType); - else if(ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap()) + else if (ExtKind == ExtendKind::Zero && OBO->hasNoUnsignedWrap()) ExtendOperExpr = SE->getZeroExtendExpr( SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType); else - return {nullptr, Unknown}; + return {nullptr, ExtendKind::Unknown}; // When creating this SCEV expr, don't apply the current operations NSW or NUW // flags. This instruction may be guarded by control flow that the no-wrap @@ -1387,7 +1397,7 @@ WidenIV::getExtendedOperandRecurrence(WidenIV::NarrowIVDefUse DU) { dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode)); if (!AddRec || AddRec->getLoop() != L) - return {nullptr, Unknown}; + return {nullptr, ExtendKind::Unknown}; return {AddRec, ExtKind}; } @@ -1396,17 +1406,17 @@ WidenIV::getExtendedOperandRecurrence(WidenIV::NarrowIVDefUse DU) { /// widening it's type? In other words, can the extend be safely hoisted out of /// the loop with SCEV reducing the value to a recurrence on the same loop. If /// so, return the extended recurrence and the kind of extension used. Otherwise -/// return {nullptr, Unknown}. +/// return {nullptr, ExtendKind::Unknown}. WidenIV::WidenedRecTy WidenIV::getWideRecurrence(WidenIV::NarrowIVDefUse DU) { if (!DU.NarrowUse->getType()->isIntegerTy()) - return {nullptr, Unknown}; + return {nullptr, ExtendKind::Unknown}; const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse); if (SE->getTypeSizeInBits(NarrowExpr->getType()) >= SE->getTypeSizeInBits(WideType)) { // NarrowUse implicitly widens its operand. e.g. a gep with a narrow // index. So don't follow this use. 
- return {nullptr, Unknown}; + return {nullptr, ExtendKind::Unknown}; } const SCEV *WideExpr; @@ -1414,21 +1424,21 @@ WidenIV::WidenedRecTy WidenIV::getWideRecurrence(WidenIV::NarrowIVDefUse DU) { if (DU.NeverNegative) { WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType); if (isa<SCEVAddRecExpr>(WideExpr)) - ExtKind = SignExtended; + ExtKind = ExtendKind::Sign; else { WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType); - ExtKind = ZeroExtended; + ExtKind = ExtendKind::Zero; } - } else if (getExtendKind(DU.NarrowDef) == SignExtended) { + } else if (getExtendKind(DU.NarrowDef) == ExtendKind::Sign) { WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType); - ExtKind = SignExtended; + ExtKind = ExtendKind::Sign; } else { WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType); - ExtKind = ZeroExtended; + ExtKind = ExtendKind::Zero; } const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr); if (!AddRec || AddRec->getLoop() != L) - return {nullptr, Unknown}; + return {nullptr, ExtendKind::Unknown}; return {AddRec, ExtKind}; } @@ -1468,7 +1478,7 @@ bool WidenIV::widenLoopCompare(WidenIV::NarrowIVDefUse DU) { // // (A) == icmp slt i32 sext(%narrow), sext(%val) // == icmp slt i32 zext(%narrow), sext(%val) - bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended; + bool IsSigned = getExtendKind(DU.NarrowDef) == ExtendKind::Sign; if (!(DU.NeverNegative || IsSigned == Cmp->isSigned())) return false; @@ -1533,8 +1543,8 @@ bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) { const OverflowingBinaryOperator *OBO = cast<OverflowingBinaryOperator>(NarrowUse); ExtendKind ExtKind = getExtendKind(NarrowDef); - bool CanSignExtend = ExtKind == SignExtended && OBO->hasNoSignedWrap(); - bool CanZeroExtend = ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap(); + bool CanSignExtend = ExtKind == ExtendKind::Sign && OBO->hasNoSignedWrap(); + bool CanZeroExtend = ExtKind == ExtendKind::Zero && OBO->hasNoUnsignedWrap(); auto AnotherOpExtKind = ExtKind; // Check that all uses are either: @@ -1564,14 +1574,14 @@ bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) { // predicates. For equality, it's legal to widen icmp for either sign and // zero extend. For sign extend, we can also do so for signed predicates, // likeweise for zero extend we can widen icmp for unsigned predicates. - if (ExtKind == ZeroExtended && ICmpInst::isSigned(Pred)) + if (ExtKind == ExtendKind::Zero && ICmpInst::isSigned(Pred)) return false; - if (ExtKind == SignExtended && ICmpInst::isUnsigned(Pred)) + if (ExtKind == ExtendKind::Sign && ICmpInst::isUnsigned(Pred)) return false; ICmpUsers.push_back(ICmp); continue; } - if (ExtKind == SignExtended) + if (ExtKind == ExtendKind::Sign) User = dyn_cast<SExtInst>(User); else User = dyn_cast<ZExtInst>(User); @@ -1594,7 +1604,7 @@ bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) { // will most likely not see it. Let's try to prove it. if (OpCode != Instruction::Add) return false; - if (ExtKind != ZeroExtended) + if (ExtKind != ExtendKind::Zero) return false; const SCEV *LHS = SE->getSCEV(OBO->getOperand(0)); const SCEV *RHS = SE->getSCEV(OBO->getOperand(1)); @@ -1609,7 +1619,7 @@ bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) { return false; // In fact, our 'add' is 'sub nuw'. We will need to widen the 2nd operand as // neg(zext(neg(op))), which is basically sext(op). 
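Most of the churn through WidenIV above and below is the mechanical price of turning ExtendKind into a scoped enum. The payoff is that accidental integer conversions no longer compile; it is also why the createExtendInst call sites below now spell the comparison out, where the old unscoped enumerator converted silently to the function's bool IsSigned parameter:

enum class ExtendKind { Zero, Sign, Unknown };
// Before: createExtendInst(Op, WideType, AnotherOpExtKind, NarrowUse);
//         (unscoped SignExtended converted implicitly to bool)
// After:
createExtendInst(Op, WideType, AnotherOpExtKind == ExtendKind::Sign,
                 NarrowUse);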
- AnotherOpExtKind = SignExtended; + AnotherOpExtKind = ExtendKind::Sign; } // Verifying that Defining operand is an AddRec @@ -1621,14 +1631,16 @@ bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) { LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n"); // Generating a widening use instruction. - Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) - ? WideDef - : createExtendInst(NarrowUse->getOperand(0), WideType, - AnotherOpExtKind, NarrowUse); - Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) - ? WideDef - : createExtendInst(NarrowUse->getOperand(1), WideType, - AnotherOpExtKind, NarrowUse); + Value *LHS = + (NarrowUse->getOperand(0) == NarrowDef) + ? WideDef + : createExtendInst(NarrowUse->getOperand(0), WideType, + AnotherOpExtKind == ExtendKind::Sign, NarrowUse); + Value *RHS = + (NarrowUse->getOperand(1) == NarrowDef) + ? WideDef + : createExtendInst(NarrowUse->getOperand(1), WideType, + AnotherOpExtKind == ExtendKind::Sign, NarrowUse); auto *NarrowBO = cast<BinaryOperator>(NarrowUse); auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, @@ -1667,7 +1679,7 @@ bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) { auto ExtendedOp = [&](Value * V)->Value * { if (V == NarrowUse) return WideBO; - if (ExtKind == ZeroExtended) + if (ExtKind == ExtendKind::Zero) return Builder.CreateZExt(V, WideBO->getType()); else return Builder.CreateSExt(V, WideBO->getType()); @@ -1723,10 +1735,10 @@ Instruction *WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU, SCEVExpander &Rewri // This narrow use can be widened by a sext if it's non-negative or its narrow // def was widended by a sext. Same for zext. auto canWidenBySExt = [&]() { - return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended; + return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ExtendKind::Sign; }; auto canWidenByZExt = [&]() { - return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended; + return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ExtendKind::Zero; }; // Our raison d'etre! Eliminate sign and zero extension. @@ -1774,7 +1786,8 @@ Instruction *WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU, SCEVExpander &Rewri if (!WideAddRec.first) WideAddRec = getWideRecurrence(DU); - assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown)); + assert((WideAddRec.first == nullptr) == + (WideAddRec.second == ExtendKind::Unknown)); if (!WideAddRec.first) { // If use is a loop condition, try to promote the condition instead of // truncating the IV first. @@ -1869,7 +1882,7 @@ PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) { return nullptr; // Widen the induction variable expression. - const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended + const SCEV *WideIVExpr = getExtendKind(OrigPhi) == ExtendKind::Sign ? 
SE->getSignExtendExpr(AddRec, WideType) : SE->getZeroExtendExpr(AddRec, WideType); diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index b359717424a6..bca3b0538c5d 100644 --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1948,14 +1948,16 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) { if (Value *Sqrt = replacePowWithSqrt(Pow, B)) return Sqrt; + // If we can approximate pow: // pow(x, n) -> powi(x, n) * sqrt(x) if n has exactly a 0.5 fraction + // pow(x, n) -> powi(x, n) if n is a constant signed integer value const APFloat *ExpoF; - if (match(Expo, m_APFloat(ExpoF)) && !ExpoF->isExactlyValue(0.5) && - !ExpoF->isExactlyValue(-0.5)) { + if (AllowApprox && match(Expo, m_APFloat(ExpoF)) && + !ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5)) { APFloat ExpoA(abs(*ExpoF)); APFloat ExpoI(*ExpoF); Value *Sqrt = nullptr; - if (AllowApprox && !ExpoA.isInteger()) { + if (!ExpoA.isInteger()) { APFloat Expo2 = ExpoA; // To check if ExpoA is an integer + 0.5, we add it to itself. If there // is no floating point exception and the result is an integer, then @@ -1979,7 +1981,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) { return nullptr; } - // pow(x, n) -> powi(x, n) if n is a constant signed integer value + // 0.5 fraction is now optionally handled. + // Do pow -> powi for remaining integer exponent APSInt IntExpo(TLI->getIntSize(), /*isUnsigned=*/false); if (ExpoF->isInteger() && ExpoF->convertToInteger(IntExpo, APFloat::rmTowardZero, &Ignored) == diff --git a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp index 832353741500..9bbfe06b9abb 100644 --- a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp +++ b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp @@ -145,8 +145,6 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) { // locate the exit blocks. SetVector<BasicBlock *> ExitingBlocks; SetVector<BasicBlock *> Exits; - // Record the exit blocks that branch to the same block. - MapVector<BasicBlock *, SetVector<BasicBlock *> > CommonSuccs; // We need SetVectors, but the Loop API takes a vector, so we use a temporary. SmallVector<BasicBlock *, 8> Temp; @@ -160,11 +158,6 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) { if (SL == L || L->contains(SL)) continue; Exits.insert(S); - // The typical case for reducing the number of guard blocks occurs when - // the exit block has a single predecessor and successor. - if (S->getSinglePredecessor()) - if (auto *Succ = S->getSingleSuccessor()) - CommonSuccs[Succ].insert(S); } } @@ -179,39 +172,13 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) { for (auto EB : ExitingBlocks) { dbgs() << " " << EB->getName(); } - dbgs() << "\n"; - - dbgs() << "Exit blocks with a common successor:\n"; - for (auto CS : CommonSuccs) { - dbgs() << " Succ " << CS.first->getName() << ", exits:"; - for (auto Exit : CS.second) - dbgs() << " " << Exit->getName(); - dbgs() << "\n"; - }); + dbgs() << "\n";); if (Exits.size() <= 1) { LLVM_DEBUG(dbgs() << "loop does not have multiple exits; nothing to do\n"); return false; } - // When multiple exit blocks branch to the same block, change the control - // flow hub to after the exit blocks rather than before. This reduces the - // number of guard blocks needed after the loop. 
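In the SimplifyLibCalls.cpp hunk above, both exponent rewrites in optimizePow now sit behind AllowApprox; powi expands to repeated multiplication and makes no precision promise, so neither fold is exact. In effect:

// pow(x, 6.0) -> powi(x, 6)            (constant integer exponent)
// pow(x, 6.5) -> powi(x, 6) * sqrt(x)  (integer-plus-0.5 exponent)
if (AllowApprox && match(Expo, m_APFloat(ExpoF)) &&
    !ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5)) {
  // ... both rewrites live here now ...
}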
- for (auto CS : CommonSuccs) { - auto CB = CS.first; - auto Preds = CS.second; - if (Exits.contains(CB)) - continue; - if (Preds.size() < 2 || Preds.size() == Exits.size()) - continue; - for (auto Exit : Preds) { - Exits.remove(Exit); - ExitingBlocks.remove(Exit->getSinglePredecessor()); - ExitingBlocks.insert(Exit); - } - Exits.insert(CB); - } - SmallVector<BasicBlock *, 8> GuardBlocks; DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); auto LoopExitBlock = CreateControlFlowHub(&DTU, GuardBlocks, ExitingBlocks, @@ -231,17 +198,6 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) { if (auto ParentLoop = L->getParentLoop()) { for (auto G : GuardBlocks) { ParentLoop->addBasicBlockToLoop(G, LI); - // Ensure the guard block predecessors are in a valid loop. After the - // change to the control flow hub for common successors, a guard block - // predecessor may not be in a loop or may be in an outer loop. - for (auto Pred : predecessors(G)) { - auto PredLoop = LI.getLoopFor(Pred); - if (!ParentLoop->contains(PredLoop)) { - if (PredLoop) - LI.removeBlock(Pred); - ParentLoop->addBasicBlockToLoop(Pred, LI); - } - } } ParentLoop->verifyLoop(); } diff --git a/llvm/lib/Transforms/Utils/Utils.cpp b/llvm/lib/Transforms/Utils/Utils.cpp index f34f2df971b1..d002922cfd30 100644 --- a/llvm/lib/Transforms/Utils/Utils.cpp +++ b/llvm/lib/Transforms/Utils/Utils.cpp @@ -28,7 +28,6 @@ void llvm::initializeTransformUtils(PassRegistry &Registry) { initializeAssumeSimplifyPassLegacyPassPass(Registry); initializeAssumeBuilderPassLegacyPassPass(Registry); initializeBreakCriticalEdgesPass(Registry); - initializeCanonicalizeAliasesLegacyPassPass(Registry); initializeCanonicalizeFreezeInLoopsPass(Registry); initializeInstNamerPass(Registry); initializeLCSSAWrapperPassPass(Registry); @@ -37,7 +36,6 @@ void llvm::initializeTransformUtils(PassRegistry &Registry) { initializeLowerGlobalDtorsLegacyPassPass(Registry); initializeLowerInvokeLegacyPassPass(Registry); initializeLowerSwitchLegacyPassPass(Registry); - initializeNameAnonGlobalLegacyPassPass(Registry); initializePromoteLegacyPassPass(Registry); initializeStripNonLineTableDebugLegacyPassPass(Registry); initializeUnifyFunctionExitNodesLegacyPassPass(Registry); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 0777a1385916..b887ea41676b 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -92,6 +92,7 @@ #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" @@ -473,7 +474,7 @@ public: virtual std::pair<BasicBlock *, Value *> createVectorizedLoopSkeleton(); /// Widen a single call instruction within the innermost loop. - void widenCallInstruction(CallInst &I, VPValue *Def, VPUser &ArgOperands, + void widenCallInstruction(CallInst &CI, VPValue *Def, VPUser &ArgOperands, VPTransformState &State); /// Fix the vectorized code, taking care of header phi's, live-outs, and more. @@ -1447,15 +1448,14 @@ public: // through scalar predication or masked load/store or masked gather/scatter. // \p VF is the vectorization factor that will be used to vectorize \p I. // Superset of instructions that return true for isScalarWithPredication. 
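The removal just above takes unifyLoopExits back to the simple strategy: hub every exiting block through one set of guard blocks, with no special case for exits sharing a successor and no after-the-fact fixup of guard-block predecessors. What remains is just:

SmallVector<BasicBlock *, 8> GuardBlocks;
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
// One hub through which every loop exit now leaves (final argument
// assumed; the context lines truncate the call).
auto LoopExitBlock = CreateControlFlowHub(&DTU, GuardBlocks, ExitingBlocks,
                                          Exits, /*Prefix=*/"loop.exit");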
- bool isPredicatedInst(Instruction *I, ElementCount VF, - bool IsKnownUniform = false) { - // When we know the load is uniform and the original scalar loop was not - // predicated we don't need to mark it as a predicated instruction. Any - // vectorised blocks created when tail-folding are something artificial we - // have introduced and we know there is always at least one active lane. - // That's why we call Legal->blockNeedsPredication here because it doesn't - // query tail-folding. - if (IsKnownUniform && isa<LoadInst>(I) && + bool isPredicatedInst(Instruction *I, ElementCount VF) { + // When we know the load's address is loop invariant and the instruction + // in the original scalar loop was unconditionally executed then we + // don't need to mark it as a predicated instruction. Tail folding may + // introduce additional predication, but we're guaranteed to always have + // at least one active lane. We call Legal->blockNeedsPredication here + // because it doesn't query tail-folding. + if (Legal->isUniformMemOp(*I) && isa<LoadInst>(I) && !Legal->blockNeedsPredication(I->getParent())) return false; if (!blockNeedsPredicationForAnyReason(I->getParent())) @@ -1657,10 +1657,6 @@ private: InstructionCost getScalarizationOverhead(Instruction *I, ElementCount VF) const; - /// Returns whether the instruction is a load or store and will be a emitted - /// as a vector operation. - bool isConsecutiveLoadOrStore(Instruction *I); - /// Returns true if an artificially high cost for emulated masked memrefs /// should be used. bool useEmulatedMaskMemRefHack(Instruction *I, ElementCount VF); @@ -1919,10 +1915,13 @@ public: auto DiffChecks = RtPtrChecking.getDiffChecks(); if (DiffChecks) { + Value *RuntimeVF = nullptr; MemRuntimeCheckCond = addDiffRuntimeChecks( MemCheckBlock->getTerminator(), L, *DiffChecks, MemCheckExp, - [VF](IRBuilderBase &B, unsigned Bits) { - return getRuntimeVF(B, B.getIntNTy(Bits), VF); + [VF, &RuntimeVF](IRBuilderBase &B, unsigned Bits) { + if (!RuntimeVF) + RuntimeVF = getRuntimeVF(B, B.getIntNTy(Bits), VF); + return RuntimeVF; }, IC); } else { @@ -2947,11 +2946,17 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) { // If tail is to be folded, vector loop takes care of all iterations. Type *CountTy = Count->getType(); Value *CheckMinIters = Builder.getFalse(); - auto CreateStep = [&]() { + auto CreateStep = [&]() -> Value * { // Create step with max(MinProTripCount, UF * VF). - if (UF * VF.getKnownMinValue() < MinProfitableTripCount.getKnownMinValue()) - return createStepForVF(Builder, CountTy, MinProfitableTripCount, 1); - return createStepForVF(Builder, CountTy, VF, UF); + if (UF * VF.getKnownMinValue() >= MinProfitableTripCount.getKnownMinValue()) + return createStepForVF(Builder, CountTy, VF, UF); + + Value *MinProfTC = + createStepForVF(Builder, CountTy, MinProfitableTripCount, 1); + if (!VF.isScalable()) + return MinProfTC; + return Builder.CreateBinaryIntrinsic( + Intrinsic::umax, MinProfTC, createStepForVF(Builder, CountTy, VF, UF)); }; if (!Cost->foldTailByMasking()) @@ -4168,46 +4173,26 @@ bool InnerLoopVectorizer::useOrderedReductions( return Cost->useOrderedReductions(RdxDesc); } -/// A helper function for checking whether an integer division-related -/// instruction may divide by zero (in which case it must be predicated if -/// executed conditionally in the scalar code). -/// TODO: It may be worthwhile to generalize and check isKnownNonZero(). 
-/// Non-zero divisors that are non compile-time constants will not be -/// converted into multiplication, so we will still end up scalarizing -/// the division, but can do so w/o predication. -static bool mayDivideByZero(Instruction &I) { - assert((I.getOpcode() == Instruction::UDiv || - I.getOpcode() == Instruction::SDiv || - I.getOpcode() == Instruction::URem || - I.getOpcode() == Instruction::SRem) && - "Unexpected instruction"); - Value *Divisor = I.getOperand(1); - auto *CInt = dyn_cast<ConstantInt>(Divisor); - return !CInt || CInt->isZero(); -} - -void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPValue *Def, +void InnerLoopVectorizer::widenCallInstruction(CallInst &CI, VPValue *Def, VPUser &ArgOperands, VPTransformState &State) { - assert(!isa<DbgInfoIntrinsic>(I) && + assert(!isa<DbgInfoIntrinsic>(CI) && "DbgInfoIntrinsic should have been dropped during VPlan construction"); - State.setDebugLocFromInst(&I); - - Module *M = I.getParent()->getParent()->getParent(); - auto *CI = cast<CallInst>(&I); + State.setDebugLocFromInst(&CI); SmallVector<Type *, 4> Tys; - for (Value *ArgOperand : CI->args()) + for (Value *ArgOperand : CI.args()) Tys.push_back(ToVectorTy(ArgOperand->getType(), VF.getKnownMinValue())); - Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); + Intrinsic::ID ID = getVectorIntrinsicIDForCall(&CI, TLI); // The flag shows whether we use Intrinsic or a usual Call for vectorized // version of the instruction. // Is it beneficial to perform intrinsic call compared to lib call? bool NeedToScalarize = false; - InstructionCost CallCost = Cost->getVectorCallCost(CI, VF, NeedToScalarize); - InstructionCost IntrinsicCost = ID ? Cost->getVectorIntrinsicCost(CI, VF) : 0; + InstructionCost CallCost = Cost->getVectorCallCost(&CI, VF, NeedToScalarize); + InstructionCost IntrinsicCost = + ID ? Cost->getVectorIntrinsicCost(&CI, VF) : 0; bool UseVectorIntrinsic = ID && IntrinsicCost <= CallCost; assert((UseVectorIntrinsic || !NeedToScalarize) && "Instruction should be scalarized elsewhere."); @@ -4215,7 +4200,7 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPValue *Def, "Either the intrinsic cost or vector call cost must be valid"); for (unsigned Part = 0; Part < UF; ++Part) { - SmallVector<Type *, 2> TysForDecl = {CI->getType()}; + SmallVector<Type *, 2> TysForDecl = {CI.getType()}; SmallVector<Value *, 4> Args; for (auto &I : enumerate(ArgOperands.operands())) { // Some intrinsics have a scalar argument - don't replace it with a @@ -4235,27 +4220,28 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPValue *Def, if (UseVectorIntrinsic) { // Use vector version of the intrinsic. if (VF.isVector()) - TysForDecl[0] = VectorType::get(CI->getType()->getScalarType(), VF); + TysForDecl[0] = VectorType::get(CI.getType()->getScalarType(), VF); + Module *M = State.Builder.GetInsertBlock()->getModule(); VectorF = Intrinsic::getDeclaration(M, ID, TysForDecl); assert(VectorF && "Can't retrieve vector intrinsic."); } else { // Use vector version of the function call. 
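The mayDivideByZero helper removed above is subsumed by a generic query in the isScalarWithPredication hunk below: instead of predicating whenever the divisor is not a non-zero constant, the cost model now predicates only when the instruction is not provably safe to execute unconditionally. Sketched:

case Instruction::UDiv:
case Instruction::SDiv:
case Instruction::URem:
case Instruction::SRem:
  // Needs llvm/Analysis/ValueTracking.h, newly included in this patch.
  // Safe-to-speculate covers e.g. a constant non-zero divisor.
  return !isSafeToSpeculativelyExecute(I);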
- const VFShape Shape = VFShape::get(*CI, VF, false /*HasGlobalPred*/);
+ const VFShape Shape = VFShape::get(CI, VF, false /*HasGlobalPred*/);
#ifndef NDEBUG
- assert(VFDatabase(*CI).getVectorizedFunction(Shape) != nullptr &&
+ assert(VFDatabase(CI).getVectorizedFunction(Shape) != nullptr &&
"Can't create vector function.");
#endif
- VectorF = VFDatabase(*CI).getVectorizedFunction(Shape);
+ VectorF = VFDatabase(CI).getVectorizedFunction(Shape);
}

SmallVector<OperandBundleDef, 1> OpBundles;
- CI->getOperandBundlesAsDefs(OpBundles);
+ CI.getOperandBundlesAsDefs(OpBundles);
CallInst *V = Builder.CreateCall(VectorF, Args, OpBundles);

if (isa<FPMathOperator>(V))
- V->copyFastMathFlags(CI);
+ V->copyFastMathFlags(&CI);

State.set(Def, V, Part);
- State.addMetadata(V, &I);
+ State.addMetadata(V, &CI);
}
}

@@ -4470,7 +4456,9 @@ bool LoopVectorizationCostModel::isScalarWithPredication(
case Instruction::SDiv:
case Instruction::SRem:
case Instruction::URem:
- return mayDivideByZero(*I);
+ // TODO: We can use the loop preheader as the context point here and get
+ // context-sensitive reasoning.
+ return !isSafeToSpeculativelyExecute(I);
}
return false;
}
@@ -5406,7 +5394,7 @@ VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(
}

LLVM_DEBUG(if (ForceVectorization && !ChosenFactor.Width.isScalar() &&
- ChosenFactor.Cost >= ScalarCost.Cost) dbgs()
+ !isMoreProfitable(ChosenFactor, ScalarCost)) dbgs()
<< "LV: Vectorization seems to be not beneficial, "
<< "but was forced by a user.\n");
LLVM_DEBUG(dbgs() << "LV: Selecting VF: " << ChosenFactor.Width << ".\n");
@@ -6069,7 +6057,8 @@ bool LoopVectorizationCostModel::useEmulatedMaskMemRefHack(Instruction *I,
// from moving "masked load/store" check from legality to cost model.
// Masked Load/Gather emulation was previously never allowed.
// Limited number of Masked Store/Scatter emulation was allowed.
- assert(isPredicatedInst(I, VF) && "Expecting a scalar emulated instruction");
+ assert((isPredicatedInst(I, VF) || Legal->isUniformMemOp(*I)) &&
+ "Expecting a scalar emulated instruction");
return isa<LoadInst>(I) ||
(isa<StoreInst>(I) &&
NumPredStores > NumberOfStoresToPredicate);
@@ -6779,19 +6768,29 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
NumPredStores++;

if (Legal->isUniformMemOp(I)) {
- // TODO: Avoid replicating loads and stores instead of
- // relying on instcombine to remove them.
+ // The lowering story for uniform memory ops is currently a bit
+ // complicated. Scalarization works for everything that isn't a store
+ // with a scalable VF. Fixed-length VFs just scalarize and let DCE clean
+ // up later; scalarization knows how to handle uniform-per-part values
+ // (i.e. the first lane in each unrolled VF) and can thus handle scalable
+ // loads too. For scalable stores, we use a scatter if legal. If not, we
+ // have no way to lower (currently) and thus have to abort vectorization.
+ if (isa<StoreInst>(&I) && VF.isScalable()) {
+ if (isLegalGatherOrScatter(&I, VF))
+ setWideningDecision(&I, VF, CM_GatherScatter,
+ getGatherScatterCost(&I, VF));
+ else
+ // Error case; abort vectorization.
+ setWideningDecision(&I, VF, CM_Scalarize,
+ InstructionCost::getInvalid());
+ continue;
+ }
// Load: Scalar load + broadcast
// Store: Scalar store + isLoopInvariantStoreValue ? 0 : extract
- InstructionCost Cost;
- if (isa<StoreInst>(&I) && VF.isScalable() &&
- isLegalGatherOrScatter(&I, VF)) {
- Cost = getGatherScatterCost(&I, VF);
- setWideningDecision(&I, VF, CM_GatherScatter, Cost);
- } else {
- Cost = getUniformMemOpCost(&I, VF);
- setWideningDecision(&I, VF, CM_Scalarize, Cost);
- }
+ // TODO: Avoid replicating loads and stores instead of relying on
+ // instcombine to remove them.
+ setWideningDecision(&I, VF, CM_Scalarize,
+ getUniformMemOpCost(&I, VF));
continue;
}
@@ -7146,13 +7145,10 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF,
InstWidening Decision = getWideningDecision(I, Width);
assert(Decision != CM_Unknown &&
"CM decision should be taken at this point");
- if (Decision == CM_Scalarize) {
- if (VF.isScalable() && isa<StoreInst>(I))
- // We can't scalarize a scalable vector store (even a uniform one
- // currently), return an invalid cost so as to prevent vectorization.
- return InstructionCost::getInvalid();
+ if (getWideningCost(I, VF) == InstructionCost::getInvalid())
+ return InstructionCost::getInvalid();
+ if (Decision == CM_Scalarize)
Width = ElementCount::getFixed(1);
- }
}
VectorTy = ToVectorTy(getLoadStoreType(I), Width);
return getMemoryInstructionCost(I, VF);
@@ -7308,14 +7304,6 @@ Pass *createLoopVectorizePass(bool InterleaveOnlyWhenForced,
} // end namespace llvm

-bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) {
- // Check if the pointer operand of a load or store instruction is
- // consecutive.
- if (auto *Ptr = getLoadStorePointerOperand(Inst))
- return Legal->isConsecutivePtr(getLoadStoreType(Inst), Ptr);
- return false;
-}
-
void LoopVectorizationCostModel::collectValuesToIgnore() {
// Ignore ephemeral values.
CodeMetrics::collectEphemeralValues(TheLoop, AC, ValuesToIgnore);
@@ -8370,7 +8358,7 @@ VPBasicBlock *VPRecipeBuilder::handleReplication(
Range);

bool IsPredicated = LoopVectorizationPlanner::getDecisionAndClampRange(
- [&](ElementCount VF) { return CM.isPredicatedInst(I, VF, IsUniform); },
+ [&](ElementCount VF) { return CM.isPredicatedInst(I, VF); },
Range);

// Even if the instruction is not marked as uniform, there are certain
@@ -8406,8 +8394,6 @@ VPBasicBlock *VPRecipeBuilder::handleReplication(
auto *Recipe = new VPReplicateRecipe(I, Plan->mapToVPValues(I->operands()),
IsUniform, IsPredicated);
- setRecipe(I, Recipe);
- Plan->addVPValue(I, Recipe);

// Find if I uses a predicated instruction. If so, it will use its scalar
// value. Avoid hoisting the insert-element which packs the scalar value into
@@ -8426,6 +8412,8 @@ VPBasicBlock *VPRecipeBuilder::handleReplication(
// Finalize the recipe for Instr, first if it is not predicated.
if (!IsPredicated) {
LLVM_DEBUG(dbgs() << "LV: Scalarizing:" << *I << "\n");
+ setRecipe(I, Recipe);
+ Plan->addVPValue(I, Recipe);
VPBB->appendRecipe(Recipe);
return VPBB;
}
@@ -8436,7 +8424,7 @@ VPBasicBlock *VPRecipeBuilder::handleReplication(
"predicated replication.");
VPBlockUtils::disconnectBlocks(VPBB, SingleSucc);
// Record predicated instructions for above packing optimizations.
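// The region created below wraps the recipe in an if-then construct: an
// entry block that branches on the block mask, a predicated block that
// runs the recipe, and a continue block that merges the results.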
- VPBlockBase *Region = createReplicateRegion(I, Recipe, Plan); + VPBlockBase *Region = createReplicateRegion(Recipe, Plan); VPBlockUtils::insertBlockAfter(Region, VPBB); auto *RegSucc = new VPBasicBlock(); VPBlockUtils::insertBlockAfter(RegSucc, Region); @@ -8444,11 +8432,12 @@ VPBasicBlock *VPRecipeBuilder::handleReplication( return RegSucc; } -VPRegionBlock *VPRecipeBuilder::createReplicateRegion( - Instruction *Instr, VPReplicateRecipe *PredRecipe, VPlanPtr &Plan) { +VPRegionBlock * +VPRecipeBuilder::createReplicateRegion(VPReplicateRecipe *PredRecipe, + VPlanPtr &Plan) { + Instruction *Instr = PredRecipe->getUnderlyingInstr(); // Instructions marked for predication are replicated and placed under an // if-then construct to prevent side-effects. - // Generate recipes to compute the block mask for this region. VPValue *BlockInMask = createBlockInMask(Instr->getParent(), Plan); @@ -8461,9 +8450,13 @@ VPRegionBlock *VPRecipeBuilder::createReplicateRegion( ? nullptr : new VPPredInstPHIRecipe(PredRecipe); if (PHIRecipe) { - Plan->removeVPValueFor(Instr); + setRecipe(Instr, PHIRecipe); Plan->addVPValue(Instr, PHIRecipe); + } else { + setRecipe(Instr, PredRecipe); + Plan->addVPValue(Instr, PredRecipe); } + auto *Exiting = new VPBasicBlock(Twine(RegionName) + ".continue", PHIRecipe); auto *Pred = new VPBasicBlock(Twine(RegionName) + ".if", PredRecipe); VPRegionBlock *Region = new VPRegionBlock(Entry, Exiting, RegionName, true); @@ -9564,12 +9557,19 @@ void VPReplicateRecipe::execute(VPTransformState &State) { return; } - // Generate scalar instances for all VF lanes of all UF parts, unless the - // instruction is uniform inwhich case generate only the first lane for each - // of the UF parts. - unsigned EndLane = IsUniform ? 1 : State.VF.getKnownMinValue(); - assert((!State.VF.isScalable() || IsUniform) && - "Can't scalarize a scalable vector"); + if (IsUniform) { + // Uniform within VL means we need to generate lane 0 only for each + // unrolled copy. + for (unsigned Part = 0; Part < State.UF; ++Part) + State.ILV->scalarizeInstruction(getUnderlyingInstr(), this, + VPIteration(Part, 0), IsPredicated, + State); + return; + } + + // Generate scalar instances for all VF lanes of all UF parts. + assert(!State.VF.isScalable() && "Can't scalarize a scalable vector"); + const unsigned EndLane = State.VF.getKnownMinValue(); for (unsigned Part = 0; Part < State.UF; ++Part) for (unsigned Lane = 0; Lane < EndLane; ++Lane) State.ILV->scalarizeInstruction(getUnderlyingInstr(), this, @@ -9577,52 +9577,6 @@ void VPReplicateRecipe::execute(VPTransformState &State) { State); } -void VPPredInstPHIRecipe::execute(VPTransformState &State) { - assert(State.Instance && "Predicated instruction PHI works per instance."); - Instruction *ScalarPredInst = - cast<Instruction>(State.get(getOperand(0), *State.Instance)); - BasicBlock *PredicatedBB = ScalarPredInst->getParent(); - BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor(); - assert(PredicatingBB && "Predicated block has no single predecessor."); - assert(isa<VPReplicateRecipe>(getOperand(0)) && - "operand must be VPReplicateRecipe"); - - // By current pack/unpack logic we need to generate only a single phi node: if - // a vector value for the predicated instruction exists at this point it means - // the instruction has vector users only, and a phi for the vector value is - // needed. In this case the recipe of the predicated instruction is marked to - // also do that packing, thereby "hoisting" the insert-element sequence. 
- // Otherwise, a phi node for the scalar value is needed. - unsigned Part = State.Instance->Part; - if (State.hasVectorValue(getOperand(0), Part)) { - Value *VectorValue = State.get(getOperand(0), Part); - InsertElementInst *IEI = cast<InsertElementInst>(VectorValue); - PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2); - VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector. - VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element. - if (State.hasVectorValue(this, Part)) - State.reset(this, VPhi, Part); - else - State.set(this, VPhi, Part); - // NOTE: Currently we need to update the value of the operand, so the next - // predicated iteration inserts its generated value in the correct vector. - State.reset(getOperand(0), VPhi, Part); - } else { - Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType(); - PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2); - Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()), - PredicatingBB); - Phi->addIncoming(ScalarPredInst, PredicatedBB); - if (State.hasScalarValue(this, *State.Instance)) - State.reset(this, Phi, *State.Instance); - else - State.set(this, Phi, *State.Instance); - // NOTE: Currently we need to update the value of the operand, so the next - // predicated iteration inserts its generated value in the correct vector. - State.reset(getOperand(0), Phi, *State.Instance); - } -} - void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) { VPValue *StoredValue = isStore() ? getStoredValue() : nullptr; @@ -9793,8 +9747,7 @@ static ScalarEpilogueLowering getScalarEpilogueLowering( }; // 4) if the TTI hook indicates this is profitable, request predication. - if (TTI->preferPredicateOverEpilogue(L, LI, *SE, *AC, TLI, DT, - LVL.getLAI())) + if (TTI->preferPredicateOverEpilogue(L, LI, *SE, *AC, TLI, DT, &LVL)) return CM_ScalarEpilogueNotNeededUsePredicate; return CM_ScalarEpilogueAllowed; diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index e136cd9aedac..cd044c78d900 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3744,7 +3744,7 @@ void BoUpSLP::reorderTopToBottom() { unsigned Opcode0 = TE->getOpcode(); unsigned Opcode1 = TE->getAltOpcode(); // The opcode mask selects between the two opcodes. - SmallBitVector OpcodeMask(TE->Scalars.size(), 0); + SmallBitVector OpcodeMask(TE->Scalars.size(), false); for (unsigned Lane : seq<unsigned>(0, TE->Scalars.size())) if (cast<Instruction>(TE->Scalars[Lane])->getOpcode() == Opcode1) OpcodeMask.set(Lane); @@ -4814,6 +4814,15 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, return; } + // Don't go into catchswitch blocks, which can happen with PHIs. + // Such blocks can only have PHIs and the catchswitch. There is no + // place to insert a shuffle if we need to, so just avoid that issue. + if (isa<CatchSwitchInst>(BB->getTerminator())) { + LLVM_DEBUG(dbgs() << "SLP: bundle in catchswitch block.\n"); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx); + return; + } + // Check that every instruction appears once in this bundle. 
if (!TryToFindDuplicates(S)) return; diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h index c7949c42c03e..07d3fa56020b 100644 --- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h +++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h @@ -157,10 +157,8 @@ public: return Ingredient2Recipe[I]; } - /// Create a replicating region for instruction \p I that requires - /// predication. \p PredRecipe is a VPReplicateRecipe holding \p I. - VPRegionBlock *createReplicateRegion(Instruction *I, - VPReplicateRecipe *PredRecipe, + /// Create a replicating region for \p PredRecipe. + VPRegionBlock *createReplicateRegion(VPReplicateRecipe *PredRecipe, VPlanPtr &Plan); /// Build a VPReplicationRecipe for \p I and enclose it within a Region if it diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index fdd901a4a70d..cb7507264667 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -892,6 +892,52 @@ void VPBranchOnMaskRecipe::execute(VPTransformState &State) { ReplaceInstWithInst(CurrentTerminator, CondBr); } +void VPPredInstPHIRecipe::execute(VPTransformState &State) { + assert(State.Instance && "Predicated instruction PHI works per instance."); + Instruction *ScalarPredInst = + cast<Instruction>(State.get(getOperand(0), *State.Instance)); + BasicBlock *PredicatedBB = ScalarPredInst->getParent(); + BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor(); + assert(PredicatingBB && "Predicated block has no single predecessor."); + assert(isa<VPReplicateRecipe>(getOperand(0)) && + "operand must be VPReplicateRecipe"); + + // By current pack/unpack logic we need to generate only a single phi node: if + // a vector value for the predicated instruction exists at this point it means + // the instruction has vector users only, and a phi for the vector value is + // needed. In this case the recipe of the predicated instruction is marked to + // also do that packing, thereby "hoisting" the insert-element sequence. + // Otherwise, a phi node for the scalar value is needed. + unsigned Part = State.Instance->Part; + if (State.hasVectorValue(getOperand(0), Part)) { + Value *VectorValue = State.get(getOperand(0), Part); + InsertElementInst *IEI = cast<InsertElementInst>(VectorValue); + PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2); + VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector. + VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element. + if (State.hasVectorValue(this, Part)) + State.reset(this, VPhi, Part); + else + State.set(this, VPhi, Part); + // NOTE: Currently we need to update the value of the operand, so the next + // predicated iteration inserts its generated value in the correct vector. + State.reset(getOperand(0), VPhi, Part); + } else { + Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType(); + PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2); + Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()), + PredicatingBB); + Phi->addIncoming(ScalarPredInst, PredicatedBB); + if (State.hasScalarValue(this, *State.Instance)) + State.reset(this, Phi, *State.Instance); + else + State.set(this, Phi, *State.Instance); + // NOTE: Currently we need to update the value of the operand, so the next + // predicated iteration inserts its generated value in the correct vector. 
+ State.reset(getOperand(0), Phi, *State.Instance);
+ }
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index 3501de6ab38e..43e0a40fedb9 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -133,7 +133,9 @@ void VPlanVerifier::verifyHierarchicalCFG(
verifyRegionRec(TopRegion);
}

-static bool verifyVPBasicBlock(const VPBasicBlock *VPBB) {
+static bool
+verifyVPBasicBlock(const VPBasicBlock *VPBB,
+ DenseMap<const VPBlockBase *, unsigned> &BlockNumbering) {
// Verify that phi-like recipes are at the beginning of the block, with no
// other recipes in between.
auto RecipeI = VPBB->begin();
@@ -165,15 +167,71 @@
RecipeI++;
}

+ // Verify that defs in VPBB dominate all their uses. The current
+ // implementation is still incomplete.
+ DenseMap<const VPRecipeBase *, unsigned> RecipeNumbering;
+ unsigned Cnt = 0;
+ for (const VPRecipeBase &R : *VPBB)
+ RecipeNumbering[&R] = Cnt++;
+
+ for (const VPRecipeBase &R : *VPBB) {
+ for (const VPValue *V : R.definedValues()) {
+ for (const VPUser *U : V->users()) {
+ auto *UI = dyn_cast<VPRecipeBase>(U);
+ if (!UI || isa<VPHeaderPHIRecipe>(UI))
+ continue;
+
+ // If the user is in the same block, check it comes after R in the
+ // block.
+ if (UI->getParent() == VPBB) {
+ if (RecipeNumbering[UI] < RecipeNumbering[&R]) {
+ errs() << "Use before def!\n";
+ return false;
+ }
+ continue;
+ }
+
+ // Skip blocks outside any region for now and blocks outside
+ // replicate-regions.
+ auto *ParentR = VPBB->getParent();
+ if (!ParentR || !ParentR->isReplicator())
+ continue;
+
+ // For replicators, verify that VPPredInstPHIRecipe defs are only used
+ // in subsequent blocks.
+ if (isa<VPPredInstPHIRecipe>(&R)) {
+ auto I = BlockNumbering.find(UI->getParent());
+ unsigned BlockNumber = I == BlockNumbering.end()
+     ? std::numeric_limits<unsigned>::max() : I->second;
+ if (BlockNumber < BlockNumbering[ParentR]) {
+ errs() << "Use before def!\n";
+ return false;
+ }
+ continue;
+ }
+
+ // All non-VPPredInstPHIRecipe recipes in the block must be used in
+ // the replicate region only.
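+ // (UI->getParent() is the user's VPBasicBlock; its getParent() is the
+ // enclosing region, which must be the def's replicate region.)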
+ if (UI->getParent()->getParent() != ParentR) {
+ errs() << "Use before def!\n";
+ return false;
+ }
+ }
+ }
+ }
return true;
}

bool VPlanVerifier::verifyPlanIsValid(const VPlan &Plan) {
+ DenseMap<const VPBlockBase *, unsigned> BlockNumbering;
+ unsigned Cnt = 0;
auto Iter = depth_first(
VPBlockRecursiveTraversalWrapper<const VPBlockBase *>(Plan.getEntry()));
- for (const VPBasicBlock *VPBB :
- VPBlockUtils::blocksOnly<const VPBasicBlock>(Iter)) {
- if (!verifyVPBasicBlock(VPBB))
+ for (const VPBlockBase *VPB : Iter) {
+ BlockNumbering[VPB] = Cnt++;
+ auto *VPBB = dyn_cast<VPBasicBlock>(VPB);
+ if (!VPBB)
+ continue;
+ if (!verifyVPBasicBlock(VPBB, BlockNumbering))
return false;
}
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index d12624ffb824..a38936644bd3 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1302,7 +1302,7 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
for (ShuffleVectorInst *SV : Shuffles) {
for (auto U : SV->users()) {
ShuffleVectorInst *SSV = dyn_cast<ShuffleVectorInst>(U);
- if (SSV && isa<UndefValue>(SSV->getOperand(1)))
+ if (SSV && isa<UndefValue>(SSV->getOperand(1)) && SSV->getType() == VT)
Shuffles.push_back(SSV);
}
}
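To make the VPlanVerifier change above easier to follow, here is a minimal
standalone sketch of the same block-numbering idea. It uses hypothetical
simplified types (Block, useComesAfterDef), not the LLVM classes: blocks are
numbered in traversal order, an unknown block is treated as numbered last,
and a use is flagged when its block precedes the defining replicate region.

// Standalone sketch; the types and names below are illustrative only.
#include <cstdio>
#include <initializer_list>
#include <limits>
#include <map>
#include <string>

struct Block {
  std::string Name;
  const Block *ParentRegion; // enclosing region, or nullptr
};

// Mirrors the BlockNumbering check: a use is acceptable if its block is
// numbered at or after the defining region; unknown blocks count as last.
bool useComesAfterDef(const std::map<const Block *, unsigned> &Numbering,
                      const Block *DefRegion, const Block *UseBlock) {
  auto It = Numbering.find(UseBlock);
  unsigned UseNum = It == Numbering.end()
                        ? std::numeric_limits<unsigned>::max()
                        : It->second;
  return UseNum >= Numbering.at(DefRegion);
}

int main() {
  Block Before{"preheader", nullptr};
  Block Region{"pred.region", nullptr};
  Block IfBB{"pred.if", &Region}, ContBB{"pred.continue", &Region};
  Block Later{"later", nullptr};

  // Number blocks in visit order, as verifyPlanIsValid does while walking
  // the plan depth-first.
  std::map<const Block *, unsigned> Numbering;
  unsigned Cnt = 0;
  for (const Block *B : {&Before, &Region, &IfBB, &ContBB, &Later})
    Numbering[B] = Cnt++;

  std::printf("use in later block ok:   %d\n",
              useComesAfterDef(Numbering, &Region, &Later)); // prints 1
  std::printf("use in earlier block ok: %d\n",
              useComesAfterDef(Numbering, &Region, &Before)); // prints 0
  return 0;
}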