author     Dimitry Andric <dim@FreeBSD.org>    2022-02-05 20:07:43 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2022-05-14 11:44:47 +0000
commit     1fd87a682ad7442327078e1eeb63edc4258f9815 (patch)
tree       83b42223e987ef7df2e1036937bc1bb627fa2779 /contrib/llvm-project/llvm/lib/Transforms
parent     04eeddc0aa8e0a417a16eaf9d7d095207f4a8623 (diff)
parent     ecbca9f5fb7d7613d2b94982c4825eb0d33d6842 (diff)
Merge llvm-project main llvmorg-14-init-18294-gdb01b123d012

This updates llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and
openmp to llvmorg-14-init-18294-gdb01b123d012, the last commit before the
upstream release/14.x branch was created.

PR:        261742
MFC after: 2 weeks

Diffstat (limited to 'contrib/llvm-project/llvm/lib/Transforms')
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp | 59
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp | 244
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp | 560
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalOpt.cpp | 27
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/IROutliner.cpp | 7
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/LowerTypeTests.cpp | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 233
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 26
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp | 52
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp | 285
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 15
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp | 12
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp | 26
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp | 123
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp | 48
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp | 82
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp | 13
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARC.h | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp | 25
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp | 20
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFuse.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 33
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp | 17
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/NewGVN.cpp | 28
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp | 147
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp | 25
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/CloneFunction.cpp | 53
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/GlobalStatus.cpp | 25
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp | 8
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp | 60
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp | 13
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/Utils.cpp | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/VNCoercion.cpp | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 161
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 198
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h | 89
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 17
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Vectorize/Vectorize.cpp | 1
56 files changed, 1977 insertions, 806 deletions
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index 92acfb93057a..9c16d3750998 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -23,6 +23,7 @@
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index ce3c5153bde2..e6a542385662 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -46,6 +46,7 @@
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Argument.h"
@@ -365,26 +366,25 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
// Loop over the argument list, transferring uses of the old arguments over to
// the new arguments, also transferring over the names as well.
- for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(),
- I2 = NF->arg_begin();
- I != E; ++I) {
- if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
+ Function::arg_iterator I2 = NF->arg_begin();
+ for (Argument &Arg : F->args()) {
+ if (!ArgsToPromote.count(&Arg) && !ByValArgsToTransform.count(&Arg)) {
// If this is an unmodified argument, move the name and users over to the
// new version.
- I->replaceAllUsesWith(&*I2);
- I2->takeName(&*I);
+ Arg.replaceAllUsesWith(&*I2);
+ I2->takeName(&Arg);
++I2;
continue;
}
- if (ByValArgsToTransform.count(&*I)) {
+ if (ByValArgsToTransform.count(&Arg)) {
// In the callee, we create an alloca, and store each of the new incoming
// arguments into the alloca.
Instruction *InsertPt = &NF->begin()->front();
// Just add all the struct element types.
- Type *AgTy = I->getParamByValType();
- Align StructAlign = *I->getParamAlign();
+ Type *AgTy = Arg.getParamByValType();
+ Align StructAlign = *Arg.getParamAlign();
Value *TheAlloca = new AllocaInst(AgTy, DL.getAllocaAddrSpace(), nullptr,
StructAlign, "", InsertPt);
StructType *STy = cast<StructType>(AgTy);
@@ -397,41 +397,41 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
Value *Idx = GetElementPtrInst::Create(
AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i),
InsertPt);
- I2->setName(I->getName() + "." + Twine(i));
+ I2->setName(Arg.getName() + "." + Twine(i));
Align Alignment = commonAlignment(StructAlign, SL->getElementOffset(i));
new StoreInst(&*I2++, Idx, false, Alignment, InsertPt);
}
// Anything that used the arg should now use the alloca.
- I->replaceAllUsesWith(TheAlloca);
- TheAlloca->takeName(&*I);
+ Arg.replaceAllUsesWith(TheAlloca);
+ TheAlloca->takeName(&Arg);
continue;
}
// There potentially are metadata uses for things like llvm.dbg.value.
// Replace them with undef, after handling the other regular uses.
auto RauwUndefMetadata = make_scope_exit(
- [&]() { I->replaceAllUsesWith(UndefValue::get(I->getType())); });
+ [&]() { Arg.replaceAllUsesWith(UndefValue::get(Arg.getType())); });
- if (I->use_empty())
+ if (Arg.use_empty())
continue;
// Otherwise, if we promoted this argument, then all users are load
// instructions (or GEPs with only load users), and all loads should be
// using the new argument that we added.
- ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
+ ScalarizeTable &ArgIndices = ScalarizedElements[&Arg];
- while (!I->use_empty()) {
- if (LoadInst *LI = dyn_cast<LoadInst>(I->user_back())) {
+ while (!Arg.use_empty()) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(Arg.user_back())) {
assert(ArgIndices.begin()->second.empty() &&
"Load element should sort to front!");
- I2->setName(I->getName() + ".val");
+ I2->setName(Arg.getName() + ".val");
LI->replaceAllUsesWith(&*I2);
LI->eraseFromParent();
- LLVM_DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName()
+ LLVM_DEBUG(dbgs() << "*** Promoted load of argument '" << Arg.getName()
<< "' in function '" << F->getName() << "'\n");
} else {
- GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->user_back());
+ GetElementPtrInst *GEP = cast<GetElementPtrInst>(Arg.user_back());
assert(!GEP->use_empty() &&
"GEPs without uses should be cleaned up already");
IndicesVector Operands;
@@ -449,7 +449,7 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
assert(It != ArgIndices.end() && "GEP not handled??");
}
- TheArg->setName(formatv("{0}.{1:$[.]}.val", I->getName(),
+ TheArg->setName(formatv("{0}.{1:$[.]}.val", Arg.getName(),
make_range(Operands.begin(), Operands.end())));
LLVM_DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName()
@@ -610,12 +610,12 @@ static bool isSafeToPromoteArgument(Argument *Arg, Type *ByValTy, AAResults &AAR
return true;
};
- // First, iterate the entry block and mark loads of (geps of) arguments as
- // safe.
+ // First, iterate functions that are guaranteed to execution on function
+ // entry and mark loads of (geps of) arguments as safe.
BasicBlock &EntryBlock = Arg->getParent()->front();
// Declare this here so we can reuse it
IndicesVector Indices;
- for (Instruction &I : EntryBlock)
+ for (Instruction &I : EntryBlock) {
if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
Value *V = LI->getPointerOperand();
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
@@ -649,6 +649,10 @@ static bool isSafeToPromoteArgument(Argument *Arg, Type *ByValTy, AAResults &AAR
}
}
+ if (!isGuaranteedToTransferExecutionToSuccessor(&I))
+ break;
+ }
+
// Now, iterate all uses of the argument to see if there are any uses that are
// not (GEP+)loads, or any (GEP+)loads that are not safe to promote.
SmallVector<LoadInst *, 16> Loads;
@@ -830,7 +834,10 @@ static bool canPaddingBeAccessed(Argument *arg) {
return false;
}
-bool ArgumentPromotionPass::areFunctionArgsABICompatible(
+/// Check if callers and the callee \p F agree how promoted arguments would be
+/// passed. The ones that they do not agree on are eliminated from the sets but
+/// the return value has to be observed as well.
+static bool areFunctionArgsABICompatible(
const Function &F, const TargetTransformInfo &TTI,
SmallPtrSetImpl<Argument *> &ArgsToPromote,
SmallPtrSetImpl<Argument *> &ByValArgsToTransform) {
@@ -1003,7 +1010,7 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
if (ArgsToPromote.empty() && ByValArgsToTransform.empty())
return nullptr;
- if (!ArgumentPromotionPass::areFunctionArgsABICompatible(
+ if (!areFunctionArgsABICompatible(
*F, TTI, ArgsToPromote, ByValArgsToTransform))
return nullptr;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp
index 12b8a0ef9d00..d66140a726f6 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -183,6 +183,31 @@ ChangeStatus &llvm::operator&=(ChangeStatus &L, ChangeStatus R) {
}
///}
+bool AA::isNoSyncInst(Attributor &A, const Instruction &I,
+ const AbstractAttribute &QueryingAA) {
+ // We are looking for volatile instructions or non-relaxed atomics.
+ if (const auto *CB = dyn_cast<CallBase>(&I)) {
+ if (CB->hasFnAttr(Attribute::NoSync))
+ return true;
+
+ // Non-convergent and readnone imply nosync.
+ if (!CB->isConvergent() && !CB->mayReadOrWriteMemory())
+ return true;
+
+ if (AANoSync::isNoSyncIntrinsic(&I))
+ return true;
+
+ const auto &NoSyncAA = A.getAAFor<AANoSync>(
+ QueryingAA, IRPosition::callsite_function(*CB), DepClassTy::OPTIONAL);
+ return NoSyncAA.isAssumedNoSync();
+ }
+
+ if (!I.mayReadOrWriteMemory())
+ return true;
+
+ return !I.isVolatile() && !AANoSync::isNonRelaxedAtomic(&I);
+}
+
bool AA::isDynamicallyUnique(Attributor &A, const AbstractAttribute &QueryingAA,
const Value &V) {
if (auto *C = dyn_cast<Constant>(&V))
@@ -370,6 +395,162 @@ bool AA::getPotentialCopiesOfStoredValue(
return true;
}
+static bool isAssumedReadOnlyOrReadNone(Attributor &A, const IRPosition &IRP,
+ const AbstractAttribute &QueryingAA,
+ bool RequireReadNone, bool &IsKnown) {
+
+ IRPosition::Kind Kind = IRP.getPositionKind();
+ if (Kind == IRPosition::IRP_FUNCTION || Kind == IRPosition::IRP_CALL_SITE) {
+ const auto &MemLocAA =
+ A.getAAFor<AAMemoryLocation>(QueryingAA, IRP, DepClassTy::NONE);
+ if (MemLocAA.isAssumedReadNone()) {
+ IsKnown = MemLocAA.isKnownReadNone();
+ if (!IsKnown)
+ A.recordDependence(MemLocAA, QueryingAA, DepClassTy::OPTIONAL);
+ return true;
+ }
+ }
+
+ const auto &MemBehaviorAA =
+ A.getAAFor<AAMemoryBehavior>(QueryingAA, IRP, DepClassTy::NONE);
+ if (MemBehaviorAA.isAssumedReadNone() ||
+ (!RequireReadNone && MemBehaviorAA.isAssumedReadOnly())) {
+ IsKnown = RequireReadNone ? MemBehaviorAA.isKnownReadNone()
+ : MemBehaviorAA.isKnownReadOnly();
+ if (!IsKnown)
+ A.recordDependence(MemBehaviorAA, QueryingAA, DepClassTy::OPTIONAL);
+ return true;
+ }
+
+ return false;
+}
+
+bool AA::isAssumedReadOnly(Attributor &A, const IRPosition &IRP,
+ const AbstractAttribute &QueryingAA, bool &IsKnown) {
+ return isAssumedReadOnlyOrReadNone(A, IRP, QueryingAA,
+ /* RequireReadNone */ false, IsKnown);
+}
+bool AA::isAssumedReadNone(Attributor &A, const IRPosition &IRP,
+ const AbstractAttribute &QueryingAA, bool &IsKnown) {
+ return isAssumedReadOnlyOrReadNone(A, IRP, QueryingAA,
+ /* RequireReadNone */ true, IsKnown);
+}
+
+static bool
+isPotentiallyReachable(Attributor &A, const Instruction &FromI,
+ const Instruction *ToI, const Function &ToFn,
+ const AbstractAttribute &QueryingAA,
+ std::function<bool(const Function &F)> GoBackwardsCB) {
+ LLVM_DEBUG(dbgs() << "[AA] isPotentiallyReachable @" << ToFn.getName()
+ << " from " << FromI << " [GBCB: " << bool(GoBackwardsCB)
+ << "]\n");
+
+ SmallPtrSet<const Instruction *, 8> Visited;
+ SmallVector<const Instruction *> Worklist;
+ Worklist.push_back(&FromI);
+
+ while (!Worklist.empty()) {
+ const Instruction *CurFromI = Worklist.pop_back_val();
+ if (!Visited.insert(CurFromI).second)
+ continue;
+
+ const Function *FromFn = CurFromI->getFunction();
+ if (FromFn == &ToFn) {
+ if (!ToI)
+ return true;
+ LLVM_DEBUG(dbgs() << "[AA] check " << *ToI << " from " << *CurFromI
+ << " intraprocedurally\n");
+ const auto &ReachabilityAA = A.getAAFor<AAReachability>(
+ QueryingAA, IRPosition::function(ToFn), DepClassTy::OPTIONAL);
+ bool Result = ReachabilityAA.isAssumedReachable(A, *CurFromI, *ToI);
+ LLVM_DEBUG(dbgs() << "[AA] " << *CurFromI << " "
+ << (Result ? "can potentially " : "cannot ") << "reach "
+ << *ToI << " [Intra]\n");
+ if (Result)
+ return true;
+ continue;
+ }
+
+ // TODO: If we can go arbitrarily backwards we will eventually reach an
+ // entry point that can reach ToI. Only once this takes a set of blocks
+ // through which we cannot go, or once we track internal functions not
+ // accessible from the outside, it makes sense to perform backwards analysis
+ // in the absence of a GoBackwardsCB.
+ if (!GoBackwardsCB) {
+ LLVM_DEBUG(dbgs() << "[AA] check @" << ToFn.getName() << " from "
+ << *CurFromI << " is not checked backwards, abort\n");
+ return true;
+ }
+
+ // Check if the current instruction is already known to reach the ToFn.
+ const auto &FnReachabilityAA = A.getAAFor<AAFunctionReachability>(
+ QueryingAA, IRPosition::function(*FromFn), DepClassTy::OPTIONAL);
+ bool Result = FnReachabilityAA.instructionCanReach(
+ A, *CurFromI, ToFn, /* UseBackwards */ false);
+ LLVM_DEBUG(dbgs() << "[AA] " << *CurFromI << " in @" << FromFn->getName()
+ << " " << (Result ? "can potentially " : "cannot ")
+ << "reach @" << ToFn.getName() << " [FromFn]\n");
+ if (Result)
+ return true;
+
+ // If we do not go backwards from the FromFn we are done here and so far we
+ // could not find a way to reach ToFn/ToI.
+ if (!GoBackwardsCB(*FromFn))
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Stepping backwards to the call sites of @"
+ << FromFn->getName() << "\n");
+
+ auto CheckCallSite = [&](AbstractCallSite ACS) {
+ CallBase *CB = ACS.getInstruction();
+ if (!CB)
+ return false;
+
+ if (isa<InvokeInst>(CB))
+ return false;
+
+ Instruction *Inst = CB->getNextNonDebugInstruction();
+ Worklist.push_back(Inst);
+ return true;
+ };
+
+ bool AllCallSitesKnown;
+ Result = !A.checkForAllCallSites(CheckCallSite, *FromFn,
+ /* RequireAllCallSites */ true,
+ &QueryingAA, AllCallSitesKnown);
+ if (Result) {
+ LLVM_DEBUG(dbgs() << "[AA] stepping back to call sites from " << *CurFromI
+ << " in @" << FromFn->getName()
+ << " failed, give up\n");
+ return true;
+ }
+
+ LLVM_DEBUG(dbgs() << "[AA] stepped back to call sites from " << *CurFromI
+ << " in @" << FromFn->getName()
+ << " worklist size is: " << Worklist.size() << "\n");
+ }
+ return false;
+}
+
+bool AA::isPotentiallyReachable(
+ Attributor &A, const Instruction &FromI, const Instruction &ToI,
+ const AbstractAttribute &QueryingAA,
+ std::function<bool(const Function &F)> GoBackwardsCB) {
+ LLVM_DEBUG(dbgs() << "[AA] isPotentiallyReachable " << ToI << " from "
+ << FromI << " [GBCB: " << bool(GoBackwardsCB) << "]\n");
+ const Function *ToFn = ToI.getFunction();
+ return ::isPotentiallyReachable(A, FromI, &ToI, *ToFn, QueryingAA,
+ GoBackwardsCB);
+}
+
+bool AA::isPotentiallyReachable(
+ Attributor &A, const Instruction &FromI, const Function &ToFn,
+ const AbstractAttribute &QueryingAA,
+ std::function<bool(const Function &F)> GoBackwardsCB) {
+ return ::isPotentiallyReachable(A, FromI, /* ToI */ nullptr, ToFn, QueryingAA,
+ GoBackwardsCB);
+}
+
/// Return true if \p New is equal or worse than \p Old.
static bool isEqualOrWorse(const Attribute &New, const Attribute &Old) {
if (!Old.isIntAttribute())
@@ -704,9 +885,8 @@ void IRPosition::verify() {
"Expected a nullptr for an invalid position!");
return;
case IRP_FLOAT:
- assert((!isa<CallBase>(&getAssociatedValue()) &&
- !isa<Argument>(&getAssociatedValue())) &&
- "Expected specialized kind for call base and argument values!");
+ assert((!isa<Argument>(&getAssociatedValue())) &&
+ "Expected specialized kind for argument values!");
return;
case IRP_RETURNED:
assert(isa<Function>(getAsValuePtr()) &&
@@ -900,7 +1080,7 @@ bool Attributor::isAssumedDead(const Use &U,
UsedAssumedInformation, CheckBBLivenessOnly, DepClass);
}
- return isAssumedDead(IRPosition::value(*UserI), QueryingAA, FnLivenessAA,
+ return isAssumedDead(IRPosition::inst(*UserI), QueryingAA, FnLivenessAA,
UsedAssumedInformation, CheckBBLivenessOnly, DepClass);
}
@@ -923,7 +1103,8 @@ bool Attributor::isAssumedDead(const Instruction &I,
// If we have a context instruction and a liveness AA we use it.
if (FnLivenessAA &&
FnLivenessAA->getIRPosition().getAnchorScope() == I.getFunction() &&
- FnLivenessAA->isAssumedDead(&I)) {
+ (CheckBBLivenessOnly ? FnLivenessAA->isAssumedDead(I.getParent())
+ : FnLivenessAA->isAssumedDead(&I))) {
if (QueryingAA)
recordDependence(*FnLivenessAA, *QueryingAA, DepClass);
if (!FnLivenessAA->isKnownDead(&I))
@@ -934,8 +1115,9 @@ bool Attributor::isAssumedDead(const Instruction &I,
if (CheckBBLivenessOnly)
return false;
- const AAIsDead &IsDeadAA = getOrCreateAAFor<AAIsDead>(
- IRPosition::value(I, CBCtx), QueryingAA, DepClassTy::NONE);
+ const IRPosition IRP = IRPosition::inst(I, CBCtx);
+ const AAIsDead &IsDeadAA =
+ getOrCreateAAFor<AAIsDead>(IRP, QueryingAA, DepClassTy::NONE);
// Don't check liveness for AAIsDead.
if (QueryingAA == &IsDeadAA)
return false;
@@ -1035,8 +1217,14 @@ bool Attributor::checkForAllUses(
const Use *U = Worklist.pop_back_val();
if (isa<PHINode>(U->getUser()) && !Visited.insert(U).second)
continue;
- LLVM_DEBUG(dbgs() << "[Attributor] Check use: " << **U << " in "
- << *U->getUser() << "\n");
+ LLVM_DEBUG({
+ if (auto *Fn = dyn_cast<Function>(U->getUser()))
+ dbgs() << "[Attributor] Check use: " << **U << " in " << Fn->getName()
+ << "\n";
+ else
+ dbgs() << "[Attributor] Check use: " << **U << " in " << *U->getUser()
+ << "\n";
+ });
bool UsedAssumedInformation = false;
if (isAssumedDead(*U, &QueryingAA, LivenessAA, UsedAssumedInformation,
CheckBBLivenessOnly, LivenessDepClass)) {
@@ -1126,8 +1314,14 @@ bool Attributor::checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred,
SmallVector<const Use *, 8> Uses(make_pointer_range(Fn.uses()));
for (unsigned u = 0; u < Uses.size(); ++u) {
const Use &U = *Uses[u];
- LLVM_DEBUG(dbgs() << "[Attributor] Check use: " << *U << " in "
- << *U.getUser() << "\n");
+ LLVM_DEBUG({
+ if (auto *Fn = dyn_cast<Function>(U))
+ dbgs() << "[Attributor] Check use: " << Fn->getName() << " in "
+ << *U.getUser() << "\n";
+ else
+ dbgs() << "[Attributor] Check use: " << *U << " in " << *U.getUser()
+ << "\n";
+ });
bool UsedAssumedInformation = false;
if (isAssumedDead(U, QueryingAA, nullptr, UsedAssumedInformation,
/* CheckBBLivenessOnly */ true)) {
@@ -1268,9 +1462,12 @@ static bool checkForAllInstructionsImpl(
for (Instruction *I : *Insts) {
// Skip dead instructions.
if (A && !CheckPotentiallyDead &&
- A->isAssumedDead(IRPosition::value(*I), QueryingAA, LivenessAA,
- UsedAssumedInformation, CheckBBLivenessOnly))
+ A->isAssumedDead(IRPosition::inst(*I), QueryingAA, LivenessAA,
+ UsedAssumedInformation, CheckBBLivenessOnly)) {
+ LLVM_DEBUG(dbgs() << "[Attributor] Instruction " << *I
+ << " is potentially dead, skip!\n";);
continue;
+ }
if (!Pred(*I))
return false;
@@ -1329,7 +1526,7 @@ bool Attributor::checkForAllReadWriteInstructions(
for (Instruction *I :
InfoCache.getReadOrWriteInstsForFunction(*AssociatedFunction)) {
// Skip dead instructions.
- if (isAssumedDead(IRPosition::value(*I), &QueryingAA, &LivenessAA,
+ if (isAssumedDead(IRPosition::inst(*I), &QueryingAA, &LivenessAA,
UsedAssumedInformation))
continue;
@@ -1381,9 +1578,11 @@ void Attributor::runTillFixpoint() {
InvalidAA->Deps.pop_back();
AbstractAttribute *DepAA = cast<AbstractAttribute>(Dep.getPointer());
if (Dep.getInt() == unsigned(DepClassTy::OPTIONAL)) {
+ LLVM_DEBUG(dbgs() << " - recompute: " << *DepAA);
Worklist.insert(DepAA);
continue;
}
+ LLVM_DEBUG(dbgs() << " - invalidate: " << *DepAA);
DepAA->getState().indicatePessimisticFixpoint();
assert(DepAA->getState().isAtFixpoint() && "Expected fixpoint state!");
if (!DepAA->getState().isValidState())
@@ -1433,6 +1632,9 @@ void Attributor::runTillFixpoint() {
// Note that dependent ones are added above.
Worklist.clear();
Worklist.insert(ChangedAAs.begin(), ChangedAAs.end());
+ Worklist.insert(QueryAAsAwaitingUpdate.begin(),
+ QueryAAsAwaitingUpdate.end());
+ QueryAAsAwaitingUpdate.clear();
} while (!Worklist.empty() && (IterationCounter++ < MaxFixedPointIterations ||
VerifyMaxFixpointIterations));
@@ -1492,6 +1694,12 @@ void Attributor::runTillFixpoint() {
}
}
+void Attributor::registerForUpdate(AbstractAttribute &AA) {
+ assert(AA.isQueryAA() &&
+ "Non-query AAs should not be required to register for updates!");
+ QueryAAsAwaitingUpdate.insert(&AA);
+}
+
ChangeStatus Attributor::manifestAttributes() {
TimeTraceScope TimeScope("Attributor::manifestAttributes");
size_t NumFinalAAs = DG.SyntheticRoot.Deps.size();
@@ -1792,7 +2000,7 @@ ChangeStatus Attributor::cleanupIR() {
// Actually we do not delete the blocks but squash them into a single
// unreachable but untangling branches that jump here is something we need
// to do in a more generic way.
- DetatchDeadBlocks(ToBeDeletedBBs, nullptr);
+ detachDeadBlocks(ToBeDeletedBBs, nullptr);
}
identifyDeadInternalFunctions();
@@ -1897,7 +2105,7 @@ ChangeStatus Attributor::updateAA(AbstractAttribute &AA) {
/* CheckBBLivenessOnly */ true))
CS = AA.update(*this);
- if (DV.empty()) {
+ if (!AA.isQueryAA() && DV.empty()) {
// If the attribute did not query any non-fix information, the state
// will not change and we can indicate that right away.
AAState.indicateOptimisticFixpoint();
@@ -2601,12 +2809,12 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
auto CallSitePred = [&](Instruction &I) -> bool {
auto &CB = cast<CallBase>(I);
- IRPosition CBRetPos = IRPosition::callsite_returned(CB);
+ IRPosition CBInstPos = IRPosition::inst(CB);
IRPosition CBFnPos = IRPosition::callsite_function(CB);
// Call sites might be dead if they do not have side effects and no live
// users. The return value might be dead if there are no live users.
- getOrCreateAAFor<AAIsDead>(CBRetPos);
+ getOrCreateAAFor<AAIsDead>(CBInstPos);
Function *Callee = CB.getCalledFunction();
// TODO: Even if the callee is not known now we might be able to simplify
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 76420783b2d1..2d88e329e093 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
@@ -68,6 +69,12 @@ static cl::opt<unsigned, true> MaxPotentialValues(
cl::location(llvm::PotentialConstantIntValuesState::MaxPotentialValues),
cl::init(7));
+static cl::opt<unsigned>
+ MaxInterferingWrites("attributor-max-interfering-writes", cl::Hidden,
+ cl::desc("Maximum number of interfering writes to "
+ "check before assuming all might interfere."),
+ cl::init(6));
+
STATISTIC(NumAAs, "Number of abstract attributes created");
// Some helper macros to deal with statistics tracking.
@@ -244,6 +251,8 @@ static Value *constructPointer(Type *ResTy, Type *PtrElemTy, Value *Ptr,
/// once. Note that the value used for the callback may still be the value
/// associated with \p IRP (due to PHIs). To limit how much effort is invested,
/// we will never visit more values than specified by \p MaxValues.
+/// If \p Intraprocedural is set to true only values valid in the scope of
+/// \p CtxI will be visited and simplification into other scopes is prevented.
template <typename StateTy>
static bool genericValueTraversal(
Attributor &A, IRPosition IRP, const AbstractAttribute &QueryingAA,
@@ -251,7 +260,8 @@ static bool genericValueTraversal(
function_ref<bool(Value &, const Instruction *, StateTy &, bool)>
VisitValueCB,
const Instruction *CtxI, bool UseValueSimplify = true, int MaxValues = 16,
- function_ref<Value *(Value *)> StripCB = nullptr) {
+ function_ref<Value *(Value *)> StripCB = nullptr,
+ bool Intraprocedural = false) {
const AAIsDead *LivenessAA = nullptr;
if (IRP.getAnchorScope())
@@ -281,8 +291,11 @@ static bool genericValueTraversal(
continue;
// Make sure we limit the compile time for complex expressions.
- if (Iteration++ >= MaxValues)
+ if (Iteration++ >= MaxValues) {
+ LLVM_DEBUG(dbgs() << "Generic value traversal reached iteration limit: "
+ << Iteration << "!\n");
return false;
+ }
// Explicitly look through calls with a "returned" attribute if we do
// not have a pointer as stripPointerCasts only works on them.
@@ -331,10 +344,7 @@ static bool genericValueTraversal(
"Expected liveness in the presence of instructions!");
for (unsigned u = 0, e = PHI->getNumIncomingValues(); u < e; u++) {
BasicBlock *IncomingBB = PHI->getIncomingBlock(u);
- bool UsedAssumedInformation = false;
- if (A.isAssumedDead(*IncomingBB->getTerminator(), &QueryingAA,
- LivenessAA, UsedAssumedInformation,
- /* CheckBBLivenessOnly */ true)) {
+ if (LivenessAA->isEdgeDead(IncomingBB, PHI->getParent())) {
AnyDead = true;
continue;
}
@@ -344,24 +354,49 @@ static bool genericValueTraversal(
continue;
}
+ if (auto *Arg = dyn_cast<Argument>(V)) {
+ if (!Intraprocedural && !Arg->hasPassPointeeByValueCopyAttr()) {
+ SmallVector<Item> CallSiteValues;
+ bool AllCallSitesKnown = true;
+ if (A.checkForAllCallSites(
+ [&](AbstractCallSite ACS) {
+ // Callbacks might not have a corresponding call site operand,
+ // stick with the argument in that case.
+ Value *CSOp = ACS.getCallArgOperand(*Arg);
+ if (!CSOp)
+ return false;
+ CallSiteValues.push_back({CSOp, ACS.getInstruction()});
+ return true;
+ },
+ *Arg->getParent(), true, &QueryingAA, AllCallSitesKnown)) {
+ Worklist.append(CallSiteValues);
+ continue;
+ }
+ }
+ }
+
if (UseValueSimplify && !isa<Constant>(V)) {
bool UsedAssumedInformation = false;
Optional<Value *> SimpleV =
A.getAssumedSimplified(*V, QueryingAA, UsedAssumedInformation);
if (!SimpleV.hasValue())
continue;
- if (!SimpleV.getValue())
- return false;
Value *NewV = SimpleV.getValue();
- if (NewV != V) {
- Worklist.push_back({NewV, CtxI});
- continue;
+ if (NewV && NewV != V) {
+ if (!Intraprocedural || !CtxI ||
+ AA::isValidInScope(*NewV, CtxI->getFunction())) {
+ Worklist.push_back({NewV, CtxI});
+ continue;
+ }
}
}
// Once a leaf is reached we inform the user through the callback.
- if (!VisitValueCB(*V, CtxI, State, Iteration > 1))
+ if (!VisitValueCB(*V, CtxI, State, Iteration > 1)) {
+ LLVM_DEBUG(dbgs() << "Generic value traversal visit callback failed for: "
+ << *V << "!\n");
return false;
+ }
} while (!Worklist.empty());
// If we actually used liveness information so we have to record a dependence.
@@ -375,7 +410,8 @@ static bool genericValueTraversal(
bool AA::getAssumedUnderlyingObjects(Attributor &A, const Value &Ptr,
SmallVectorImpl<Value *> &Objects,
const AbstractAttribute &QueryingAA,
- const Instruction *CtxI) {
+ const Instruction *CtxI,
+ bool Intraprocedural) {
auto StripCB = [&](Value *V) { return getUnderlyingObject(V); };
SmallPtrSet<Value *, 8> SeenObjects;
auto VisitValueCB = [&SeenObjects](Value &Val, const Instruction *,
@@ -387,7 +423,7 @@ bool AA::getAssumedUnderlyingObjects(Attributor &A, const Value &Ptr,
};
if (!genericValueTraversal<decltype(Objects)>(
A, IRPosition::value(Ptr), QueryingAA, Objects, VisitValueCB, CtxI,
- true, 32, StripCB))
+ true, 32, StripCB, Intraprocedural))
return false;
return true;
}
@@ -620,7 +656,7 @@ struct AACallSiteReturnedFromReturned : public BaseType {
if (!AssociatedFunction)
return S.indicatePessimisticFixpoint();
- CallBase &CBContext = static_cast<CallBase &>(this->getAnchorValue());
+ CallBase &CBContext = cast<CallBase>(this->getAnchorValue());
if (IntroduceCallBaseContext)
LLVM_DEBUG(dbgs() << "[Attributor] Introducing call base context:"
<< CBContext << "\n");
@@ -1026,7 +1062,6 @@ private:
BooleanState BS;
};
-namespace {
struct AAPointerInfoImpl
: public StateWrapper<AA::PointerInfo::State, AAPointerInfo> {
using BaseTy = StateWrapper<AA::PointerInfo::State, AAPointerInfo>;
@@ -1058,6 +1093,165 @@ struct AAPointerInfoImpl
const override {
return State::forallInterferingAccesses(SI, CB);
}
+ bool forallInterferingWrites(
+ Attributor &A, const AbstractAttribute &QueryingAA, LoadInst &LI,
+ function_ref<bool(const Access &, bool)> UserCB) const override {
+ SmallPtrSet<const Access *, 8> DominatingWrites;
+ SmallVector<std::pair<const Access *, bool>, 8> InterferingWrites;
+
+ Function &Scope = *LI.getFunction();
+ const auto &NoSyncAA = A.getAAFor<AANoSync>(
+ QueryingAA, IRPosition::function(Scope), DepClassTy::OPTIONAL);
+ const auto *ExecDomainAA = A.lookupAAFor<AAExecutionDomain>(
+ IRPosition::function(Scope), &QueryingAA, DepClassTy::OPTIONAL);
+ const bool NoSync = NoSyncAA.isAssumedNoSync();
+
+ // Helper to determine if we need to consider threading, which we cannot
+ // right now. However, if the function is (assumed) nosync or the thread
+ // executing all instructions is the main thread only we can ignore
+ // threading.
+ auto CanIgnoreThreading = [&](const Instruction &I) -> bool {
+ if (NoSync)
+ return true;
+ if (ExecDomainAA && ExecDomainAA->isExecutedByInitialThreadOnly(I))
+ return true;
+ return false;
+ };
+
+ // Helper to determine if the access is executed by the same thread as the
+ // load, for now it is sufficient to avoid any potential threading effects
+ // as we cannot deal with them anyway.
+ auto IsSameThreadAsLoad = [&](const Access &Acc) -> bool {
+ return CanIgnoreThreading(*Acc.getLocalInst());
+ };
+
+ // TODO: Use inter-procedural reachability and dominance.
+ const auto &NoRecurseAA = A.getAAFor<AANoRecurse>(
+ QueryingAA, IRPosition::function(*LI.getFunction()),
+ DepClassTy::OPTIONAL);
+
+ const bool CanUseCFGResoning = CanIgnoreThreading(LI);
+ InformationCache &InfoCache = A.getInfoCache();
+ const DominatorTree *DT =
+ NoRecurseAA.isKnownNoRecurse()
+ ? InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(
+ Scope)
+ : nullptr;
+
+ enum GPUAddressSpace : unsigned {
+ Generic = 0,
+ Global = 1,
+ Shared = 3,
+ Constant = 4,
+ Local = 5,
+ };
+
+ // Helper to check if a value has "kernel lifetime", that is it will not
+ // outlive a GPU kernel. This is true for shared, constant, and local
+ // globals on AMD and NVIDIA GPUs.
+ auto HasKernelLifetime = [&](Value *V, Module &M) {
+ Triple T(M.getTargetTriple());
+ if (!(T.isAMDGPU() || T.isNVPTX()))
+ return false;
+ switch (V->getType()->getPointerAddressSpace()) {
+ case GPUAddressSpace::Shared:
+ case GPUAddressSpace::Constant:
+ case GPUAddressSpace::Local:
+ return true;
+ default:
+ return false;
+ };
+ };
+
+ // The IsLiveInCalleeCB will be used by the AA::isPotentiallyReachable query
+ // to determine if we should look at reachability from the callee. For
+ // certain pointers we know the lifetime and we do not have to step into the
+ // callee to determine reachability as the pointer would be dead in the
+ // callee. See the conditional initialization below.
+ std::function<bool(const Function &)> IsLiveInCalleeCB;
+
+ if (auto *AI = dyn_cast<AllocaInst>(&getAssociatedValue())) {
+ // If the alloca containing function is not recursive the alloca
+ // must be dead in the callee.
+ const Function *AIFn = AI->getFunction();
+ const auto &NoRecurseAA = A.getAAFor<AANoRecurse>(
+ *this, IRPosition::function(*AIFn), DepClassTy::OPTIONAL);
+ if (NoRecurseAA.isAssumedNoRecurse()) {
+ IsLiveInCalleeCB = [AIFn](const Function &Fn) { return AIFn != &Fn; };
+ }
+ } else if (auto *GV = dyn_cast<GlobalValue>(&getAssociatedValue())) {
+ // If the global has kernel lifetime we can stop if we reach a kernel
+ // as it is "dead" in the (unknown) callees.
+ if (HasKernelLifetime(GV, *GV->getParent()))
+ IsLiveInCalleeCB = [](const Function &Fn) {
+ return !Fn.hasFnAttribute("kernel");
+ };
+ }
+
+ auto AccessCB = [&](const Access &Acc, bool Exact) {
+ if (!Acc.isWrite())
+ return true;
+
+ // For now we only filter accesses based on CFG reasoning which does not
+ // work yet if we have threading effects, or the access is complicated.
+ if (CanUseCFGResoning) {
+ if (!AA::isPotentiallyReachable(A, *Acc.getLocalInst(), LI, QueryingAA,
+ IsLiveInCalleeCB))
+ return true;
+ if (DT && Exact &&
+ (Acc.getLocalInst()->getFunction() == LI.getFunction()) &&
+ IsSameThreadAsLoad(Acc)) {
+ if (DT->dominates(Acc.getLocalInst(), &LI))
+ DominatingWrites.insert(&Acc);
+ }
+ }
+
+ InterferingWrites.push_back({&Acc, Exact});
+ return true;
+ };
+ if (!State::forallInterferingAccesses(LI, AccessCB))
+ return false;
+
+ // If we cannot use CFG reasoning we only filter the non-write accesses
+ // and are done here.
+ if (!CanUseCFGResoning) {
+ for (auto &It : InterferingWrites)
+ if (!UserCB(*It.first, It.second))
+ return false;
+ return true;
+ }
+
+ // Helper to determine if we can skip a specific write access. This is in
+ // the worst case quadratic as we are looking for another write that will
+ // hide the effect of this one.
+ auto CanSkipAccess = [&](const Access &Acc, bool Exact) {
+ if (!IsSameThreadAsLoad(Acc))
+ return false;
+ if (!DominatingWrites.count(&Acc))
+ return false;
+ for (const Access *DomAcc : DominatingWrites) {
+ assert(Acc.getLocalInst()->getFunction() ==
+ DomAcc->getLocalInst()->getFunction() &&
+ "Expected dominating writes to be in the same function!");
+
+ if (DomAcc != &Acc &&
+ DT->dominates(Acc.getLocalInst(), DomAcc->getLocalInst())) {
+ return true;
+ }
+ }
+ return false;
+ };
+
+ // Run the user callback on all writes we cannot skip and return if that
+ // succeeded for all or not.
+ unsigned NumInterferingWrites = InterferingWrites.size();
+ for (auto &It : InterferingWrites)
+ if (!DT || NumInterferingWrites > MaxInterferingWrites ||
+ !CanSkipAccess(*It.first, It.second))
+ if (!UserCB(*It.first, It.second))
+ return false;
+ return true;
+ }
ChangeStatus translateAndAddCalleeState(Attributor &A,
const AAPointerInfo &CalleeAA,
@@ -1200,9 +1394,8 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
<< " : " << *Idx << "\n");
return false;
}
- UsrOI.Offset = PtrOI.Offset +
- DL.getIndexedOffsetInType(
- GEP->getSourceElementType(), Indices);
+ UsrOI.Offset = PtrOI.Offset + DL.getIndexedOffsetInType(
+ GEP->getSourceElementType(), Indices);
Follow = true;
return true;
}
@@ -1693,17 +1886,9 @@ ChangeStatus AAReturnedValuesImpl::updateImpl(Attributor &A) {
auto ReturnValueCB = [&](Value &V, const Instruction *CtxI, ReturnInst &Ret,
bool) -> bool {
- bool UsedAssumedInformation = false;
- Optional<Value *> SimpleRetVal =
- A.getAssumedSimplified(V, *this, UsedAssumedInformation);
- if (!SimpleRetVal.hasValue())
- return true;
- if (!SimpleRetVal.getValue())
- return false;
- Value *RetVal = *SimpleRetVal;
- assert(AA::isValidInScope(*RetVal, Ret.getFunction()) &&
+ assert(AA::isValidInScope(V, Ret.getFunction()) &&
"Assumed returned value should be valid in function scope!");
- if (ReturnedValues[RetVal].insert(&Ret))
+ if (ReturnedValues[&V].insert(&Ret))
Changed = ChangeStatus::CHANGED;
return true;
};
@@ -1712,7 +1897,8 @@ ChangeStatus AAReturnedValuesImpl::updateImpl(Attributor &A) {
ReturnInst &Ret = cast<ReturnInst>(I);
return genericValueTraversal<ReturnInst>(
A, IRPosition::value(*Ret.getReturnValue()), *this, Ret, ReturnValueCB,
- &I);
+ &I, /* UseValueSimplify */ true, /* MaxValues */ 16,
+ /* StripCB */ nullptr, /* Intraprocedural */ true);
};
// Discover returned values from all live returned instructions in the
@@ -1767,24 +1953,16 @@ struct AANoSyncImpl : AANoSync {
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override;
-
- /// Helper function used to determine whether an instruction is non-relaxed
- /// atomic. In other words, if an atomic instruction does not have unordered
- /// or monotonic ordering
- static bool isNonRelaxedAtomic(Instruction *I);
-
- /// Helper function specific for intrinsics which are potentially volatile
- static bool isNoSyncIntrinsic(Instruction *I);
};
-bool AANoSyncImpl::isNonRelaxedAtomic(Instruction *I) {
+bool AANoSync::isNonRelaxedAtomic(const Instruction *I) {
if (!I->isAtomic())
return false;
if (auto *FI = dyn_cast<FenceInst>(I))
// All legal orderings for fence are stronger than monotonic.
return FI->getSyncScopeID() != SyncScope::SingleThread;
- else if (auto *AI = dyn_cast<AtomicCmpXchgInst>(I)) {
+ if (auto *AI = dyn_cast<AtomicCmpXchgInst>(I)) {
// Unordered is not a legal ordering for cmpxchg.
return (AI->getSuccessOrdering() != AtomicOrdering::Monotonic ||
AI->getFailureOrdering() != AtomicOrdering::Monotonic);
@@ -1813,7 +1991,7 @@ bool AANoSyncImpl::isNonRelaxedAtomic(Instruction *I) {
/// Return true if this intrinsic is nosync. This is only used for intrinsics
/// which would be nosync except that they have a volatile flag. All other
/// intrinsics are simply annotated with the nosync attribute in Intrinsics.td.
-bool AANoSyncImpl::isNoSyncIntrinsic(Instruction *I) {
+bool AANoSync::isNoSyncIntrinsic(const Instruction *I) {
if (auto *MI = dyn_cast<MemIntrinsic>(I))
return !MI->isVolatile();
return false;
@@ -1822,24 +2000,7 @@ bool AANoSyncImpl::isNoSyncIntrinsic(Instruction *I) {
ChangeStatus AANoSyncImpl::updateImpl(Attributor &A) {
auto CheckRWInstForNoSync = [&](Instruction &I) {
- /// We are looking for volatile instructions or Non-Relaxed atomics.
-
- if (const auto *CB = dyn_cast<CallBase>(&I)) {
- if (CB->hasFnAttr(Attribute::NoSync))
- return true;
-
- if (isNoSyncIntrinsic(&I))
- return true;
-
- const auto &NoSyncAA = A.getAAFor<AANoSync>(
- *this, IRPosition::callsite_function(*CB), DepClassTy::REQUIRED);
- return NoSyncAA.isAssumedNoSync();
- }
-
- if (!I.isVolatile() && !isNonRelaxedAtomic(&I))
- return true;
-
- return false;
+ return AA::isNoSyncInst(A, I, *this);
};
auto CheckForNoSync = [&](Instruction &I) {
@@ -2327,16 +2488,6 @@ struct AANoRecurseFunction final : AANoRecurseImpl {
AANoRecurseFunction(const IRPosition &IRP, Attributor &A)
: AANoRecurseImpl(IRP, A) {}
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- AANoRecurseImpl::initialize(A);
- // TODO: We should build a call graph ourselves to enable this in the module
- // pass as well.
- if (const Function *F = getAnchorScope())
- if (A.getInfoCache().getSccSize(*F) != 1)
- indicatePessimisticFixpoint();
- }
-
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
@@ -2359,27 +2510,10 @@ struct AANoRecurseFunction final : AANoRecurseImpl {
return ChangeStatus::UNCHANGED;
}
- // If the above check does not hold anymore we look at the calls.
- auto CheckForNoRecurse = [&](Instruction &I) {
- const auto &CB = cast<CallBase>(I);
- if (CB.hasFnAttr(Attribute::NoRecurse))
- return true;
-
- const auto &NoRecurseAA = A.getAAFor<AANoRecurse>(
- *this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED);
- if (!NoRecurseAA.isAssumedNoRecurse())
- return false;
-
- // Recursion to the same function
- if (CB.getCalledFunction() == getAnchorScope())
- return false;
-
- return true;
- };
-
- bool UsedAssumedInformation = false;
- if (!A.checkForAllCallLikeInstructions(CheckForNoRecurse, *this,
- UsedAssumedInformation))
+ const AAFunctionReachability &EdgeReachability =
+ A.getAAFor<AAFunctionReachability>(*this, getIRPosition(),
+ DepClassTy::REQUIRED);
+ if (EdgeReachability.canReach(A, *getAnchorScope()))
return indicatePessimisticFixpoint();
return ChangeStatus::UNCHANGED;
}
@@ -2798,16 +2932,10 @@ struct AAWillReturnImpl : public AAWillReturn {
(!getAssociatedFunction() || !getAssociatedFunction()->mustProgress()))
return false;
- const auto &MemAA =
- A.getAAFor<AAMemoryBehavior>(*this, getIRPosition(), DepClassTy::NONE);
- if (!MemAA.isAssumedReadOnly())
- return false;
- if (KnownOnly && !MemAA.isKnownReadOnly())
- return false;
- if (!MemAA.isKnownReadOnly())
- A.recordDependence(MemAA, *this, DepClassTy::OPTIONAL);
-
- return true;
+ bool IsKnown;
+ if (AA::isAssumedReadOnly(A, getIRPosition(), *this, IsKnown))
+ return IsKnown || !KnownOnly;
+ return false;
}
/// See AbstractAttribute::updateImpl(...).
@@ -2904,6 +3032,10 @@ struct AAReachabilityImpl : AAReachability {
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
+ const auto &NoRecurseAA = A.getAAFor<AANoRecurse>(
+ *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
+ if (!NoRecurseAA.isAssumedNoRecurse())
+ return indicatePessimisticFixpoint();
return ChangeStatus::UNCHANGED;
}
};
@@ -3008,9 +3140,8 @@ struct AANoAliasArgument final
return Base::updateImpl(A);
// If the argument is read-only, no-alias cannot break synchronization.
- const auto &MemBehaviorAA = A.getAAFor<AAMemoryBehavior>(
- *this, getIRPosition(), DepClassTy::OPTIONAL);
- if (MemBehaviorAA.isAssumedReadOnly())
+ bool IsKnown;
+ if (AA::isAssumedReadOnly(A, getIRPosition(), *this, IsKnown))
return Base::updateImpl(A);
// If the argument is never passed through callbacks, no-alias cannot break
@@ -3366,14 +3497,8 @@ struct AAIsDeadValueImpl : public AAIsDead {
if (!NoUnwindAA.isKnownNoUnwind())
A.recordDependence(NoUnwindAA, *this, DepClassTy::OPTIONAL);
- const auto &MemBehaviorAA =
- A.getAndUpdateAAFor<AAMemoryBehavior>(*this, CallIRP, DepClassTy::NONE);
- if (MemBehaviorAA.isAssumedReadOnly()) {
- if (!MemBehaviorAA.isKnownReadOnly())
- A.recordDependence(MemBehaviorAA, *this, DepClassTy::OPTIONAL);
- return true;
- }
- return false;
+ bool IsKnown;
+ return AA::isAssumedReadOnly(A, CallIRP, *this, IsKnown);
}
};
@@ -3699,6 +3824,7 @@ struct AAIsDeadFunction : public AAIsDead {
if (!AssumedLiveBlocks.count(&BB)) {
A.deleteAfterManifest(BB);
++BUILD_STAT_NAME(AAIsDead, BasicBlock);
+ HasChanged = ChangeStatus::CHANGED;
}
return HasChanged;
@@ -3708,7 +3834,7 @@ struct AAIsDeadFunction : public AAIsDead {
ChangeStatus updateImpl(Attributor &A) override;
bool isEdgeDead(const BasicBlock *From, const BasicBlock *To) const override {
- return !AssumedLiveEdges.count(std::make_pair(From, To));
+ return isValidState() && !AssumedLiveEdges.count(std::make_pair(From, To));
}
/// See AbstractAttribute::trackStatistics()
@@ -4921,14 +5047,11 @@ ChangeStatus AANoCaptureImpl::updateImpl(Attributor &A) {
AANoCapture::StateType T;
// Readonly means we cannot capture through memory.
- const auto &FnMemAA =
- A.getAAFor<AAMemoryBehavior>(*this, FnPos, DepClassTy::NONE);
- if (FnMemAA.isAssumedReadOnly()) {
+ bool IsKnown;
+ if (AA::isAssumedReadOnly(A, FnPos, *this, IsKnown)) {
T.addKnownBits(NOT_CAPTURED_IN_MEM);
- if (FnMemAA.isKnownReadOnly())
+ if (IsKnown)
addKnownBits(NOT_CAPTURED_IN_MEM);
- else
- A.recordDependence(FnMemAA, *this, DepClassTy::OPTIONAL);
}
// Make sure all returned values are different than the underlying value.
@@ -5085,7 +5208,6 @@ struct AANoCaptureCallSiteReturned final : AANoCaptureImpl {
STATS_DECLTRACK_CSRET_ATTR(nocapture)
}
};
-} // namespace
/// ------------------ Value Simplify Attribute ----------------------------
@@ -5106,7 +5228,6 @@ bool ValueSimplifyStateType::unionAssumed(Optional<Value *> Other) {
return true;
}
-namespace {
struct AAValueSimplifyImpl : AAValueSimplify {
AAValueSimplifyImpl(const IRPosition &IRP, Attributor &A)
: AAValueSimplify(IRP, A) {}
@@ -5266,8 +5387,6 @@ struct AAValueSimplifyImpl : AAValueSimplify {
auto CheckAccess = [&](const AAPointerInfo::Access &Acc, bool IsExact) {
LLVM_DEBUG(dbgs() << " - visit access " << Acc << "\n");
- if (!Acc.isWrite())
- return true;
if (Acc.isWrittenValueYetUndetermined())
return true;
Value *Content = Acc.getWrittenValue();
@@ -5287,7 +5406,7 @@ struct AAValueSimplifyImpl : AAValueSimplify {
auto &PI = A.getAAFor<AAPointerInfo>(AA, IRPosition::value(*Obj),
DepClassTy::REQUIRED);
- if (!PI.forallInterferingAccesses(L, CheckAccess))
+ if (!PI.forallInterferingWrites(A, AA, L, CheckAccess))
return false;
}
return true;
@@ -5325,9 +5444,8 @@ struct AAValueSimplifyArgument final : AAValueSimplifyImpl {
if (Arg->hasByValAttr()) {
// TODO: We probably need to verify synchronization is not an issue, e.g.,
// there is no race by not copying a constant byval.
- const auto &MemAA = A.getAAFor<AAMemoryBehavior>(*this, getIRPosition(),
- DepClassTy::REQUIRED);
- if (!MemAA.isAssumedReadOnly())
+ bool IsKnown;
+ if (!AA::isAssumedReadOnly(A, getIRPosition(), *this, IsKnown))
return indicatePessimisticFixpoint();
}
@@ -6827,9 +6945,8 @@ struct AAPrivatizablePtrCallSiteArgument final
return indicatePessimisticFixpoint();
}
- const auto &MemBehaviorAA =
- A.getAAFor<AAMemoryBehavior>(*this, IRP, DepClassTy::REQUIRED);
- if (!MemBehaviorAA.isAssumedReadOnly()) {
+ bool IsKnown;
+ if (!AA::isAssumedReadOnly(A, IRP, *this, IsKnown)) {
LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] pointer is written!\n");
return indicatePessimisticFixpoint();
}
@@ -7378,7 +7495,6 @@ void AAMemoryBehaviorFloating::analyzeUseIn(Attributor &A, const Use &U,
if (UserI->mayWriteToMemory())
removeAssumedBits(NO_WRITES);
}
-} // namespace
/// -------------------- Memory Locations Attributes ---------------------------
/// Includes read-none, argmemonly, inaccessiblememonly,
@@ -7412,7 +7528,6 @@ std::string AAMemoryLocation::getMemoryLocationsAsStr(
return S;
}
-namespace {
struct AAMemoryLocationImpl : public AAMemoryLocation {
AAMemoryLocationImpl(const IRPosition &IRP, Attributor &A)
@@ -7657,7 +7772,8 @@ void AAMemoryLocationImpl::categorizePtrValue(
<< getMemoryLocationsAsStr(State.getAssumed()) << "]\n");
SmallVector<Value *, 8> Objects;
- if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, *this, &I)) {
+ if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, *this, &I,
+ /* Intraprocedural */ true)) {
LLVM_DEBUG(
dbgs() << "[AAMemoryLocation] Pointer locations not categorized\n");
updateStateAndAccessesMap(State, NO_UNKOWN_MEM, &I, nullptr, Changed,
@@ -9411,7 +9527,7 @@ struct AACallEdgesCallSite : public AACallEdgesImpl {
}
};
- CallBase *CB = static_cast<CallBase *>(getCtxI());
+ CallBase *CB = cast<CallBase>(getCtxI());
if (CB->isInlineAsm()) {
setHasUnknownCallee(false, Change);
@@ -9450,7 +9566,7 @@ struct AACallEdgesFunction : public AACallEdgesImpl {
ChangeStatus Change = ChangeStatus::UNCHANGED;
auto ProcessCallInst = [&](Instruction &Inst) {
- CallBase &CB = static_cast<CallBase &>(Inst);
+ CallBase &CB = cast<CallBase>(Inst);
auto &CBEdges = A.getAAFor<AACallEdges>(
*this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED);
@@ -9481,11 +9597,39 @@ struct AACallEdgesFunction : public AACallEdgesImpl {
struct AAFunctionReachabilityFunction : public AAFunctionReachability {
private:
struct QuerySet {
- void markReachable(Function *Fn) {
- Reachable.insert(Fn);
- Unreachable.erase(Fn);
+ void markReachable(const Function &Fn) {
+ Reachable.insert(&Fn);
+ Unreachable.erase(&Fn);
}
+ /// If there is no information about the function None is returned.
+ Optional<bool> isCachedReachable(const Function &Fn) {
+ // Assume that we can reach the function.
+ // TODO: Be more specific with the unknown callee.
+ if (CanReachUnknownCallee)
+ return true;
+
+ if (Reachable.count(&Fn))
+ return true;
+
+ if (Unreachable.count(&Fn))
+ return false;
+
+ return llvm::None;
+ }
+
+ /// Set of functions that we know for sure is reachable.
+ DenseSet<const Function *> Reachable;
+
+ /// Set of functions that are unreachable, but might become reachable.
+ DenseSet<const Function *> Unreachable;
+
+ /// If we can reach a function with a call to a unknown function we assume
+ /// that we can reach any function.
+ bool CanReachUnknownCallee = false;
+ };
+
+ struct QueryResolver : public QuerySet {
ChangeStatus update(Attributor &A, const AAFunctionReachability &AA,
ArrayRef<const AACallEdges *> AAEdgesList) {
ChangeStatus Change = ChangeStatus::UNCHANGED;
@@ -9499,31 +9643,30 @@ private:
}
}
- for (Function *Fn : make_early_inc_range(Unreachable)) {
- if (checkIfReachable(A, AA, AAEdgesList, Fn)) {
+ for (const Function *Fn : make_early_inc_range(Unreachable)) {
+ if (checkIfReachable(A, AA, AAEdgesList, *Fn)) {
Change = ChangeStatus::CHANGED;
- markReachable(Fn);
+ markReachable(*Fn);
}
}
return Change;
}
- bool isReachable(Attributor &A, const AAFunctionReachability &AA,
- ArrayRef<const AACallEdges *> AAEdgesList, Function *Fn) {
- // Assume that we can reach the function.
- // TODO: Be more specific with the unknown callee.
- if (CanReachUnknownCallee)
- return true;
-
- if (Reachable.count(Fn))
- return true;
+ bool isReachable(Attributor &A, AAFunctionReachability &AA,
+ ArrayRef<const AACallEdges *> AAEdgesList,
+ const Function &Fn) {
+ Optional<bool> Cached = isCachedReachable(Fn);
+ if (Cached.hasValue())
+ return Cached.getValue();
- if (Unreachable.count(Fn))
- return false;
+ // The query was not cached, thus it is new. We need to request an update
+ // explicitly to make sure this the information is properly run to a
+ // fixpoint.
+ A.registerForUpdate(AA);
// We need to assume that this function can't reach Fn to prevent
// an infinite loop if this function is recursive.
- Unreachable.insert(Fn);
+ Unreachable.insert(&Fn);
bool Result = checkIfReachable(A, AA, AAEdgesList, Fn);
if (Result)
@@ -9533,13 +9676,13 @@ private:
bool checkIfReachable(Attributor &A, const AAFunctionReachability &AA,
ArrayRef<const AACallEdges *> AAEdgesList,
- Function *Fn) const {
+ const Function &Fn) const {
// Handle the most trivial case first.
for (auto *AAEdges : AAEdgesList) {
const SetVector<Function *> &Edges = AAEdges->getOptimisticEdges();
- if (Edges.count(Fn))
+ if (Edges.count(const_cast<Function *>(&Fn)))
return true;
}
@@ -9560,28 +9703,44 @@ private:
}
// The result is false for now, set dependencies and leave.
- for (auto Dep : Deps)
- A.recordDependence(AA, *Dep, DepClassTy::REQUIRED);
+ for (auto *Dep : Deps)
+ A.recordDependence(*Dep, AA, DepClassTy::REQUIRED);
return false;
}
+ };
- /// Set of functions that we know for sure is reachable.
- DenseSet<Function *> Reachable;
+ /// Get call edges that can be reached by this instruction.
+ bool getReachableCallEdges(Attributor &A, const AAReachability &Reachability,
+ const Instruction &Inst,
+ SmallVector<const AACallEdges *> &Result) const {
+ // Determine call like instructions that we can reach from the inst.
+ auto CheckCallBase = [&](Instruction &CBInst) {
+ if (!Reachability.isAssumedReachable(A, Inst, CBInst))
+ return true;
- /// Set of functions that are unreachable, but might become reachable.
- DenseSet<Function *> Unreachable;
+ auto &CB = cast<CallBase>(CBInst);
+ const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
+ *this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED);
- /// If we can reach a function with a call to a unknown function we assume
- /// that we can reach any function.
- bool CanReachUnknownCallee = false;
- };
+ Result.push_back(&AAEdges);
+ return true;
+ };
+
+ bool UsedAssumedInformation = false;
+ return A.checkForAllCallLikeInstructions(CheckCallBase, *this,
+ UsedAssumedInformation,
+ /* CheckBBLivenessOnly */ true);
+ }
public:
AAFunctionReachabilityFunction(const IRPosition &IRP, Attributor &A)
: AAFunctionReachability(IRP, A) {}
- bool canReach(Attributor &A, Function *Fn) const override {
+ bool canReach(Attributor &A, const Function &Fn) const override {
+ if (!isValidState())
+ return true;
+
const AACallEdges &AAEdges =
A.getAAFor<AACallEdges>(*this, getIRPosition(), DepClassTy::REQUIRED);
@@ -9590,14 +9749,18 @@ public:
// a const_cast.
// This is a hack for us to be able to cache queries.
auto *NonConstThis = const_cast<AAFunctionReachabilityFunction *>(this);
- bool Result =
- NonConstThis->WholeFunction.isReachable(A, *this, {&AAEdges}, Fn);
+ bool Result = NonConstThis->WholeFunction.isReachable(A, *NonConstThis,
+ {&AAEdges}, Fn);
return Result;
}
/// Can \p CB reach \p Fn
- bool canReach(Attributor &A, CallBase &CB, Function *Fn) const override {
+ bool canReach(Attributor &A, CallBase &CB,
+ const Function &Fn) const override {
+ if (!isValidState())
+ return true;
+
const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
*this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED);
@@ -9606,13 +9769,40 @@ public:
// a const_cast.
// This is a hack for us to be able to cache queries.
auto *NonConstThis = const_cast<AAFunctionReachabilityFunction *>(this);
- QuerySet &CBQuery = NonConstThis->CBQueries[&CB];
+ QueryResolver &CBQuery = NonConstThis->CBQueries[&CB];
- bool Result = CBQuery.isReachable(A, *this, {&AAEdges}, Fn);
+ bool Result = CBQuery.isReachable(A, *NonConstThis, {&AAEdges}, Fn);
return Result;
}
+ bool instructionCanReach(Attributor &A, const Instruction &Inst,
+ const Function &Fn,
+ bool UseBackwards) const override {
+ if (!isValidState())
+ return true;
+
+ if (UseBackwards)
+ return AA::isPotentiallyReachable(A, Inst, Fn, *this, nullptr);
+
+ const auto &Reachability = A.getAAFor<AAReachability>(
+ *this, IRPosition::function(*getAssociatedFunction()),
+ DepClassTy::REQUIRED);
+
+ SmallVector<const AACallEdges *> CallEdges;
+ bool AllKnown = getReachableCallEdges(A, Reachability, Inst, CallEdges);
+ // Attributor returns attributes as const, so this function has to be
+ // const for users of this attribute to use it without having to do
+ // a const_cast.
+ // This is a hack for us to be able to cache queries.
+ auto *NonConstThis = const_cast<AAFunctionReachabilityFunction *>(this);
+ QueryResolver &InstQSet = NonConstThis->InstQueries[&Inst];
+ if (!AllKnown)
+ InstQSet.CanReachUnknownCallee = true;
+
+ return InstQSet.isReachable(A, *NonConstThis, CallEdges, Fn);
+ }
+
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
const AACallEdges &AAEdges =
@@ -9621,7 +9811,7 @@ public:
Change |= WholeFunction.update(A, *this, {&AAEdges});
- for (auto CBPair : CBQueries) {
+ for (auto &CBPair : CBQueries) {
const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
*this, IRPosition::callsite_function(*CBPair.first),
DepClassTy::REQUIRED);
@@ -9629,6 +9819,25 @@ public:
Change |= CBPair.second.update(A, *this, {&AAEdges});
}
+ // Update the Instruction queries.
+ const AAReachability *Reachability;
+ if (!InstQueries.empty()) {
+ Reachability = &A.getAAFor<AAReachability>(
+ *this, IRPosition::function(*getAssociatedFunction()),
+ DepClassTy::REQUIRED);
+ }
+
+ // Check for local callbases first.
+ for (auto &InstPair : InstQueries) {
+ SmallVector<const AACallEdges *> CallEdges;
+ bool AllKnown =
+ getReachableCallEdges(A, *Reachability, *InstPair.first, CallEdges);
+ // Update will return change if this affects any queries.
+ if (!AllKnown)
+ InstPair.second.CanReachUnknownCallee = true;
+ Change |= InstPair.second.update(A, *this, CallEdges);
+ }
+
return Change;
}
@@ -9649,11 +9858,14 @@ private:
}
/// Used to answer if the whole function can reach a specific function.
- QuerySet WholeFunction;
+ QueryResolver WholeFunction;
/// Used to answer if a call base inside this function can reach a specific
/// function.
- DenseMap<CallBase *, QuerySet> CBQueries;
+ DenseMap<const CallBase *, QueryResolver> CBQueries;
+
+ /// This is for instruction queries that scan "forward".
+ DenseMap<const Instruction *, QueryResolver> InstQueries;
};
/// ---------------------- Assumption Propagation ------------------------------
@@ -9790,8 +10002,6 @@ private:
}
};
-} // namespace
-
AACallGraphNode *AACallEdgeIterator::operator*() const {
return static_cast<AACallGraphNode *>(const_cast<AACallEdges *>(
&A.getOrCreateAAFor<AACallEdges>(IRPosition::function(**I))));
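A rough standalone sketch of the caching idea behind the QueryResolver refactoring above: reachability answers are memoized per target so that repeated queries (whole function, per call site, and now per instruction) do not rescan the call graph. The Fn and QueryResolver names below are illustrative stand-ins, not the Attributor's fixpoint machinery.

#include <string>
#include <unordered_set>
#include <vector>

struct Fn {
  std::string Name;
  std::vector<Fn *> Callees; // optimistic call edges
};

class QueryResolver {
  std::unordered_set<const Fn *> Reachable;   // cached positive answers
  std::unordered_set<const Fn *> Unreachable; // cached negative answers

public:
  bool CanReachUnknownCallee = false; // pessimize everything if set

  bool isReachable(const Fn &From, const Fn &Target) {
    if (CanReachUnknownCallee || Reachable.count(&Target))
      return true;
    if (Unreachable.count(&Target))
      return false;
    // Depth-first walk over the optimistic call edges.
    std::vector<const Fn *> Work{&From};
    std::unordered_set<const Fn *> Seen;
    while (!Work.empty()) {
      const Fn *F = Work.back();
      Work.pop_back();
      if (!Seen.insert(F).second)
        continue;
      if (F == &Target) {
        Reachable.insert(&Target);
        return true;
      }
      for (Fn *Callee : F->Callees)
        Work.push_back(Callee);
    }
    Unreachable.insert(&Target);
    return false;
  }
};

int main() {
  Fn A{"a", {}}, B{"b", {}}, C{"c", {}};
  A.Callees = {&B};
  B.Callees = {&C};
  QueryResolver R;
  return R.isReachable(A, C) ? 0 : 1; // a -> b -> c is reachable
}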
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp
index 74f11fa30959..927dceec8865 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp
@@ -21,6 +21,7 @@
#include "llvm/Analysis/ValueLatticeUtils.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/IPO.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index d3cac3efce86..1cb32e32c895 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -352,14 +352,10 @@ static bool collectSRATypes(DenseMap<uint64_t, Type *> &Types, GlobalValue *GV,
while (!Worklist.empty()) {
Use *U = Worklist.pop_back_val();
User *V = U->getUser();
- if (isa<BitCastOperator>(V) || isa<AddrSpaceCastOperator>(V)) {
- AppendUses(V);
- continue;
- }
- if (auto *GEP = dyn_cast<GEPOperator>(V)) {
- if (!GEP->hasAllConstantIndices())
- return false;
+ auto *GEP = dyn_cast<GEPOperator>(V);
+ if (isa<BitCastOperator>(V) || isa<AddrSpaceCastOperator>(V) ||
+ (GEP && GEP->hasAllConstantIndices())) {
AppendUses(V);
continue;
}
@@ -2229,6 +2225,13 @@ OptimizeGlobalAliases(Module &M,
for (GlobalValue *GV : Used.used())
Used.compilerUsedErase(GV);
+ // Return whether GV is explicitly or implicitly dso_local and not replaceable
+ // by another definition in the current linkage unit.
+ auto IsModuleLocal = [](GlobalValue &GV) {
+ return !GlobalValue::isInterposableLinkage(GV.getLinkage()) &&
+ (GV.isDSOLocal() || GV.isImplicitDSOLocal());
+ };
+
for (GlobalAlias &J : llvm::make_early_inc_range(M.aliases())) {
// Aliases without names cannot be referenced outside this module.
if (!J.hasName() && !J.isDeclaration() && !J.hasLocalLinkage())
@@ -2240,18 +2243,20 @@ OptimizeGlobalAliases(Module &M,
}
// If the alias can change at link time, nothing can be done - bail out.
- if (J.isInterposable())
+ if (!IsModuleLocal(J))
continue;
Constant *Aliasee = J.getAliasee();
GlobalValue *Target = dyn_cast<GlobalValue>(Aliasee->stripPointerCasts());
// We can't trivially replace the alias with the aliasee if the aliasee is
// non-trivial in some way. We also can't replace the alias with the aliasee
- // if the aliasee is interposable because aliases point to the local
- // definition.
+ // if the aliasee may be preemptible at runtime. On ELF, a non-preemptible
+ // alias can be used to access the definition as if preemption did not
+ // happen.
// TODO: Try to handle non-zero GEPs of local aliasees.
- if (!Target || Target->isInterposable())
+ if (!Target || !IsModuleLocal(*Target))
continue;
+
Target->removeDeadConstantUsers();
// Make all users of the alias use the aliasee instead.
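The new IsModuleLocal lambda above tightens the old isInterposable() checks: the alias and its aliasee are only folded together when both are known to resolve inside the current linkage unit. A simplified standalone model of that predicate, using stand-in linkage and dso_local flags rather than the real GlobalValue API:

enum class Linkage { External, Internal, Weak, Common };

struct GlobalSym {
  Linkage L = Linkage::External;
  bool DSOLocal = false;         // explicitly marked dso_local
  bool ImplicitDSOLocal = false; // e.g. internal/private linkage
};

// Weak-style linkages may be replaced by another definition at link time.
static bool isInterposable(Linkage L) {
  return L == Linkage::Weak || L == Linkage::Common;
}

// Mirrors the intent of IsModuleLocal above: not interposable, and known to
// resolve within this linkage unit.
static bool isModuleLocal(const GlobalSym &GV) {
  return !isInterposable(GV.L) && (GV.DSOLocal || GV.ImplicitDSOLocal);
}

int main() {
  GlobalSym Hidden{Linkage::Internal, /*DSOLocal=*/false, /*Implicit=*/true};
  GlobalSym WeakDef{Linkage::Weak, /*DSOLocal=*/true, /*Implicit=*/false};
  return (isModuleLocal(Hidden) && !isModuleLocal(WeakDef)) ? 0 : 1;
}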
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/IROutliner.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/IROutliner.cpp
index e064fbbef595..faf7cb7d566a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/IROutliner.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/IROutliner.cpp
@@ -42,6 +42,11 @@ extern cl::opt<bool> DisableBranches;
// A command flag to be used for debugging to exclude indirect calls from similarity
// matching and outlining.
extern cl::opt<bool> DisableIndirectCalls;
+
+// A command flag to be used for debugging to exclude intrinsics from similarity
+// matching and outlining.
+extern cl::opt<bool> DisableIntrinsics;
+
} // namespace llvm
// Set to true if the user wants the ir outliner to run on linkonceodr linkage
@@ -2610,6 +2615,8 @@ unsigned IROutliner::doOutline(Module &M) {
// Find the possible similarity sections.
InstructionClassifier.EnableBranches = !DisableBranches;
InstructionClassifier.EnableIndirectCalls = !DisableIndirectCalls;
+ InstructionClassifier.EnableIntrinsics = !DisableIntrinsics;
+
IRSimilarityIdentifier &Identifier = getIRSI(M);
SimilarityGroupList &SimilarityCandidates = *Identifier.getSimilarity();
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
index c0bb19e184d6..8e83d7bcb6c2 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -42,6 +42,7 @@
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 68f33410c602..2d765fb6ce6d 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -26,19 +26,25 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/Assumptions.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/Attributor.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -98,6 +104,11 @@ static cl::opt<bool> DisableOpenMPOptStateMachineRewrite(
cl::desc("Disable OpenMP optimizations that replace the state machine."),
cl::Hidden, cl::init(false));
+static cl::opt<bool> DisableOpenMPOptBarrierElimination(
+ "openmp-opt-disable-barrier-elimination", cl::ZeroOrMore,
+ cl::desc("Disable OpenMP optimizations that eliminate barriers."),
+ cl::Hidden, cl::init(false));
+
static cl::opt<bool> PrintModuleAfterOptimizations(
"openmp-opt-print-module", cl::ZeroOrMore,
cl::desc("Print the current module after OpenMP optimizations."),
@@ -147,6 +158,7 @@ STATISTIC(NumOpenMPParallelRegionsMerged,
"Number of OpenMP parallel regions merged");
STATISTIC(NumBytesMovedToSharedMemory,
"Amount of memory pushed to shared memory");
+STATISTIC(NumBarriersEliminated, "Number of redundant barriers eliminated");
#if !defined(NDEBUG)
static constexpr auto TAG = "[" DEBUG_TYPE "]";
@@ -458,7 +470,6 @@ struct OMPInformationCache : public InformationCache {
RTLFunctions.insert(F); \
if (declMatchesRTFTypes(F, OMPBuilder._ReturnType, ArgsTypes)) { \
RuntimeFunctionIDMap[F] = _Enum; \
- F->removeFnAttr(Attribute::NoInline); \
auto &RFI = RFIs[_Enum]; \
RFI.Kind = _Enum; \
RFI.Name = _Name; \
@@ -480,6 +491,15 @@ struct OMPInformationCache : public InformationCache {
}
#include "llvm/Frontend/OpenMP/OMPKinds.def"
+ // Remove the `noinline` attribute from `__kmpc`, `_OMP::` and `omp_`
+ // functions, except if `optnone` is present.
+ for (Function &F : M) {
+ for (StringRef Prefix : {"__kmpc", "_ZN4_OMP", "omp_"})
+ if (F.getName().startswith(Prefix) &&
+ !F.hasFnAttribute(Attribute::OptimizeNone))
+ F.removeFnAttr(Attribute::NoInline);
+ }
+
// TODO: We should attach the attributes defined in OMPKinds.def.
}
@@ -787,6 +807,8 @@ struct OpenMPOpt {
if (remarksEnabled())
analysisGlobalization();
+
+ Changed |= eliminateBarriers();
} else {
if (PrintICVValues)
printICVs();
@@ -809,6 +831,8 @@ struct OpenMPOpt {
Changed = true;
}
}
+
+ Changed |= eliminateBarriers();
}
return Changed;
@@ -1378,6 +1402,213 @@ private:
return Changed;
}
+ /// Eliminates redundant, aligned barriers in OpenMP offloaded kernels.
+ /// TODO: Make this an AA and expand it to work across blocks and functions.
+ bool eliminateBarriers() {
+ bool Changed = false;
+
+ if (DisableOpenMPOptBarrierElimination)
+ return /*Changed=*/false;
+
+ if (OMPInfoCache.Kernels.empty())
+ return /*Changed=*/false;
+
+ enum ImplicitBarrierType { IBT_ENTRY, IBT_EXIT };
+
+ class BarrierInfo {
+ Instruction *I;
+ enum ImplicitBarrierType Type;
+
+ public:
+ BarrierInfo(enum ImplicitBarrierType Type) : I(nullptr), Type(Type) {}
+ BarrierInfo(Instruction &I) : I(&I) {}
+
+ bool isImplicit() { return !I; }
+
+ bool isImplicitEntry() { return isImplicit() && Type == IBT_ENTRY; }
+
+ bool isImplicitExit() { return isImplicit() && Type == IBT_EXIT; }
+
+ Instruction *getInstruction() { return I; }
+ };
+
+ for (Function *Kernel : OMPInfoCache.Kernels) {
+ for (BasicBlock &BB : *Kernel) {
+ SmallVector<BarrierInfo, 8> BarriersInBlock;
+ SmallPtrSet<Instruction *, 8> BarriersToBeDeleted;
+
+ // Add the kernel entry implicit barrier.
+ if (&Kernel->getEntryBlock() == &BB)
+ BarriersInBlock.push_back(IBT_ENTRY);
+
+ // Find implicit and explicit aligned barriers in the same basic block.
+ for (Instruction &I : BB) {
+ if (isa<ReturnInst>(I)) {
+ // Add the implicit barrier when exiting the kernel.
+ BarriersInBlock.push_back(IBT_EXIT);
+ continue;
+ }
+ CallBase *CB = dyn_cast<CallBase>(&I);
+ if (!CB)
+ continue;
+
+ auto IsAlignBarrierCB = [&](CallBase &CB) {
+ switch (CB.getIntrinsicID()) {
+ case Intrinsic::nvvm_barrier0:
+ case Intrinsic::nvvm_barrier0_and:
+ case Intrinsic::nvvm_barrier0_or:
+ case Intrinsic::nvvm_barrier0_popc:
+ case Intrinsic::amdgcn_s_barrier:
+ return true;
+ default:
+ break;
+ }
+ return hasAssumption(CB,
+ KnownAssumptionString("ompx_aligned_barrier"));
+ };
+
+ if (IsAlignBarrierCB(*CB)) {
+ // Add an explicit aligned barrier.
+ BarriersInBlock.push_back(I);
+ }
+ }
+
+ if (BarriersInBlock.size() <= 1)
+ continue;
+
+ // A barrier in a barrier pair is removable if all instructions
+ // between the barriers in the pair are side-effect free modulo the
+ // barrier operation.
+ auto IsBarrierRemoveable = [&Kernel](BarrierInfo *StartBI,
+ BarrierInfo *EndBI) {
+ assert(
+ !StartBI->isImplicitExit() &&
+ "Expected start barrier to be other than a kernel exit barrier");
+ assert(
+ !EndBI->isImplicitEntry() &&
+ "Expected end barrier to be other than a kernel entry barrier");
+ // If StartBI's instruction is null then this is the implicit
+ // kernel entry barrier, so iterate from the first instruction in the
+ // entry block.
+ Instruction *I = (StartBI->isImplicitEntry())
+ ? &Kernel->getEntryBlock().front()
+ : StartBI->getInstruction()->getNextNode();
+ assert(I && "Expected non-null start instruction");
+ Instruction *E = (EndBI->isImplicitExit())
+ ? I->getParent()->getTerminator()
+ : EndBI->getInstruction();
+ assert(E && "Expected non-null end instruction");
+
+ for (; I != E; I = I->getNextNode()) {
+ if (!I->mayHaveSideEffects() && !I->mayReadFromMemory())
+ continue;
+
+ auto IsPotentiallyAffectedByBarrier =
+ [](Optional<MemoryLocation> Loc) {
+ const Value *Obj = (Loc && Loc->Ptr)
+ ? getUnderlyingObject(Loc->Ptr)
+ : nullptr;
+ if (!Obj) {
+ LLVM_DEBUG(
+ dbgs()
+ << "Access to unknown location requires barriers\n");
+ return true;
+ }
+ if (isa<UndefValue>(Obj))
+ return false;
+ if (isa<AllocaInst>(Obj))
+ return false;
+ if (auto *GV = dyn_cast<GlobalVariable>(Obj)) {
+ if (GV->isConstant())
+ return false;
+ if (GV->isThreadLocal())
+ return false;
+ if (GV->getAddressSpace() == (int)AddressSpace::Local)
+ return false;
+ if (GV->getAddressSpace() == (int)AddressSpace::Constant)
+ return false;
+ }
+ LLVM_DEBUG(dbgs() << "Access to '" << *Obj
+ << "' requires barriers\n");
+ return true;
+ };
+
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
+ Optional<MemoryLocation> Loc = MemoryLocation::getForDest(MI);
+ if (IsPotentiallyAffectedByBarrier(Loc))
+ return false;
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) {
+ Optional<MemoryLocation> Loc =
+ MemoryLocation::getForSource(MTI);
+ if (IsPotentiallyAffectedByBarrier(Loc))
+ return false;
+ }
+ continue;
+ }
+
+ if (auto *LI = dyn_cast<LoadInst>(I))
+ if (LI->hasMetadata(LLVMContext::MD_invariant_load))
+ continue;
+
+ Optional<MemoryLocation> Loc = MemoryLocation::getOrNone(I);
+ if (IsPotentiallyAffectedByBarrier(Loc))
+ return false;
+ }
+
+ return true;
+ };
+
+ // Iterate barrier pairs and remove an explicit barrier if analysis
+ // deems it removable.
+ for (auto *It = BarriersInBlock.begin(),
+ *End = BarriersInBlock.end() - 1;
+ It != End; ++It) {
+
+ BarrierInfo *StartBI = It;
+ BarrierInfo *EndBI = (It + 1);
+
+ // Cannot remove when both are implicit barriers, continue.
+ if (StartBI->isImplicit() && EndBI->isImplicit())
+ continue;
+
+ if (!IsBarrierRemoveable(StartBI, EndBI))
+ continue;
+
+ assert(!(StartBI->isImplicit() && EndBI->isImplicit()) &&
+ "Expected at least one explicit barrier to remove.");
+
+ // Remove an explicit barrier, check first, then second.
+ if (!StartBI->isImplicit()) {
+ LLVM_DEBUG(dbgs() << "Remove start barrier "
+ << *StartBI->getInstruction() << "\n");
+ BarriersToBeDeleted.insert(StartBI->getInstruction());
+ } else {
+ LLVM_DEBUG(dbgs() << "Remove end barrier "
+ << *EndBI->getInstruction() << "\n");
+ BarriersToBeDeleted.insert(EndBI->getInstruction());
+ }
+ }
+
+ if (BarriersToBeDeleted.empty())
+ continue;
+
+ Changed = true;
+ for (Instruction *I : BarriersToBeDeleted) {
+ ++NumBarriersEliminated;
+ auto Remark = [&](OptimizationRemark OR) {
+ return OR << "Redundant barrier eliminated.";
+ };
+
+ if (EnableVerboseRemarks)
+ emitRemark<OptimizationRemark>(I, "OMP190", Remark);
+ I->eraseFromParent();
+ }
+ }
+ }
+
+ return Changed;
+ }
+
void analysisGlobalization() {
auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
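The new eliminateBarriers() walks each block's aligned barriers pairwise and deletes an explicit barrier when nothing between it and its neighbour can be observed by another thread. A stripped-down standalone sketch of that scan, where a single boolean per instruction stands in for the memory-location reasoning above:

#include <cstddef>
#include <vector>

struct Item {
  bool IsBarrier = false;          // explicit aligned barrier
  bool TouchesSharedState = false; // access visible to other threads
};

// Returns the indices of barriers that are redundant because nothing that
// other threads could observe happens since the previous (possibly implicit
// kernel-entry) barrier.
std::vector<size_t> redundantBarriers(const std::vector<Item> &Block) {
  std::vector<size_t> ToDelete;
  bool SharedSinceLastBarrier = false; // kernel entry is an implicit barrier
  for (size_t I = 0; I < Block.size(); ++I) {
    if (!Block[I].IsBarrier) {
      SharedSinceLastBarrier |= Block[I].TouchesSharedState;
      continue;
    }
    if (!SharedSinceLastBarrier)
      ToDelete.push_back(I); // nothing to synchronize, barrier is redundant
    SharedSinceLastBarrier = false;
  }
  return ToDelete;
}

int main() {
  // shared store, barrier, thread-private work, barrier:
  // only the second barrier is redundant.
  std::vector<Item> Block = {
      {false, true}, {true, false}, {false, false}, {true, false}};
  return redundantBarriers(Block).size() == 1 ? 0 : 1;
}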
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
index 21395460bccb..e104ae00e916 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
@@ -23,6 +23,7 @@
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/CRC.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index daaf6cbeb3fd..52708ff2f226 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -535,7 +535,7 @@ void writeThinLTOBitcode(raw_ostream &OS, raw_ostream *ThinLinkOS,
// the information that is needed by thin link will be written in the
// given OS.
if (ThinLinkOS && Index)
- WriteThinLinkBitcodeToFile(M, *ThinLinkOS, *Index, ModHash);
+ writeThinLinkBitcodeToFile(M, *ThinLinkOS, *Index, ModHash);
}
class WriteThinLTOBitcode : public ModulePass {
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
index 6acace1d9fd4..8b30f0e989a1 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -970,7 +970,7 @@ bool DevirtModule::runForTesting(
if (StringRef(ClWriteSummary).endswith(".bc")) {
raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_None);
ExitOnErr(errorCodeToError(EC));
- WriteIndexToFile(*Summary, OS);
+ writeIndexToFile(*Summary, OS);
} else {
raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_TextWithCRLF);
ExitOnErr(errorCodeToError(EC));
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 1fb46af46bee..05b28328afbf 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2468,10 +2468,28 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
// Fence instruction simplification
Instruction *InstCombinerImpl::visitFenceInst(FenceInst &FI) {
- // Remove identical consecutive fences.
- Instruction *Next = FI.getNextNonDebugInstruction();
- if (auto *NFI = dyn_cast<FenceInst>(Next))
- if (FI.isIdenticalTo(NFI))
+ auto *NFI = dyn_cast<FenceInst>(FI.getNextNonDebugInstruction());
+ // This check is solely here to handle arbitrary target-dependent syncscopes.
+ // TODO: Can remove this if it does not matter in practice.
+ if (NFI && FI.isIdenticalTo(NFI))
+ return eraseInstFromFunction(FI);
+
+ // Returns true if FI1 is an identical or stronger fence than FI2.
+ auto isIdenticalOrStrongerFence = [](FenceInst *FI1, FenceInst *FI2) {
+ auto FI1SyncScope = FI1->getSyncScopeID();
+ // Consider same scope, where scope is global or single-thread.
+ if (FI1SyncScope != FI2->getSyncScopeID() ||
+ (FI1SyncScope != SyncScope::System &&
+ FI1SyncScope != SyncScope::SingleThread))
+ return false;
+
+ return isAtLeastOrStrongerThan(FI1->getOrdering(), FI2->getOrdering());
+ };
+ if (NFI && isIdenticalOrStrongerFence(NFI, &FI))
+ return eraseInstFromFunction(FI);
+
+ if (auto *PFI = dyn_cast_or_null<FenceInst>(FI.getPrevNonDebugInstruction()))
+ if (isIdenticalOrStrongerFence(PFI, &FI))
return eraseInstFromFunction(FI);
return nullptr;
}
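The rewritten visitFenceInst removes a fence when an adjacent fence in the same syncscope already provides at least as strong an ordering. A standalone sketch of that "identical or stronger" test over a simplified ordering lattice (a stand-in for llvm::isAtLeastOrStrongerThan, which also covers monotonic and weaker orderings):

enum class Ordering { Acquire, Release, AcqRel, SeqCst };
enum class Scope { System, SingleThread };

static int strength(Ordering O) {
  switch (O) {
  case Ordering::Acquire:
  case Ordering::Release:
    return 1; // these two do not subsume one another
  case Ordering::AcqRel:
    return 2;
  case Ordering::SeqCst:
    return 3;
  }
  return 0;
}

static bool atLeastAsStrong(Ordering A, Ordering B) {
  if (A == B)
    return true;
  if (strength(A) == 1 && strength(B) == 1)
    return false; // acquire vs. release: neither is stronger
  return strength(A) >= strength(B);
}

struct Fence {
  Ordering O;
  Scope S;
};

// Fence Kept makes fence Candidate redundant if they agree on scope and
// Kept orders at least as strongly.
static bool makesRedundant(const Fence &Kept, const Fence &Candidate) {
  return Kept.S == Candidate.S && atLeastAsStrong(Kept.O, Candidate.O);
}

int main() {
  Fence SC{Ordering::SeqCst, Scope::System};
  Fence Acq{Ordering::Acquire, Scope::System};
  return makesRedundant(SC, Acq) ? 0 : 1; // seq_cst subsumes acquire
}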
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index fd58a44504b3..e45be5745fcc 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -5882,6 +5882,55 @@ static Instruction *foldICmpInvariantGroup(ICmpInst &I) {
return nullptr;
}
+/// This function folds patterns produced by lowering of reduce idioms, such as
+/// llvm.vector.reduce.and which are lowered into instruction chains. This code
+/// attempts to generate fewer scalar comparisons instead of vector
+/// comparisons when possible.
+static Instruction *foldReductionIdiom(ICmpInst &I,
+ InstCombiner::BuilderTy &Builder,
+ const DataLayout &DL) {
+ if (I.getType()->isVectorTy())
+ return nullptr;
+ ICmpInst::Predicate OuterPred, InnerPred;
+ Value *LHS, *RHS;
+
+ // Match lowering of @llvm.vector.reduce.and. Turn
+ /// %vec_ne = icmp ne <8 x i8> %lhs, %rhs
+ /// %scalar_ne = bitcast <8 x i1> %vec_ne to i8
+ /// %res = icmp <pred> i8 %scalar_ne, 0
+ ///
+ /// into
+ ///
+ /// %lhs.scalar = bitcast <8 x i8> %lhs to i64
+ /// %rhs.scalar = bitcast <8 x i8> %rhs to i64
+ /// %res = icmp <pred> i64 %lhs.scalar, %rhs.scalar
+ ///
+ /// for <pred> in {ne, eq}.
+ if (!match(&I, m_ICmp(OuterPred,
+ m_OneUse(m_BitCast(m_OneUse(
+ m_ICmp(InnerPred, m_Value(LHS), m_Value(RHS))))),
+ m_Zero())))
+ return nullptr;
+ auto *LHSTy = dyn_cast<FixedVectorType>(LHS->getType());
+ if (!LHSTy || !LHSTy->getElementType()->isIntegerTy())
+ return nullptr;
+ unsigned NumBits =
+ LHSTy->getNumElements() * LHSTy->getElementType()->getIntegerBitWidth();
+ // TODO: Relax this to "not wider than max legal integer type"?
+ if (!DL.isLegalInteger(NumBits))
+ return nullptr;
+
+ if (ICmpInst::isEquality(OuterPred) && InnerPred == ICmpInst::ICMP_NE) {
+ auto *ScalarTy = Builder.getIntNTy(NumBits);
+ LHS = Builder.CreateBitCast(LHS, ScalarTy, LHS->getName() + ".scalar");
+ RHS = Builder.CreateBitCast(RHS, ScalarTy, RHS->getName() + ".scalar");
+ return ICmpInst::Create(Instruction::ICmp, OuterPred, LHS, RHS,
+ I.getName());
+ }
+
+ return nullptr;
+}
+
Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
bool Changed = false;
const SimplifyQuery Q = SQ.getWithInstruction(&I);
@@ -6124,6 +6173,9 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
if (Instruction *Res = foldICmpInvariantGroup(I))
return Res;
+ if (Instruction *Res = foldReductionIdiom(I, Builder, DL))
+ return Res;
+
return Changed ? &I : nullptr;
}
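foldReductionIdiom above rests on the observation that an all-lanes equality test of two same-sized vectors asks the same question as a single equality test of their full bit patterns. A small runnable C++ analogue of that equivalence, using plain byte arrays and memcpy rather than LLVM IR:

#include <cassert>
#include <cstdint>
#include <cstring>

// The "vector" form: compare all eight lanes.
static bool allLanesEqual(const uint8_t (&A)[8], const uint8_t (&B)[8]) {
  for (int I = 0; I < 8; ++I)
    if (A[I] != B[I])
      return false;
  return true;
}

// The "scalar" form: reinterpret both sides as a 64-bit integer and compare
// once (the memcpy plays the role of the bitcast in the IR above).
static bool scalarEqual(const uint8_t (&A)[8], const uint8_t (&B)[8]) {
  uint64_t X, Y;
  std::memcpy(&X, A, sizeof(X));
  std::memcpy(&Y, B, sizeof(Y));
  return X == Y;
}

int main() {
  uint8_t A[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  uint8_t B[8] = {1, 2, 3, 4, 5, 6, 7, 9};
  assert(allLanesEqual(A, A) == scalarEqual(A, A));
  assert(allLanesEqual(A, B) == scalarEqual(A, B));
  return 0;
}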
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 30f6aab2114b..09694d50468f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -46,8 +46,8 @@ void InstCombinerImpl::PHIArgMergedDebugLoc(Instruction *Inst, PHINode &PN) {
// will be inefficient.
assert(!isa<CallInst>(Inst));
- for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
- auto *I = cast<Instruction>(PN.getIncomingValue(i));
+ for (Value *V : drop_begin(PN.incoming_values())) {
+ auto *I = cast<Instruction>(V);
Inst->applyMergedLocation(Inst->getDebugLoc(), I->getDebugLoc());
}
}
@@ -138,8 +138,9 @@ Instruction *InstCombinerImpl::foldIntegerTypedPHI(PHINode &PN) {
return nullptr;
SmallVector<Value *, 4> AvailablePtrVals;
- for (unsigned i = 0; i != PN.getNumIncomingValues(); ++i) {
- Value *Arg = PN.getIncomingValue(i);
+ for (auto Incoming : zip(PN.blocks(), PN.incoming_values())) {
+ BasicBlock *BB = std::get<0>(Incoming);
+ Value *Arg = std::get<1>(Incoming);
// First look backward:
if (auto *PI = dyn_cast<PtrToIntInst>(Arg)) {
@@ -151,8 +152,8 @@ Instruction *InstCombinerImpl::foldIntegerTypedPHI(PHINode &PN) {
Value *ArgIntToPtr = nullptr;
for (User *U : Arg->users()) {
if (isa<IntToPtrInst>(U) && U->getType() == IntToPtr->getType() &&
- (DT.dominates(cast<Instruction>(U), PN.getIncomingBlock(i)) ||
- cast<Instruction>(U)->getParent() == PN.getIncomingBlock(i))) {
+ (DT.dominates(cast<Instruction>(U), BB) ||
+ cast<Instruction>(U)->getParent() == BB)) {
ArgIntToPtr = U;
break;
}
@@ -190,26 +191,21 @@ Instruction *InstCombinerImpl::foldIntegerTypedPHI(PHINode &PN) {
"Not enough available ptr typed incoming values");
PHINode *MatchingPtrPHI = nullptr;
unsigned NumPhis = 0;
- for (auto II = BB->begin(); II != BB->end(); II++, NumPhis++) {
+ for (PHINode &PtrPHI : BB->phis()) {
// FIXME: consider handling this in AggressiveInstCombine
- PHINode *PtrPHI = dyn_cast<PHINode>(II);
- if (!PtrPHI)
- break;
- if (NumPhis > MaxNumPhis)
+ if (NumPhis++ > MaxNumPhis)
return nullptr;
- if (PtrPHI == &PN || PtrPHI->getType() != IntToPtr->getType())
+ if (&PtrPHI == &PN || PtrPHI.getType() != IntToPtr->getType())
continue;
- MatchingPtrPHI = PtrPHI;
- for (unsigned i = 0; i != PtrPHI->getNumIncomingValues(); ++i) {
- if (AvailablePtrVals[i] !=
- PtrPHI->getIncomingValueForBlock(PN.getIncomingBlock(i))) {
- MatchingPtrPHI = nullptr;
- break;
- }
- }
-
- if (MatchingPtrPHI)
- break;
+ if (any_of(zip(PN.blocks(), AvailablePtrVals),
+ [&](const auto &BlockAndValue) {
+ BasicBlock *BB = std::get<0>(BlockAndValue);
+ Value *V = std::get<1>(BlockAndValue);
+ return PtrPHI.getIncomingValueForBlock(BB) != V;
+ }))
+ continue;
+ MatchingPtrPHI = &PtrPHI;
+ break;
}
if (MatchingPtrPHI) {
@@ -250,9 +246,9 @@ Instruction *InstCombinerImpl::foldIntegerTypedPHI(PHINode &PN) {
InsertNewInstBefore(NewPtrPHI, PN);
SmallDenseMap<Value *, Instruction *> Casts;
- for (unsigned i = 0; i != PN.getNumIncomingValues(); ++i) {
- auto *IncomingBB = PN.getIncomingBlock(i);
- auto *IncomingVal = AvailablePtrVals[i];
+ for (auto Incoming : zip(PN.blocks(), AvailablePtrVals)) {
+ auto *IncomingBB = std::get<0>(Incoming);
+ auto *IncomingVal = std::get<1>(Incoming);
if (IncomingVal->getType() == IntToPtr->getType()) {
NewPtrPHI->addIncoming(IncomingVal, IncomingBB);
@@ -330,8 +326,8 @@ InstCombinerImpl::foldPHIArgInsertValueInstructionIntoPHI(PHINode &PN) {
// Scan to see if all operands are `insertvalue`'s with the same indices,
// and all have a single use.
- for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
- auto *I = dyn_cast<InsertValueInst>(PN.getIncomingValue(i));
+ for (Value *V : drop_begin(PN.incoming_values())) {
+ auto *I = dyn_cast<InsertValueInst>(V);
if (!I || !I->hasOneUser() || I->getIndices() != FirstIVI->getIndices())
return nullptr;
}
@@ -370,8 +366,8 @@ InstCombinerImpl::foldPHIArgExtractValueInstructionIntoPHI(PHINode &PN) {
// Scan to see if all operands are `extractvalue`'s with the same indices,
// and all have a single use.
- for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
- auto *I = dyn_cast<ExtractValueInst>(PN.getIncomingValue(i));
+ for (Value *V : drop_begin(PN.incoming_values())) {
+ auto *I = dyn_cast<ExtractValueInst>(V);
if (!I || !I->hasOneUser() || I->getIndices() != FirstEVI->getIndices() ||
I->getAggregateOperand()->getType() !=
FirstEVI->getAggregateOperand()->getType())
@@ -412,8 +408,8 @@ Instruction *InstCombinerImpl::foldPHIArgBinOpIntoPHI(PHINode &PN) {
Type *RHSType = RHSVal->getType();
// Scan to see if all operands are the same opcode, and all have one user.
- for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
- Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i));
+ for (Value *V : drop_begin(PN.incoming_values())) {
+ Instruction *I = dyn_cast<Instruction>(V);
if (!I || I->getOpcode() != Opc || !I->hasOneUser() ||
// Verify type of the LHS matches so we don't fold cmp's of different
// types.
@@ -461,15 +457,17 @@ Instruction *InstCombinerImpl::foldPHIArgBinOpIntoPHI(PHINode &PN) {
// Add all operands to the new PHIs.
if (NewLHS || NewRHS) {
- for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
- Instruction *InInst = cast<Instruction>(PN.getIncomingValue(i));
+ for (auto Incoming : drop_begin(zip(PN.blocks(), PN.incoming_values()))) {
+ BasicBlock *InBB = std::get<0>(Incoming);
+ Value *InVal = std::get<1>(Incoming);
+ Instruction *InInst = cast<Instruction>(InVal);
if (NewLHS) {
Value *NewInLHS = InInst->getOperand(0);
- NewLHS->addIncoming(NewInLHS, PN.getIncomingBlock(i));
+ NewLHS->addIncoming(NewInLHS, InBB);
}
if (NewRHS) {
Value *NewInRHS = InInst->getOperand(1);
- NewRHS->addIncoming(NewInRHS, PN.getIncomingBlock(i));
+ NewRHS->addIncoming(NewInRHS, InBB);
}
}
}
@@ -487,8 +485,8 @@ Instruction *InstCombinerImpl::foldPHIArgBinOpIntoPHI(PHINode &PN) {
NewBinOp->copyIRFlags(PN.getIncomingValue(0));
- for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i)
- NewBinOp->andIRFlags(PN.getIncomingValue(i));
+ for (Value *V : drop_begin(PN.incoming_values()))
+ NewBinOp->andIRFlags(V);
PHIArgMergedDebugLoc(NewBinOp, PN);
return NewBinOp;
@@ -511,9 +509,8 @@ Instruction *InstCombinerImpl::foldPHIArgGEPIntoPHI(PHINode &PN) {
bool AllInBounds = true;
// Scan to see if all operands are the same opcode, and all have one user.
- for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
- GetElementPtrInst *GEP =
- dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i));
+ for (Value *V : drop_begin(PN.incoming_values())) {
+ GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V);
if (!GEP || !GEP->hasOneUser() || GEP->getType() != FirstInst->getType() ||
GEP->getNumOperands() != FirstInst->getNumOperands())
return nullptr;
@@ -527,8 +524,8 @@ Instruction *InstCombinerImpl::foldPHIArgGEPIntoPHI(PHINode &PN) {
AllBasePointersAreAllocas = false;
// Compare the operand lists.
- for (unsigned op = 0, e = FirstInst->getNumOperands(); op != e; ++op) {
- if (FirstInst->getOperand(op) == GEP->getOperand(op))
+ for (unsigned Op = 0, E = FirstInst->getNumOperands(); Op != E; ++Op) {
+ if (FirstInst->getOperand(Op) == GEP->getOperand(Op))
continue;
// Don't merge two GEPs when two operands differ (introducing phi nodes)
@@ -536,11 +533,12 @@ Instruction *InstCombinerImpl::foldPHIArgGEPIntoPHI(PHINode &PN) {
// substantially cheaper to compute for the constants, so making it a
// variable index could pessimize the path. This also handles the case
// for struct indices, which must always be constant.
- if (isa<ConstantInt>(FirstInst->getOperand(op)) ||
- isa<ConstantInt>(GEP->getOperand(op)))
+ if (isa<ConstantInt>(FirstInst->getOperand(Op)) ||
+ isa<ConstantInt>(GEP->getOperand(Op)))
return nullptr;
- if (FirstInst->getOperand(op)->getType() !=GEP->getOperand(op)->getType())
+ if (FirstInst->getOperand(Op)->getType() !=
+ GEP->getOperand(Op)->getType())
return nullptr;
// If we already needed a PHI for an earlier operand, and another operand
@@ -550,7 +548,7 @@ Instruction *InstCombinerImpl::foldPHIArgGEPIntoPHI(PHINode &PN) {
if (NeededPhi)
return nullptr;
- FixedOperands[op] = nullptr; // Needs a PHI.
+ FixedOperands[Op] = nullptr; // Needs a PHI.
NeededPhi = true;
}
}
@@ -569,29 +567,30 @@ Instruction *InstCombinerImpl::foldPHIArgGEPIntoPHI(PHINode &PN) {
SmallVector<PHINode*, 16> OperandPhis(FixedOperands.size());
bool HasAnyPHIs = false;
- for (unsigned i = 0, e = FixedOperands.size(); i != e; ++i) {
- if (FixedOperands[i]) continue; // operand doesn't need a phi.
- Value *FirstOp = FirstInst->getOperand(i);
- PHINode *NewPN = PHINode::Create(FirstOp->getType(), e,
- FirstOp->getName()+".pn");
+ for (unsigned I = 0, E = FixedOperands.size(); I != E; ++I) {
+ if (FixedOperands[I])
+ continue; // operand doesn't need a phi.
+ Value *FirstOp = FirstInst->getOperand(I);
+ PHINode *NewPN =
+ PHINode::Create(FirstOp->getType(), E, FirstOp->getName() + ".pn");
InsertNewInstBefore(NewPN, PN);
NewPN->addIncoming(FirstOp, PN.getIncomingBlock(0));
- OperandPhis[i] = NewPN;
- FixedOperands[i] = NewPN;
+ OperandPhis[I] = NewPN;
+ FixedOperands[I] = NewPN;
HasAnyPHIs = true;
}
-
// Add all operands to the new PHIs.
if (HasAnyPHIs) {
- for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
- GetElementPtrInst *InGEP =cast<GetElementPtrInst>(PN.getIncomingValue(i));
- BasicBlock *InBB = PN.getIncomingBlock(i);
-
- for (unsigned op = 0, e = OperandPhis.size(); op != e; ++op)
- if (PHINode *OpPhi = OperandPhis[op])
- OpPhi->addIncoming(InGEP->getOperand(op), InBB);
+ for (auto Incoming : drop_begin(zip(PN.blocks(), PN.incoming_values()))) {
+ BasicBlock *InBB = std::get<0>(Incoming);
+ Value *InVal = std::get<1>(Incoming);
+ GetElementPtrInst *InGEP = cast<GetElementPtrInst>(InVal);
+
+ for (unsigned Op = 0, E = OperandPhis.size(); Op != E; ++Op)
+ if (PHINode *OpPhi = OperandPhis[Op])
+ OpPhi->addIncoming(InGEP->getOperand(Op), InBB);
}
}
@@ -627,18 +626,18 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
// Check for non-address taken alloca. If not address-taken already, it isn't
// profitable to do this xform.
if (AllocaInst *AI = dyn_cast<AllocaInst>(L->getOperand(0))) {
- bool isAddressTaken = false;
+ bool IsAddressTaken = false;
for (User *U : AI->users()) {
if (isa<LoadInst>(U)) continue;
if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
// If storing TO the alloca, then the address isn't taken.
if (SI->getOperand(1) == AI) continue;
}
- isAddressTaken = true;
+ IsAddressTaken = true;
break;
}
- if (!isAddressTaken && AI->isStaticAlloca())
+ if (!IsAddressTaken && AI->isStaticAlloca())
return false;
}
@@ -665,9 +664,9 @@ Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) {
// When processing loads, we need to propagate two bits of information to the
// sunk load: whether it is volatile, and what its alignment is.
- bool isVolatile = FirstLI->isVolatile();
+ bool IsVolatile = FirstLI->isVolatile();
Align LoadAlignment = FirstLI->getAlign();
- unsigned LoadAddrSpace = FirstLI->getPointerAddressSpace();
+ const unsigned LoadAddrSpace = FirstLI->getPointerAddressSpace();
// We can't sink the load if the loaded value could be modified between the
// load and the PHI.
@@ -678,22 +677,25 @@ Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) {
// If the PHI is of volatile loads and the load block has multiple
// successors, sinking it would remove a load of the volatile value from
// the path through the other successor.
- if (isVolatile &&
+ if (IsVolatile &&
FirstLI->getParent()->getTerminator()->getNumSuccessors() != 1)
return nullptr;
- // Check to see if all arguments are the same operation.
- for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
- LoadInst *LI = dyn_cast<LoadInst>(PN.getIncomingValue(i));
- if (!LI || !LI->hasOneUser())
+ for (auto Incoming : drop_begin(zip(PN.blocks(), PN.incoming_values()))) {
+ BasicBlock *InBB = std::get<0>(Incoming);
+ Value *InVal = std::get<1>(Incoming);
+ LoadInst *LI = dyn_cast<LoadInst>(InVal);
+ if (!LI || !LI->hasOneUser() || LI->isAtomic())
+ return nullptr;
+
+ // Make sure all arguments are the same type of operation.
+ if (LI->isVolatile() != IsVolatile ||
+ LI->getPointerAddressSpace() != LoadAddrSpace)
return nullptr;
// We can't sink the load if the loaded value could be modified between
// the load and the PHI.
- if (LI->isVolatile() != isVolatile ||
- LI->getParent() != PN.getIncomingBlock(i) ||
- LI->getPointerAddressSpace() != LoadAddrSpace ||
- !isSafeAndProfitableToSinkLoad(LI))
+ if (LI->getParent() != InBB || !isSafeAndProfitableToSinkLoad(LI))
return nullptr;
LoadAlignment = std::min(LoadAlignment, LI->getAlign());
@@ -701,8 +703,7 @@ Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) {
// If the PHI is of volatile loads and the load block has multiple
// successors, sinking it would remove a load of the volatile value from
// the path through the other successor.
- if (isVolatile &&
- LI->getParent()->getTerminator()->getNumSuccessors() != 1)
+ if (IsVolatile && LI->getParent()->getTerminator()->getNumSuccessors() != 1)
return nullptr;
}
@@ -715,7 +716,7 @@ Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) {
Value *InVal = FirstLI->getOperand(0);
NewPN->addIncoming(InVal, PN.getIncomingBlock(0));
LoadInst *NewLI =
- new LoadInst(FirstLI->getType(), NewPN, "", isVolatile, LoadAlignment);
+ new LoadInst(FirstLI->getType(), NewPN, "", IsVolatile, LoadAlignment);
unsigned KnownIDs[] = {
LLVMContext::MD_tbaa,
@@ -734,13 +735,15 @@ Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) {
NewLI->setMetadata(ID, FirstLI->getMetadata(ID));
// Add all operands to the new PHI and combine TBAA metadata.
- for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
- LoadInst *LI = cast<LoadInst>(PN.getIncomingValue(i));
+ for (auto Incoming : drop_begin(zip(PN.blocks(), PN.incoming_values()))) {
+ BasicBlock *BB = std::get<0>(Incoming);
+ Value *V = std::get<1>(Incoming);
+ LoadInst *LI = cast<LoadInst>(V);
combineMetadata(NewLI, LI, KnownIDs, true);
Value *NewInVal = LI->getOperand(0);
if (NewInVal != InVal)
InVal = nullptr;
- NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i));
+ NewPN->addIncoming(NewInVal, BB);
}
if (InVal) {
@@ -755,7 +758,7 @@ Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) {
// If this was a volatile load that we are merging, make sure to loop through
// and mark all the input loads as non-volatile. If we don't do this, we will
// insert a new volatile load and the old ones will not be deletable.
- if (isVolatile)
+ if (IsVolatile)
for (Value *IncValue : PN.incoming_values())
cast<LoadInst>(IncValue)->setVolatile(false);
@@ -830,8 +833,8 @@ Instruction *InstCombinerImpl::foldPHIArgZextsIntoPHI(PHINode &Phi) {
// operands, and zext the result back to the original type.
PHINode *NewPhi = PHINode::Create(NarrowType, NumIncomingValues,
Phi.getName() + ".shrunk");
- for (unsigned i = 0; i != NumIncomingValues; ++i)
- NewPhi->addIncoming(NewIncoming[i], Phi.getIncomingBlock(i));
+ for (unsigned I = 0; I != NumIncomingValues; ++I)
+ NewPhi->addIncoming(NewIncoming[I], Phi.getIncomingBlock(I));
InsertNewInstBefore(NewPhi, Phi);
return CastInst::CreateZExtOrBitCast(NewPhi, Phi.getType());
@@ -885,13 +888,13 @@ Instruction *InstCombinerImpl::foldPHIArgOpIntoPHI(PHINode &PN) {
}
// Check to see if all arguments are the same operation.
- for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
- Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i));
+ for (Value *V : drop_begin(PN.incoming_values())) {
+ Instruction *I = dyn_cast<Instruction>(V);
if (!I || !I->hasOneUser() || !I->isSameOperationAs(FirstInst))
return nullptr;
if (CastSrcTy) {
if (I->getOperand(0)->getType() != CastSrcTy)
- return nullptr; // Cast operation must match.
+ return nullptr; // Cast operation must match.
} else if (I->getOperand(1) != ConstantOp) {
return nullptr;
}
@@ -907,11 +910,13 @@ Instruction *InstCombinerImpl::foldPHIArgOpIntoPHI(PHINode &PN) {
NewPN->addIncoming(InVal, PN.getIncomingBlock(0));
// Add all operands to the new PHI.
- for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
- Value *NewInVal = cast<Instruction>(PN.getIncomingValue(i))->getOperand(0);
+ for (auto Incoming : drop_begin(zip(PN.blocks(), PN.incoming_values()))) {
+ BasicBlock *BB = std::get<0>(Incoming);
+ Value *V = std::get<1>(Incoming);
+ Value *NewInVal = cast<Instruction>(V)->getOperand(0);
if (NewInVal != InVal)
InVal = nullptr;
- NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i));
+ NewPN->addIncoming(NewInVal, BB);
}
Value *PhiVal;
@@ -937,8 +942,8 @@ Instruction *InstCombinerImpl::foldPHIArgOpIntoPHI(PHINode &PN) {
BinOp = BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp);
BinOp->copyIRFlags(PN.getIncomingValue(0));
- for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i)
- BinOp->andIRFlags(PN.getIncomingValue(i));
+ for (Value *V : drop_begin(PN.incoming_values()))
+ BinOp->andIRFlags(V);
PHIArgMergedDebugLoc(BinOp, PN);
return BinOp;
@@ -952,8 +957,8 @@ Instruction *InstCombinerImpl::foldPHIArgOpIntoPHI(PHINode &PN) {
}
/// Return true if this PHI node is only used by a PHI node cycle that is dead.
-static bool DeadPHICycle(PHINode *PN,
- SmallPtrSetImpl<PHINode*> &PotentiallyDeadPHIs) {
+static bool isDeadPHICycle(PHINode *PN,
+ SmallPtrSetImpl<PHINode *> &PotentiallyDeadPHIs) {
if (PN->use_empty()) return true;
if (!PN->hasOneUse()) return false;
@@ -966,7 +971,7 @@ static bool DeadPHICycle(PHINode *PN,
return false;
if (PHINode *PU = dyn_cast<PHINode>(PN->user_back()))
- return DeadPHICycle(PU, PotentiallyDeadPHIs);
+ return isDeadPHICycle(PU, PotentiallyDeadPHIs);
return false;
}
@@ -999,7 +1004,7 @@ static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
/// Return an existing non-zero constant if this phi node has one, otherwise
/// return constant 1.
-static ConstantInt *GetAnyNonZeroConstInt(PHINode &PN) {
+static ConstantInt *getAnyNonZeroConstInt(PHINode &PN) {
assert(isa<IntegerType>(PN.getType()) && "Expect only integer type phi");
for (Value *V : PN.operands())
if (auto *ConstVA = dyn_cast<ConstantInt>(V))
@@ -1014,8 +1019,8 @@ struct PHIUsageRecord {
unsigned Shift; // The amount shifted.
Instruction *Inst; // The trunc instruction.
- PHIUsageRecord(unsigned pn, unsigned Sh, Instruction *User)
- : PHIId(pn), Shift(Sh), Inst(User) {}
+ PHIUsageRecord(unsigned Pn, unsigned Sh, Instruction *User)
+ : PHIId(Pn), Shift(Sh), Inst(User) {}
bool operator<(const PHIUsageRecord &RHS) const {
if (PHIId < RHS.PHIId) return true;
@@ -1032,12 +1037,11 @@ struct LoweredPHIRecord {
unsigned Shift; // The amount shifted.
unsigned Width; // The width extracted.
- LoweredPHIRecord(PHINode *pn, unsigned Sh, Type *Ty)
- : PN(pn), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {}
+ LoweredPHIRecord(PHINode *Phi, unsigned Sh, Type *Ty)
+ : PN(Phi), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {}
// Ctor form used by DenseMap.
- LoweredPHIRecord(PHINode *pn, unsigned Sh)
- : PN(pn), Shift(Sh), Width(0) {}
+ LoweredPHIRecord(PHINode *Phi, unsigned Sh) : PN(Phi), Shift(Sh), Width(0) {}
};
} // namespace
@@ -1093,10 +1097,13 @@ Instruction *InstCombinerImpl::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
// input is defined in the predecessor, then we won't be able to split the critical
// edge which is required to insert a truncate. Because of this, we have to
// bail out.
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- InvokeInst *II = dyn_cast<InvokeInst>(PN->getIncomingValue(i));
- if (!II) continue;
- if (II->getParent() != PN->getIncomingBlock(i))
+ for (auto Incoming : zip(PN->blocks(), PN->incoming_values())) {
+ BasicBlock *BB = std::get<0>(Incoming);
+ Value *V = std::get<1>(Incoming);
+ InvokeInst *II = dyn_cast<InvokeInst>(V);
+ if (!II)
+ continue;
+ if (II->getParent() != BB)
continue;
// If we have a phi, and if it's directly in the predecessor, then we have
@@ -1146,8 +1153,8 @@ Instruction *InstCombinerImpl::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
array_pod_sort(PHIUsers.begin(), PHIUsers.end());
LLVM_DEBUG(dbgs() << "SLICING UP PHI: " << FirstPhi << '\n';
- for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i) dbgs()
- << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] << '\n';);
+ for (unsigned I = 1; I != PHIsToSlice.size(); ++I) dbgs()
+ << "AND USER PHI #" << I << ": " << *PHIsToSlice[I] << '\n');
// PredValues - This is a temporary used when rewriting PHI nodes. It is
// hoisted out here to avoid construction/destruction thrashing.
@@ -1175,8 +1182,9 @@ Instruction *InstCombinerImpl::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
assert(EltPHI->getType() != PN->getType() &&
"Truncate didn't shrink phi?");
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- BasicBlock *Pred = PN->getIncomingBlock(i);
+ for (auto Incoming : zip(PN->blocks(), PN->incoming_values())) {
+ BasicBlock *Pred = std::get<0>(Incoming);
+ Value *InVal = std::get<1>(Incoming);
Value *&PredVal = PredValues[Pred];
// If we already have a value for this predecessor, reuse it.
@@ -1186,7 +1194,6 @@ Instruction *InstCombinerImpl::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
}
// Handle the PHI self-reuse case.
- Value *InVal = PN->getIncomingValue(i);
if (InVal == PN) {
PredVal = EltPHI;
EltPHI->addIncoming(PredVal, Pred);
@@ -1207,8 +1214,8 @@ Instruction *InstCombinerImpl::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
Builder.SetInsertPoint(Pred->getTerminator());
Value *Res = InVal;
if (Offset)
- Res = Builder.CreateLShr(Res, ConstantInt::get(InVal->getType(),
- Offset), "extract");
+ Res = Builder.CreateLShr(
+ Res, ConstantInt::get(InVal->getType(), Offset), "extract");
Res = Builder.CreateTrunc(Res, Ty, "extract.t");
PredVal = Res;
EltPHI->addIncoming(Res, Pred);
@@ -1217,12 +1224,12 @@ Instruction *InstCombinerImpl::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
// rewriting, we will ultimately delete the code we inserted. This
// means we need to revisit that PHI to make sure we extract out the
// needed piece.
- if (PHINode *OldInVal = dyn_cast<PHINode>(PN->getIncomingValue(i)))
+ if (PHINode *OldInVal = dyn_cast<PHINode>(InVal))
if (PHIsInspected.count(OldInVal)) {
unsigned RefPHIId =
find(PHIsToSlice, OldInVal) - PHIsToSlice.begin();
- PHIUsers.push_back(PHIUsageRecord(RefPHIId, Offset,
- cast<Instruction>(Res)));
+ PHIUsers.push_back(
+ PHIUsageRecord(RefPHIId, Offset, cast<Instruction>(Res)));
++UserE;
}
}
@@ -1240,12 +1247,12 @@ Instruction *InstCombinerImpl::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
// Replace all the remaining uses of the PHI nodes (self uses and the lshrs)
// with poison.
Value *Poison = PoisonValue::get(FirstPhi.getType());
- for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i)
- replaceInstUsesWith(*PHIsToSlice[i], Poison);
+ for (PHINode *PHI : drop_begin(PHIsToSlice))
+ replaceInstUsesWith(*PHI, Poison);
return replaceInstUsesWith(FirstPhi, Poison);
}
-static Value *SimplifyUsingControlFlow(InstCombiner &Self, PHINode &PN,
+static Value *simplifyUsingControlFlow(InstCombiner &Self, PHINode &PN,
const DominatorTree &DT) {
// Simplify the following patterns:
// if (cond)
@@ -1302,8 +1309,8 @@ static Value *SimplifyUsingControlFlow(InstCombiner &Self, PHINode &PN,
DT.dominates(FalseOutEdge, FalseIncEdge))
// This Phi is actually equivalent to branching condition of IDom.
return Cond;
- else if (DT.dominates(TrueOutEdge, FalseIncEdge) &&
- DT.dominates(FalseOutEdge, TrueIncEdge)) {
+ if (DT.dominates(TrueOutEdge, FalseIncEdge) &&
+ DT.dominates(FalseOutEdge, TrueIncEdge)) {
// This Phi is actually opposite to branching condition of IDom. We invert
// the condition that will potentially open up some opportunities for
// sinking.
@@ -1369,7 +1376,7 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {
if (PHINode *PU = dyn_cast<PHINode>(PHIUser)) {
SmallPtrSet<PHINode*, 16> PotentiallyDeadPHIs;
PotentiallyDeadPHIs.insert(&PN);
- if (DeadPHICycle(PU, PotentiallyDeadPHIs))
+ if (isDeadPHICycle(PU, PotentiallyDeadPHIs))
return replaceInstUsesWith(PN, PoisonValue::get(PN.getType()));
}
@@ -1398,15 +1405,15 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {
match(CmpInst->getOperand(1), m_Zero())) {
ConstantInt *NonZeroConst = nullptr;
bool MadeChange = false;
- for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
- Instruction *CtxI = PN.getIncomingBlock(i)->getTerminator();
- Value *VA = PN.getIncomingValue(i);
+ for (unsigned I = 0, E = PN.getNumIncomingValues(); I != E; ++I) {
+ Instruction *CtxI = PN.getIncomingBlock(I)->getTerminator();
+ Value *VA = PN.getIncomingValue(I);
if (isKnownNonZero(VA, DL, 0, &AC, CtxI, &DT)) {
if (!NonZeroConst)
- NonZeroConst = GetAnyNonZeroConstInt(PN);
+ NonZeroConst = getAnyNonZeroConstInt(PN);
if (NonZeroConst != VA) {
- replaceOperand(PN, i, NonZeroConst);
+ replaceOperand(PN, I, NonZeroConst);
MadeChange = true;
}
}
@@ -1457,17 +1464,17 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {
// however.
PHINode *FirstPN = cast<PHINode>(PN.getParent()->begin());
if (&PN != FirstPN)
- for (unsigned i = 0, e = FirstPN->getNumIncomingValues(); i != e; ++i) {
- BasicBlock *BBA = PN.getIncomingBlock(i);
- BasicBlock *BBB = FirstPN->getIncomingBlock(i);
+ for (unsigned I = 0, E = FirstPN->getNumIncomingValues(); I != E; ++I) {
+ BasicBlock *BBA = PN.getIncomingBlock(I);
+ BasicBlock *BBB = FirstPN->getIncomingBlock(I);
if (BBA != BBB) {
- Value *VA = PN.getIncomingValue(i);
- unsigned j = PN.getBasicBlockIndex(BBB);
- Value *VB = PN.getIncomingValue(j);
- PN.setIncomingBlock(i, BBB);
- PN.setIncomingValue(i, VB);
- PN.setIncomingBlock(j, BBA);
- PN.setIncomingValue(j, VA);
+ Value *VA = PN.getIncomingValue(I);
+ unsigned J = PN.getBasicBlockIndex(BBB);
+ Value *VB = PN.getIncomingValue(J);
+ PN.setIncomingBlock(I, BBB);
+ PN.setIncomingValue(I, VB);
+ PN.setIncomingBlock(J, BBA);
+ PN.setIncomingValue(J, VA);
// NOTE: Instcombine normally would want us to "return &PN" if we
// modified any of the operands of an instruction. However, since we
// aren't adding or removing uses (just rearranging them) we don't do
@@ -1500,7 +1507,7 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {
return Res;
// Ultimately, try to replace this Phi with a dominating condition.
- if (auto *V = SimplifyUsingControlFlow(*this, PN, DT))
+ if (auto *V = simplifyUsingControlFlow(*this, PN, DT))
return replaceInstUsesWith(PN, V);
return nullptr;
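Most of the mechanical churn in InstCombinePHI.cpp replaces index-based loops with range-based iteration over zip(PN.blocks(), PN.incoming_values()), often wrapped in drop_begin to skip the first incoming value. A minimal usage sketch of those two ADT helpers over ordinary vectors (assumes only llvm/ADT/STLExtras.h):

#include "llvm/ADT/STLExtras.h"
#include <cstdio>
#include <tuple>
#include <vector>

int main() {
  std::vector<const char *> Blocks = {"entry", "if.then", "if.else"};
  std::vector<int> Incoming = {10, 20, 30};
  // Visit every (block, value) pair except the first, mirroring
  // drop_begin(zip(PN.blocks(), PN.incoming_values())) in the code above.
  for (auto Pair : llvm::drop_begin(llvm::zip(Blocks, Incoming))) {
    const char *BB = std::get<0>(Pair);
    int V = std::get<1>(Pair);
    std::printf("%s -> %d\n", BB, V);
  }
  return 0;
}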
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 71a5ae24eead..3f064cfda712 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -1219,7 +1219,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
for (auto I = gep_type_begin(GEP), E = gep_type_end(GEP);
I != E; I++)
if (I.isStruct())
- return true;;
+ return true;
return false;
};
if (mayIndexStructType(cast<GetElementPtrInst>(*I)))
@@ -1228,10 +1228,11 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
// Conservatively track the demanded elements back through any vector
// operands we may have. We know there must be at least one, or we
// wouldn't have a vector result to get here. Note that we intentionally
- // merge the undef bits here since gepping with either an undef base or
- // index results in undef.
+ // merge the undef bits here since gepping with either a poison base or
+ // index results in poison.
for (unsigned i = 0; i < I->getNumOperands(); i++) {
- if (match(I->getOperand(i), m_Undef())) {
+ if (i == 0 ? match(I->getOperand(i), m_Undef())
+ : match(I->getOperand(i), m_Poison())) {
// If the entire vector is undefined, just return this info.
UndefElts = EltMask;
return nullptr;
@@ -1239,7 +1240,11 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
if (I->getOperand(i)->getType()->isVectorTy()) {
APInt UndefEltsOp(VWidth, 0);
simplifyAndSetOp(I, i, DemandedElts, UndefEltsOp);
- UndefElts |= UndefEltsOp;
+ // gep(x, undef) is not undef, so skip considering idx ops here
+ // Note that we could propagate poison, but we can't distinguish between
+ // undef & poison bits ATM
+ if (i == 0)
+ UndefElts |= UndefEltsOp;
}
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 029be5257694..3091905ca534 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -68,6 +68,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 6e72255e51ae..8f94172a6402 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -1527,22 +1527,22 @@ void AddressSanitizer::getInterestingMemoryOperands(
return;
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- if (!ClInstrumentReads || ignoreAccess(LI, LI->getPointerOperand()))
+ if (!ClInstrumentReads || ignoreAccess(I, LI->getPointerOperand()))
return;
Interesting.emplace_back(I, LI->getPointerOperandIndex(), false,
LI->getType(), LI->getAlign());
} else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- if (!ClInstrumentWrites || ignoreAccess(LI, SI->getPointerOperand()))
+ if (!ClInstrumentWrites || ignoreAccess(I, SI->getPointerOperand()))
return;
Interesting.emplace_back(I, SI->getPointerOperandIndex(), true,
SI->getValueOperand()->getType(), SI->getAlign());
} else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
- if (!ClInstrumentAtomics || ignoreAccess(LI, RMW->getPointerOperand()))
+ if (!ClInstrumentAtomics || ignoreAccess(I, RMW->getPointerOperand()))
return;
Interesting.emplace_back(I, RMW->getPointerOperandIndex(), true,
RMW->getValOperand()->getType(), None);
} else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
- if (!ClInstrumentAtomics || ignoreAccess(LI, XCHG->getPointerOperand()))
+ if (!ClInstrumentAtomics || ignoreAccess(I, XCHG->getPointerOperand()))
return;
Interesting.emplace_back(I, XCHG->getPointerOperandIndex(), true,
XCHG->getCompareOperand()->getType(), None);
@@ -1556,7 +1556,7 @@ void AddressSanitizer::getInterestingMemoryOperands(
return;
auto BasePtr = CI->getOperand(OpOffset);
- if (ignoreAccess(LI, BasePtr))
+ if (ignoreAccess(I, BasePtr))
return;
Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
MaybeAlign Alignment = Align(1);
@@ -1568,7 +1568,7 @@ void AddressSanitizer::getInterestingMemoryOperands(
} else {
for (unsigned ArgNo = 0; ArgNo < CI->arg_size(); ArgNo++) {
if (!ClInstrumentByval || !CI->isByValArgument(ArgNo) ||
- ignoreAccess(LI, CI->getArgOperand(ArgNo)))
+ ignoreAccess(I, CI->getArgOperand(ArgNo)))
continue;
Type *Ty = CI->getParamByValType(ArgNo);
Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
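The getInterestingMemoryOperands change above fixes a scoping bug: LI is declared in the first if condition, so it stays visible, as a null pointer, in the later else-if branches, and ignoreAccess was being handed that null pointer instead of the instruction being classified. A tiny standalone reproduction of the pitfall with stand-in types (not the sanitizer API):

#include <cstdio>

struct Inst { int Kind; }; // 0 = load, 1 = store (stand-ins)

static Inst *asLoad(Inst *I) { return I->Kind == 0 ? I : nullptr; }
static Inst *asStore(Inst *I) { return I->Kind == 1 ? I : nullptr; }

static bool ignoreAccess(Inst *I) { return I == nullptr; } // pretend check

void classify(Inst *I) {
  if (Inst *LI = asLoad(I)) {
    if (ignoreAccess(LI)) // fine: LI == I in this branch
      return;
    std::puts("instrument load");
  } else if (Inst *SI = asStore(I)) {
    // Buggy form: ignoreAccess(LI) -- LI is still in scope here but null,
    // so every store would be skipped. The fix passes I instead.
    if (ignoreAccess(I))
      return;
    std::printf("instrument store at %p\n", static_cast<void *>(SI));
  }
}

int main() {
  Inst S{1};
  classify(&S); // prints "instrument store at ..."
  return 0;
}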
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index fb10a99d1338..7b3741d19a1b 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -304,6 +304,7 @@ public:
static bool isStandardLifetime(const AllocaInfo &AllocaInfo,
const DominatorTree &DT);
bool instrumentStack(
+ bool ShouldDetectUseAfterScope,
MapVector<AllocaInst *, AllocaInfo> &AllocasToInstrument,
SmallVector<Instruction *, 4> &UnrecognizedLifetimes,
DenseMap<AllocaInst *, std::vector<DbgVariableIntrinsic *>> &AllocaDbgMap,
@@ -1359,6 +1360,7 @@ bool HWAddressSanitizer::isStandardLifetime(const AllocaInfo &AllocaInfo,
}
bool HWAddressSanitizer::instrumentStack(
+ bool ShouldDetectUseAfterScope,
MapVector<AllocaInst *, AllocaInfo> &AllocasToInstrument,
SmallVector<Instruction *, 4> &UnrecognizedLifetimes,
DenseMap<AllocaInst *, std::vector<DbgVariableIntrinsic *>> &AllocaDbgMap,
@@ -1410,7 +1412,7 @@ bool HWAddressSanitizer::instrumentStack(
};
bool StandardLifetime =
UnrecognizedLifetimes.empty() && isStandardLifetime(Info, GetDT());
- if (DetectUseAfterScope && StandardLifetime) {
+ if (ShouldDetectUseAfterScope && StandardLifetime) {
IntrinsicInst *Start = Info.LifetimeStart[0];
IRB.SetInsertPoint(Start->getNextNode());
tagAlloca(IRB, AI, Tag, Size);
@@ -1505,8 +1507,14 @@ bool HWAddressSanitizer::sanitizeFunction(
SmallVector<Instruction *, 8> LandingPadVec;
SmallVector<Instruction *, 4> UnrecognizedLifetimes;
DenseMap<AllocaInst *, std::vector<DbgVariableIntrinsic *>> AllocaDbgMap;
+ bool CallsReturnTwice = false;
for (auto &BB : F) {
for (auto &Inst : BB) {
+ if (CallInst *CI = dyn_cast<CallInst>(&Inst)) {
+ if (CI->canReturnTwice()) {
+ CallsReturnTwice = true;
+ }
+ }
if (InstrumentStack) {
if (AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
if (isInterestingAlloca(*AI))
@@ -1531,9 +1539,14 @@ bool HWAddressSanitizer::sanitizeFunction(
}
}
- if (isa<ReturnInst>(Inst) || isa<ResumeInst>(Inst) ||
- isa<CleanupReturnInst>(Inst))
+ if (isa<ReturnInst>(Inst)) {
+ if (CallInst *CI = Inst.getParent()->getTerminatingMustTailCall())
+ RetVec.push_back(CI);
+ else
+ RetVec.push_back(&Inst);
+ } else if (isa<ResumeInst, CleanupReturnInst>(Inst)) {
RetVec.push_back(&Inst);
+ }
if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&Inst)) {
for (Value *V : DVI->location_ops()) {
@@ -1585,7 +1598,12 @@ bool HWAddressSanitizer::sanitizeFunction(
if (!AllocasToInstrument.empty()) {
Value *StackTag =
ClGenerateTagsWithCalls ? nullptr : getStackBaseTag(EntryIRB);
- instrumentStack(AllocasToInstrument, UnrecognizedLifetimes, AllocaDbgMap,
+ // Calls to functions that may return twice (e.g. setjmp) confuse the
+ // postdominator analysis, and can leave memory tagged after the function
+ // returns. Work around this by always untagging at every return statement
+ // if any returns_twice functions are called.
+ instrumentStack(DetectUseAfterScope && !CallsReturnTwice,
+ AllocasToInstrument, UnrecognizedLifetimes, AllocaDbgMap,
RetVec, StackTag, GetDT, GetPDT);
}
// Pad and align each of the allocas that we instrumented to stop small
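The hunk above threads a ShouldDetectUseAfterScope flag into instrumentStack and clears it whenever the function calls something marked returns_twice. As a minimal standalone sketch of why such calls are special (plain ISO C++, not the sanitizer code itself): setjmp gives one call site two return paths, so any cleanup placed on the assumption of a single post-dominating return can be skipped.

#include <csetjmp>
#include <cstdio>

static std::jmp_buf Env;

static void MayJumpBack() { std::longjmp(Env, 1); }

int main() {
  // setjmp "returns twice": once directly, and once more for every longjmp.
  // A transformation that places a single untag/cleanup point after the call,
  // relying on post-dominance, misses the second return path entirely.
  if (setjmp(Env) == 0) {
    std::puts("first return: fell through setjmp");
    MayJumpBack(); // control reappears at the setjmp call site
  } else {
    std::puts("second return: arrived via longjmp");
  }
  return 0;
}

Hence the conservative fallback above: when any returns_twice callee is present, allocas are untagged at every return instead of relying on lifetime markers.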
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index ab179b03dd29..6868408ef5f5 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -456,6 +456,9 @@ bool InstrProfiling::lowerIntrinsics(Function *F) {
} else if (auto *IPI = dyn_cast<InstrProfIncrementInst>(&Instr)) {
lowerIncrement(IPI);
MadeChange = true;
+ } else if (auto *IPC = dyn_cast<InstrProfCoverInst>(&Instr)) {
+ lowerCover(IPC);
+ MadeChange = true;
} else if (auto *IPVP = dyn_cast<InstrProfValueProfileInst>(&Instr)) {
lowerValueProfileInst(IPVP);
MadeChange = true;
@@ -539,7 +542,8 @@ static bool containsProfilingIntrinsics(Module &M) {
return !F->use_empty();
return false;
};
- return containsIntrinsic(llvm::Intrinsic::instrprof_increment) ||
+ return containsIntrinsic(llvm::Intrinsic::instrprof_cover) ||
+ containsIntrinsic(llvm::Intrinsic::instrprof_increment) ||
containsIntrinsic(llvm::Intrinsic::instrprof_increment_step) ||
containsIntrinsic(llvm::Intrinsic::instrprof_value_profile);
}
@@ -689,47 +693,58 @@ void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
Ind->eraseFromParent();
}
-void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
- GlobalVariable *Counters = getOrCreateRegionCounters(Inc);
-
- IRBuilder<> Builder(Inc);
- uint64_t Index = Inc->getIndex()->getZExtValue();
- Value *Addr = Builder.CreateConstInBoundsGEP2_32(Counters->getValueType(),
- Counters, 0, Index);
-
- if (isRuntimeCounterRelocationEnabled()) {
- Type *Int64Ty = Type::getInt64Ty(M->getContext());
- Type *Int64PtrTy = Type::getInt64PtrTy(M->getContext());
- Function *Fn = Inc->getParent()->getParent();
- Instruction &I = Fn->getEntryBlock().front();
- LoadInst *LI = dyn_cast<LoadInst>(&I);
- if (!LI) {
- IRBuilder<> Builder(&I);
- GlobalVariable *Bias =
- M->getGlobalVariable(getInstrProfCounterBiasVarName());
- if (!Bias) {
- // Compiler must define this variable when runtime counter relocation
- // is being used. Runtime has a weak external reference that is used
- // to check whether that's the case or not.
- Bias = new GlobalVariable(
- *M, Int64Ty, false, GlobalValue::LinkOnceODRLinkage,
- Constant::getNullValue(Int64Ty), getInstrProfCounterBiasVarName());
- Bias->setVisibility(GlobalVariable::HiddenVisibility);
- // A definition that's weak (linkonce_odr) without being in a COMDAT
- // section wouldn't lead to link errors, but it would lead to a dead
- // data word from every TU but one. Putting it in COMDAT ensures there
- // will be exactly one data slot in the link.
- if (TT.supportsCOMDAT())
- Bias->setComdat(M->getOrInsertComdat(Bias->getName()));
- }
- LI = Builder.CreateLoad(Int64Ty, Bias);
+Value *InstrProfiling::getCounterAddress(InstrProfInstBase *I) {
+ auto *Counters = getOrCreateRegionCounters(I);
+ IRBuilder<> Builder(I);
+
+ auto *Addr = Builder.CreateConstInBoundsGEP2_32(
+ Counters->getValueType(), Counters, 0, I->getIndex()->getZExtValue());
+
+ if (!isRuntimeCounterRelocationEnabled())
+ return Addr;
+
+ Type *Int64Ty = Type::getInt64Ty(M->getContext());
+ Function *Fn = I->getParent()->getParent();
+ Instruction &EntryI = Fn->getEntryBlock().front();
+ LoadInst *LI = dyn_cast<LoadInst>(&EntryI);
+ if (!LI) {
+ IRBuilder<> EntryBuilder(&EntryI);
+ auto *Bias = M->getGlobalVariable(getInstrProfCounterBiasVarName());
+ if (!Bias) {
+ // Compiler must define this variable when runtime counter relocation
+ // is being used. Runtime has a weak external reference that is used
+ // to check whether that's the case or not.
+ Bias = new GlobalVariable(
+ *M, Int64Ty, false, GlobalValue::LinkOnceODRLinkage,
+ Constant::getNullValue(Int64Ty), getInstrProfCounterBiasVarName());
+ Bias->setVisibility(GlobalVariable::HiddenVisibility);
+ // A definition that's weak (linkonce_odr) without being in a COMDAT
+ // section wouldn't lead to link errors, but it would lead to a dead
+ // data word from every TU but one. Putting it in COMDAT ensures there
+ // will be exactly one data slot in the link.
+ if (TT.supportsCOMDAT())
+ Bias->setComdat(M->getOrInsertComdat(Bias->getName()));
}
- auto *Add = Builder.CreateAdd(Builder.CreatePtrToInt(Addr, Int64Ty), LI);
- Addr = Builder.CreateIntToPtr(Add, Int64PtrTy);
+ LI = EntryBuilder.CreateLoad(Int64Ty, Bias);
}
+ auto *Add = Builder.CreateAdd(Builder.CreatePtrToInt(Addr, Int64Ty), LI);
+ return Builder.CreateIntToPtr(Add, Addr->getType());
+}
+
+void InstrProfiling::lowerCover(InstrProfCoverInst *CoverInstruction) {
+ auto *Addr = getCounterAddress(CoverInstruction);
+ IRBuilder<> Builder(CoverInstruction);
+ // We store zero to represent that this block is covered.
+ Builder.CreateStore(Builder.getInt8(0), Addr);
+ CoverInstruction->eraseFromParent();
+}
+
+void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
+ auto *Addr = getCounterAddress(Inc);
+ IRBuilder<> Builder(Inc);
if (Options.Atomic || AtomicCounterUpdateAll ||
- (Index == 0 && AtomicFirstCounter)) {
+ (Inc->getIndex()->isZeroValue() && AtomicFirstCounter)) {
Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, Inc->getStep(),
MaybeAlign(), AtomicOrdering::Monotonic);
} else {
@@ -849,6 +864,31 @@ static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT) {
}
GlobalVariable *
+InstrProfiling::createRegionCounters(InstrProfInstBase *Inc, StringRef Name,
+ GlobalValue::LinkageTypes Linkage) {
+ uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
+ auto &Ctx = M->getContext();
+ GlobalVariable *GV;
+ if (isa<InstrProfCoverInst>(Inc)) {
+ auto *CounterTy = Type::getInt8Ty(Ctx);
+ auto *CounterArrTy = ArrayType::get(CounterTy, NumCounters);
+ // TODO: `Constant::getAllOnesValue()` does not yet accept an array type.
+ std::vector<Constant *> InitialValues(NumCounters,
+ Constant::getAllOnesValue(CounterTy));
+ GV = new GlobalVariable(*M, CounterArrTy, false, Linkage,
+ ConstantArray::get(CounterArrTy, InitialValues),
+ Name);
+ GV->setAlignment(Align(1));
+ } else {
+ auto *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters);
+ GV = new GlobalVariable(*M, CounterTy, false, Linkage,
+ Constant::getNullValue(CounterTy), Name);
+ GV->setAlignment(Align(8));
+ }
+ return GV;
+}
+
+GlobalVariable *
InstrProfiling::getOrCreateRegionCounters(InstrProfInstBase *Inc) {
GlobalVariable *NamePtr = Inc->getName();
auto &PD = ProfileDataMap[NamePtr];
@@ -914,16 +954,11 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfInstBase *Inc) {
uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
LLVMContext &Ctx = M->getContext();
- ArrayType *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters);
- // Create the counters variable.
- auto *CounterPtr =
- new GlobalVariable(*M, CounterTy, false, Linkage,
- Constant::getNullValue(CounterTy), CntsVarName);
+ auto *CounterPtr = createRegionCounters(Inc, CntsVarName, Linkage);
CounterPtr->setVisibility(Visibility);
CounterPtr->setSection(
getInstrProfSectionName(IPSK_cnts, TT.getObjectFormat()));
- CounterPtr->setAlignment(Align(8));
MaybeSetComdat(CounterPtr);
CounterPtr->setLinkage(Linkage);
PD.RegionCounters = CounterPtr;
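The new createRegionCounters/lowerCover path replaces 8-byte increment counters with 1-byte coverage slots: the array is initialized to all ones and the lowered intrinsic simply stores zero. A small self-contained sketch of that scheme, with the array size and names chosen only for illustration:

#include <cstdint>
#include <cstdio>

// One i8 slot per instrumented region; all-ones means "not covered yet",
// mirroring the all-ones initializer built in createRegionCounters above.
static uint8_t CoverageCounters[3] = {0xFF, 0xFF, 0xFF};

// Roughly what a lowered llvm.instrprof.cover becomes: a plain byte store of 0.
static inline void markCovered(unsigned Index) { CoverageCounters[Index] = 0; }

int main() {
  markCovered(1); // pretend only region 1 executed
  for (unsigned I = 0; I < 3; ++I)
    std::printf("region %u: %s\n", I,
                CoverageCounters[I] == 0 ? "covered" : "not covered");
  return 0;
}

Because the store is idempotent, concurrent executions need no atomics; that is what makes single-byte counters viable for coverage-only instrumentation.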
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index 8fedefccf0e1..5e078f2c4212 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -26,6 +26,7 @@
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index cfe993dedbc2..c51acdf52f14 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -182,6 +182,7 @@
#include "llvm/IR/ValueMap.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
+#include "llvm/Support/Alignment.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -1718,11 +1719,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// Figure out maximal valid memcpy alignment.
const Align ArgAlign = DL.getValueOrABITypeAlignment(
MaybeAlign(FArg.getParamAlignment()), FArg.getParamByValType());
- Value *CpShadowPtr =
+ Value *CpShadowPtr, *CpOriginPtr;
+ std::tie(CpShadowPtr, CpOriginPtr) =
getShadowOriginPtr(V, EntryIRB, EntryIRB.getInt8Ty(), ArgAlign,
- /*isStore*/ true)
- .first;
- // TODO(glider): need to copy origins.
+ /*isStore*/ true);
if (!PropagateShadow || Overflow) {
// ParamTLS overflow.
EntryIRB.CreateMemSet(
@@ -1735,6 +1735,19 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
CopyAlign, Size);
LLVM_DEBUG(dbgs() << " ByValCpy: " << *Cpy << "\n");
(void)Cpy;
+
+ if (MS.TrackOrigins) {
+ Value *OriginPtr =
+ getOriginPtrForArgument(&FArg, EntryIRB, ArgOffset);
+ // FIXME: OriginSize should be:
+ // alignTo(V % kMinOriginAlignment + Size, kMinOriginAlignment)
+ unsigned OriginSize = alignTo(Size, kMinOriginAlignment);
+ EntryIRB.CreateMemCpy(
+ CpOriginPtr,
+ /* by getShadowOriginPtr */ kMinOriginAlignment, OriginPtr,
+ /* by origin_tls[ArgOffset] */ kMinOriginAlignment,
+ OriginSize);
+ }
}
}
@@ -3701,7 +3714,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
insertShadowCheck(A, &CB);
Size = DL.getTypeAllocSize(A->getType());
} else {
- bool ArgIsInitialized = false;
Value *Store = nullptr;
// Compute the Shadow for arg even if it is ByVal, because
// in that case getShadow() will copy the actual arg shadow to
@@ -3722,10 +3734,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
MaybeAlign Alignment = llvm::None;
if (ParamAlignment)
Alignment = std::min(*ParamAlignment, kShadowTLSAlignment);
- Value *AShadowPtr =
+ Value *AShadowPtr, *AOriginPtr;
+ std::tie(AShadowPtr, AOriginPtr) =
getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), Alignment,
- /*isStore*/ false)
- .first;
+ /*isStore*/ false);
if (!PropagateShadow) {
Store = IRB.CreateMemSet(ArgShadowBase,
Constant::getNullValue(IRB.getInt8Ty()),
@@ -3733,6 +3745,17 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
} else {
Store = IRB.CreateMemCpy(ArgShadowBase, Alignment, AShadowPtr,
Alignment, Size);
+ if (MS.TrackOrigins) {
+ Value *ArgOriginBase = getOriginPtrForArgument(A, IRB, ArgOffset);
+ // FIXME: OriginSize should be:
+ // alignTo(A % kMinOriginAlignment + Size, kMinOriginAlignment)
+ unsigned OriginSize = alignTo(Size, kMinOriginAlignment);
+ IRB.CreateMemCpy(
+ ArgOriginBase,
+ /* by origin_tls[ArgOffset] */ kMinOriginAlignment,
+ AOriginPtr,
+ /* by getShadowOriginPtr */ kMinOriginAlignment, OriginSize);
+ }
}
} else {
// Any other parameters mean we need bit-grained tracking of uninit
@@ -3743,12 +3766,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Store = IRB.CreateAlignedStore(ArgShadow, ArgShadowBase,
kShadowTLSAlignment);
Constant *Cst = dyn_cast<Constant>(ArgShadow);
- if (Cst && Cst->isNullValue())
- ArgIsInitialized = true;
+ if (MS.TrackOrigins && !(Cst && Cst->isNullValue())) {
+ IRB.CreateStore(getOrigin(A),
+ getOriginPtrForArgument(A, IRB, ArgOffset));
+ }
}
- if (MS.TrackOrigins && !ArgIsInitialized)
- IRB.CreateStore(getOrigin(A),
- getOriginPtrForArgument(A, IRB, ArgOffset));
(void)Store;
assert(Store != nullptr);
LLVM_DEBUG(dbgs() << " Param:" << *Store << "\n");
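Both TrackOrigins hunks copy origin data whose size is the shadow size rounded up to kMinOriginAlignment (the FIXME notes that a fully precise formula would also account for the pointer's misalignment). A short sketch of that rounding, with the 4-byte granularity assumed here purely for illustration:

#include <cstdio>

// Assumed granularity for illustration; kMinOriginAlignment plays this role
// in the MemorySanitizer hunks above.
constexpr unsigned MinOriginAlignment = 4;

// Round Size up to the next multiple of a power-of-two alignment.
constexpr unsigned alignToPow2(unsigned Size, unsigned Alignment) {
  return (Size + Alignment - 1) & ~(Alignment - 1);
}

int main() {
  for (unsigned Size : {1u, 4u, 5u, 8u, 13u})
    std::printf("shadow bytes %2u -> origin bytes copied %2u\n", Size,
                alignToPow2(Size, MinOriginAlignment));
  return 0;
}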
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index c46415e5b1f4..0902a94452e3 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -255,6 +255,11 @@ static cl::opt<bool> PGOInstrumentEntry(
"pgo-instrument-entry", cl::init(false), cl::Hidden,
cl::desc("Force to instrument function entry basicblock."));
+static cl::opt<bool> PGOFunctionEntryCoverage(
+ "pgo-function-entry-coverage", cl::init(false), cl::Hidden, cl::ZeroOrMore,
+ cl::desc(
+ "Use this option to enable function entry coverage instrumentation."));
+
static cl::opt<bool>
PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden,
cl::desc("Fix function entry count in profile use."));
@@ -337,6 +342,33 @@ static const char *ValueProfKindDescr[] = {
#include "llvm/ProfileData/InstrProfData.inc"
};
+// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
+// aware this is an ir_level profile so it can set the version flag.
+static GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS) {
+ const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
+ Type *IntTy64 = Type::getInt64Ty(M.getContext());
+ uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF);
+ if (IsCS)
+ ProfileVersion |= VARIANT_MASK_CSIR_PROF;
+ if (PGOInstrumentEntry)
+ ProfileVersion |= VARIANT_MASK_INSTR_ENTRY;
+ if (DebugInfoCorrelate)
+ ProfileVersion |= VARIANT_MASK_DBG_CORRELATE;
+ if (PGOFunctionEntryCoverage)
+ ProfileVersion |=
+ VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY;
+ auto IRLevelVersionVariable = new GlobalVariable(
+ M, IntTy64, true, GlobalValue::WeakAnyLinkage,
+ Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);
+ IRLevelVersionVariable->setVisibility(GlobalValue::DefaultVisibility);
+ Triple TT(M.getTargetTriple());
+ if (TT.supportsCOMDAT()) {
+ IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage);
+ IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName));
+ }
+ return IRLevelVersionVariable;
+}
+
namespace {
/// The select instruction visitor plays three roles specified
@@ -469,9 +501,7 @@ private:
createProfileFileNameVar(M, InstrProfileOutput);
// The variable in a comdat may be discarded by LTO. Ensure the
// declaration will be retained.
- appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true,
- PGOInstrumentEntry,
- DebugInfoCorrelate));
+ appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true));
return false;
}
std::string InstrProfileOutput;
@@ -914,22 +944,39 @@ static void instrumentOneFunc(
FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(
F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry);
+
+ Type *I8PtrTy = Type::getInt8PtrTy(M->getContext());
+ auto Name = ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy);
+ auto CFGHash = ConstantInt::get(Type::getInt64Ty(M->getContext()),
+ FuncInfo.FunctionHash);
+ if (PGOFunctionEntryCoverage) {
+ assert(!IsCS &&
+ "entry coverge does not support context-sensitive instrumentation");
+ auto &EntryBB = F.getEntryBlock();
+ IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
+ // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>,
+ // i32 <index>)
+ Builder.CreateCall(
+ Intrinsic::getDeclaration(M, Intrinsic::instrprof_cover),
+ {Name, CFGHash, Builder.getInt32(1), Builder.getInt32(0)});
+ return;
+ }
+
std::vector<BasicBlock *> InstrumentBBs;
FuncInfo.getInstrumentBBs(InstrumentBBs);
unsigned NumCounters =
InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
uint32_t I = 0;
- Type *I8PtrTy = Type::getInt8PtrTy(M->getContext());
for (auto *InstrBB : InstrumentBBs) {
IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
assert(Builder.GetInsertPoint() != InstrBB->end() &&
"Cannot get the Instrumentation point");
+ // llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>,
+ // i32 <index>)
Builder.CreateCall(
Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment),
- {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy),
- Builder.getInt64(FuncInfo.FunctionHash), Builder.getInt32(NumCounters),
- Builder.getInt32(I++)});
+ {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I++)});
}
// Now instrument select instructions:
@@ -1502,6 +1549,8 @@ void PGOUseFunc::annotateIrrLoopHeaderWeights() {
}
void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
+ if (PGOFunctionEntryCoverage)
+ return;
Module *M = F.getParent();
IRBuilder<> Builder(&SI);
Type *Int64Ty = Builder.getInt64Ty();
@@ -1622,8 +1671,7 @@ static bool InstrumentAllFunctions(
// For the context-sensitve instrumentation, we should have a separated pass
// (before LTO/ThinLTO linking) to create these variables.
if (!IsCS)
- createIRLevelProfileFlagVar(M, /*IsCS=*/false, PGOInstrumentEntry,
- DebugInfoCorrelate);
+ createIRLevelProfileFlagVar(M, /*IsCS=*/false);
std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
collectComdatMembers(M, ComdatMembers);
@@ -1645,9 +1693,7 @@ PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &AM) {
createProfileFileNameVar(M, CSInstrName);
// The variable in a comdat may be discarded by LTO. Ensure the declaration
// will be retained.
- appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true,
- PGOInstrumentEntry,
- DebugInfoCorrelate));
+ appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true));
return PreservedAnalyses::all();
}
@@ -1844,6 +1890,18 @@ static bool annotateAllFunctions(
ProfileFileName.data(), "Not an IR level instrumentation profile"));
return false;
}
+ if (PGOReader->hasSingleByteCoverage()) {
+ Ctx.diagnose(DiagnosticInfoPGOProfile(
+ ProfileFileName.data(),
+ "Cannot use coverage profiles for optimization"));
+ return false;
+ }
+ if (PGOReader->functionEntryOnly()) {
+ Ctx.diagnose(DiagnosticInfoPGOProfile(
+ ProfileFileName.data(),
+ "Function entry profiles are not yet supported for optimization"));
+ return false;
+ }
// Add the profile summary (read from the header of the indexed summary) here
// so that we can use it below when reading counters (which checks if the
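createIRLevelProfileFlagVar now also advertises byte-coverage and function-entry-only profiles by OR-ing extra variant bits into the raw profile version word. A hedged sketch of that composition; the bit positions below are placeholders, since the real masks are generated from llvm/ProfileData/InstrProfData.inc:

#include <cstdint>
#include <cstdio>

// Placeholder values for illustration only.
constexpr uint64_t RawVersion = 8;
constexpr uint64_t MaskIRProf = 1ULL << 56;
constexpr uint64_t MaskCSIRProf = 1ULL << 57;
constexpr uint64_t MaskInstrEntry = 1ULL << 58;
constexpr uint64_t MaskByteCoverage = 1ULL << 59;
constexpr uint64_t MaskFunctionEntryOnly = 1ULL << 60;

static uint64_t profileVersion(bool IsCS, bool InstrumentEntry,
                               bool EntryCoverage) {
  uint64_t Version = RawVersion | MaskIRProf;
  if (IsCS)
    Version |= MaskCSIRProf;
  if (InstrumentEntry)
    Version |= MaskInstrEntry;
  if (EntryCoverage) // -pgo-function-entry-coverage sets both bits at once
    Version |= MaskByteCoverage | MaskFunctionEntryOnly;
  return Version;
}

int main() {
  std::printf("entry-coverage version word: 0x%016llx\n",
              (unsigned long long)profileVersion(false, false, true));
  return 0;
}

The runtime reads this global to decide how to interpret the counters, which is why the use-side diagnostics above reject coverage and entry-only profiles for optimization.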
diff --git a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp
index 1ca6ddabac5b..126845bb3308 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp
@@ -123,20 +123,9 @@ BundledRetainClaimRVs::~BundledRetainClaimRVs() {
// can't be tail calls.
if (auto *CI = dyn_cast<CallInst>(CB))
CI->setTailCallKind(CallInst::TCK_NoTail);
-
- if (UseMarker) {
- // Remove the retainRV/claimRV function operand from the operand bundle
- // to reflect the fact that the backend is responsible for emitting only
- // the marker instruction, but not the retainRV/claimRV call.
- OperandBundleDef OB("clang.arc.attachedcall", None);
- auto *NewCB = CallBase::Create(CB, OB, CB);
- CB->replaceAllUsesWith(NewCB);
- CB->eraseFromParent();
- }
}
- if (!ContractPass || !UseMarker)
- EraseInstruction(P.first);
+ EraseInstruction(P.first);
}
RVCalls.clear();
diff --git a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARC.h b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARC.h
index 2b47bec7ffe8..62f88a8cc02b 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARC.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARC.h
@@ -105,8 +105,7 @@ CallInst *createCallInstWithColors(
class BundledRetainClaimRVs {
public:
- BundledRetainClaimRVs(bool ContractPass, bool UseMarker)
- : ContractPass(ContractPass), UseMarker(UseMarker) {}
+ BundledRetainClaimRVs(bool ContractPass) : ContractPass(ContractPass) {}
~BundledRetainClaimRVs();
/// Insert a retainRV/claimRV call to the normal destination blocks of invokes
@@ -156,9 +155,6 @@ private:
DenseMap<CallInst *, CallBase *> RVCalls;
bool ContractPass;
-
- /// Indicates whether the target uses a special inline-asm marker.
- bool UseMarker;
};
} // end namespace objcarc
diff --git a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
index 9e2832827686..2985ae004d3c 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
@@ -434,23 +434,20 @@ bool ObjCARCContract::tryToPeepholeInstruction(
LLVM_FALLTHROUGH;
case ARCInstKind::RetainRV:
case ARCInstKind::UnsafeClaimRV: {
- bool IsInstContainedInBundle = BundledInsts->contains(Inst);
-
- // Return now if the target doesn't need a special inline-asm marker. Return
- // true if this is a bundled retainRV/claimRV call, which is going to be
- // erased at the end of this pass, to avoid undoing objc-arc-expand and
+ // Return true if this is a bundled retainRV/claimRV call, which is always
+ // redundant with the attachedcall in the bundle, and is going to be erased
+ // at the end of this pass. This avoids undoing objc-arc-expand and
// replacing uses of the retainRV/claimRV call's argument with its result.
- if (!RVInstMarker)
- return IsInstContainedInBundle;
-
- // The target needs a special inline-asm marker.
+ if (BundledInsts->contains(Inst))
+ return true;
- // We don't have to emit the marker if this is a bundled call since the
- // backend is responsible for emitting it. Return false to undo
- // objc-arc-expand.
- if (IsInstContainedInBundle)
+ // If this isn't a bundled call, and the target doesn't need a special
+ // inline-asm marker, we're done: return now, and undo objc-arc-expand.
+ if (!RVInstMarker)
return false;
+ // The target needs a special inline-asm marker. Insert it.
+
BasicBlock::iterator BBI = Inst->getIterator();
BasicBlock *InstParent = Inst->getParent();
@@ -548,7 +545,7 @@ bool ObjCARCContract::run(Function &F, AAResults *A, DominatorTree *D) {
AA = A;
DT = D;
PA.setAA(A);
- BundledRetainClaimRVs BRV(true, RVInstMarker);
+ BundledRetainClaimRVs BRV(/*ContractPass=*/true);
BundledInsts = &BRV;
std::pair<bool, bool> R = BundledInsts->insertAfterInvokes(F, DT);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index b6dc97f1e43f..e1a000b31cf9 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -2459,7 +2459,7 @@ bool ObjCARCOpt::run(Function &F, AAResults &AA) {
return false;
Changed = CFGChanged = false;
- BundledRetainClaimRVs BRV(false, objcarc::getRVInstMarker(*F.getParent()));
+ BundledRetainClaimRVs BRV(/*ContractPass=*/false);
BundledInsts = &BRV;
LLVM_DEBUG(dbgs() << "<<< ObjCARCOpt: Visiting Function: " << F.getName()
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
index dda1a2f08076..143a78f604fc 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
@@ -357,7 +357,7 @@ typedef DenseMap<BasicBlock *, CloneList> DuplicateBlockMap;
// This map keeps track of all the new definitions for an instruction. This
// information is needed when restoring SSA form after cloning blocks.
-typedef DenseMap<Instruction *, std::vector<Instruction *>> DefMap;
+typedef MapVector<Instruction *, std::vector<Instruction *>> DefMap;
inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) {
OS << "< ";
@@ -1126,6 +1126,9 @@ private:
/// Add new value mappings to the DefMap to keep track of all new definitions
/// for a particular instruction. These will be used while updating SSA form.
void updateDefMap(DefMap &NewDefs, ValueToValueMapTy &VMap) {
+ SmallVector<std::pair<Instruction *, Instruction *>> NewDefsVector;
+ NewDefsVector.reserve(VMap.size());
+
for (auto Entry : VMap) {
Instruction *Inst =
dyn_cast<Instruction>(const_cast<Value *>(Entry.first));
@@ -1138,11 +1141,18 @@ private:
if (!Cloned)
continue;
- if (NewDefs.find(Inst) == NewDefs.end())
- NewDefs[Inst] = {Cloned};
- else
- NewDefs[Inst].push_back(Cloned);
+ NewDefsVector.push_back({Inst, Cloned});
}
+
+ // Sort the defs to get deterministic insertion order into NewDefs.
+ sort(NewDefsVector, [](const auto &LHS, const auto &RHS) {
+ if (LHS.first == RHS.first)
+ return LHS.second->comesBefore(RHS.second);
+ return LHS.first->comesBefore(RHS.first);
+ });
+
+ for (const auto &KV : NewDefsVector)
+ NewDefs[KV.first].push_back(KV.second);
}
/// Update the last branch of a particular cloned path to point to the correct
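The updateDefMap change swaps DenseMap iteration (which follows pointer hashing and is therefore nondeterministic) for a vector sorted by program order before filling an insertion-ordered map. The pattern, reduced to standard C++ with integer positions standing in for Instruction::comesBefore:

#include <algorithm>
#include <cstdio>
#include <map>
#include <utility>
#include <vector>

struct Inst {
  int Position; // stands in for Instruction::comesBefore ordering
};

int main() {
  Inst A{2}, B{0}, C{1}, CloneA{5}, CloneB{4}, CloneC{6};
  // Pairs arrive in whatever order the value map hands them out.
  std::vector<std::pair<Inst *, Inst *>> NewDefsVector = {
      {&A, &CloneA}, {&B, &CloneB}, {&C, &CloneC}};

  // Sort by the original instruction's position, breaking ties by the clone's
  // position, so the final insertion order no longer depends on pointer values.
  std::sort(NewDefsVector.begin(), NewDefsVector.end(),
            [](const auto &LHS, const auto &RHS) {
              if (LHS.first->Position == RHS.first->Position)
                return LHS.second->Position < RHS.second->Position;
              return LHS.first->Position < RHS.first->Position;
            });

  std::map<int, std::vector<int>> NewDefs; // ordered stand-in for MapVector
  for (const auto &KV : NewDefsVector)
    NewDefs[KV.first->Position].push_back(KV.second->Position);

  for (const auto &Entry : NewDefs)
    std::printf("inst at position %d gets %zu cloned def(s)\n", Entry.first,
                Entry.second.size());
  return 0;
}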
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFuse.cpp
index ca19913e37ee..bf4d275e04ba 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFuse.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFuse.cpp
@@ -192,6 +192,7 @@ struct FusionCandidate {
GuardBranch(L->getLoopGuardBranch()), PP(PP), AbleToPeel(canPeel(L)),
Peeled(false), DT(DT), PDT(PDT), ORE(ORE) {
+ assert(DT && "Expected non-null DT!");
// Walk over all blocks in the loop and check for conditions that may
// prevent fusion. For each block, walk over all instructions and collect
// the memory reads and writes. If any instructions that prevent fusion are
@@ -767,7 +768,7 @@ private:
LLVM_DEBUG(dbgs() << "Attempting to peel first " << PeelCount
<< " iterations of the first loop. \n");
- FC0.Peeled = peelLoop(FC0.L, PeelCount, &LI, &SE, &DT, &AC, true);
+ FC0.Peeled = peelLoop(FC0.L, PeelCount, &LI, &SE, DT, &AC, true);
if (FC0.Peeled) {
LLVM_DEBUG(dbgs() << "Done Peeling\n");
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 35ba4e2b4032..318c4c06f0f7 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -1172,8 +1172,15 @@ bool LoopIdiomRecognize::processLoopStridedStore(
CallInst *NewCall;
if (SplatValue) {
- NewCall = Builder.CreateMemSet(BasePtr, SplatValue, NumBytes,
- MaybeAlign(StoreAlignment));
+ AAMDNodes AATags = TheStore->getAAMetadata();
+ if (auto CI = dyn_cast<ConstantInt>(NumBytes))
+ AATags = AATags.extendTo(CI->getZExtValue());
+ else
+ AATags = AATags.extendTo(-1);
+
+ NewCall = Builder.CreateMemSet(
+ BasePtr, SplatValue, NumBytes, MaybeAlign(StoreAlignment),
+ /*isVolatile=*/false, AATags.TBAA, AATags.Scope, AATags.NoAlias);
} else {
// Everything is emitted in default address space
Type *Int8PtrTy = DestInt8PtrTy;
@@ -1452,17 +1459,28 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
Value *NumBytes =
Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());
+ AAMDNodes AATags = TheLoad->getAAMetadata();
+ AAMDNodes StoreAATags = TheStore->getAAMetadata();
+ AATags = AATags.merge(StoreAATags);
+ if (auto CI = dyn_cast<ConstantInt>(NumBytes))
+ AATags = AATags.extendTo(CI->getZExtValue());
+ else
+ AATags = AATags.extendTo(-1);
+
CallInst *NewCall = nullptr;
// Check whether to generate an unordered atomic memcpy:
// If the load or store are atomic, then they must necessarily be unordered
// by previous checks.
if (!TheStore->isAtomic() && !TheLoad->isAtomic()) {
if (UseMemMove)
- NewCall = Builder.CreateMemMove(StoreBasePtr, StoreAlign, LoadBasePtr,
- LoadAlign, NumBytes);
+ NewCall = Builder.CreateMemMove(
+ StoreBasePtr, StoreAlign, LoadBasePtr, LoadAlign, NumBytes,
+ /*isVolatile=*/false, AATags.TBAA, AATags.Scope, AATags.NoAlias);
else
- NewCall = Builder.CreateMemCpy(StoreBasePtr, StoreAlign, LoadBasePtr,
- LoadAlign, NumBytes);
+ NewCall =
+ Builder.CreateMemCpy(StoreBasePtr, StoreAlign, LoadBasePtr, LoadAlign,
+ NumBytes, /*isVolatile=*/false, AATags.TBAA,
+ AATags.TBAAStruct, AATags.Scope, AATags.NoAlias);
} else {
// For now don't support unordered atomic memmove.
if (UseMemMove)
@@ -1486,7 +1504,8 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
// have an alignment but non-atomic loads/stores may not.
NewCall = Builder.CreateElementUnorderedAtomicMemCpy(
StoreBasePtr, StoreAlign.getValue(), LoadBasePtr, LoadAlign.getValue(),
- NumBytes, StoreSize);
+ NumBytes, StoreSize, AATags.TBAA, AATags.TBAAStruct, AATags.Scope,
+ AATags.NoAlias);
}
NewCall->setDebugLoc(TheStore->getDebugLoc());
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
index 728d63fe2847..d3fcba10c275 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
@@ -468,7 +468,7 @@ private:
LI.removeBlock(BB);
}
- DetatchDeadBlocks(DeadLoopBlocks, &DTUpdates, /*KeepOneInputPHIs*/true);
+ detachDeadBlocks(DeadLoopBlocks, &DTUpdates, /*KeepOneInputPHIs*/true);
DTU.applyUpdates(DTUpdates);
DTUpdates.clear();
for (auto *BB : DeadLoopBlocks)
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 022d9c7abc8c..9beb2281cf0f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -1281,7 +1281,7 @@ static LoopUnrollResult tryToUnrollLoop(
<< " iterations";
});
- if (peelLoop(L, PP.PeelCount, LI, &SE, &DT, &AC, PreserveLCSSA)) {
+ if (peelLoop(L, PP.PeelCount, LI, &SE, DT, &AC, PreserveLCSSA)) {
simplifyLoopAfterUnroll(L, true, LI, &SE, &DT, &AC, &TTI);
// If the loop was peeled, we already "used up" the profile information
// we had, so we don't want to unroll or peel again.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index 8f1d0181ee5b..296becb31e8f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -1339,16 +1339,21 @@ public:
// Copy load operand to new alloca.
Builder.SetInsertPoint(Copy, Copy->begin());
- AllocaInst *NewLd =
- Builder.CreateAlloca(Load->getType(), Load->getPointerAddressSpace());
- Builder.CreateMemCpy(NewLd, NewLd->getAlign(),
- Load->getPointerOperand(), Load->getAlign(),
- LoadLoc.Size.getValue());
+ auto *VT = cast<FixedVectorType>(Load->getType());
+ // Use an array type for the alloca, to avoid potentially huge alignment
+ // requirements for large vector types.
+ auto *ArrayTy = ArrayType::get(VT->getElementType(), VT->getNumElements());
+ AllocaInst *Alloca =
+ Builder.CreateAlloca(ArrayTy, Load->getPointerAddressSpace());
+ Value *BC = Builder.CreateBitCast(Alloca, VT->getPointerTo());
+
+ Builder.CreateMemCpy(BC, Alloca->getAlign(), Load->getPointerOperand(),
+ Load->getAlign(), LoadLoc.Size.getValue());
Builder.SetInsertPoint(Fusion, Fusion->begin());
PHINode *PHI = Builder.CreatePHI(Load->getPointerOperandType(), 3);
PHI->addIncoming(Load->getPointerOperand(), Check0);
PHI->addIncoming(Load->getPointerOperand(), Check1);
- PHI->addIncoming(NewLd, Copy);
+ PHI->addIncoming(BC, Copy);
// Adjust DT.
DTUpdates.push_back({DT->Insert, Check0, Check1});
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/NewGVN.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/NewGVN.cpp
index 2476e6c408b1..f35c9212a6f9 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -77,6 +77,7 @@
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
@@ -1736,18 +1737,18 @@ NewGVN::performSymbolicPHIEvaluation(ArrayRef<ValPair> PHIOps,
if (Filtered.empty()) {
// If it has undef or poison at this point, it means there are no-non-undef
// arguments, and thus, the value of the phi node must be undef.
- if (HasPoison && !HasUndef) {
- LLVM_DEBUG(
- dbgs() << "PHI Node " << *I
- << " has no non-poison arguments, valuing it as poison\n");
- return createConstantExpression(PoisonValue::get(I->getType()));
- }
if (HasUndef) {
LLVM_DEBUG(
dbgs() << "PHI Node " << *I
<< " has no non-undef arguments, valuing it as undef\n");
return createConstantExpression(UndefValue::get(I->getType()));
}
+ if (HasPoison) {
+ LLVM_DEBUG(
+ dbgs() << "PHI Node " << *I
+ << " has no non-poison arguments, valuing it as poison\n");
+ return createConstantExpression(PoisonValue::get(I->getType()));
+ }
LLVM_DEBUG(dbgs() << "No arguments of PHI node " << *I << " are live\n");
deleteExpression(E);
@@ -1757,6 +1758,11 @@ NewGVN::performSymbolicPHIEvaluation(ArrayRef<ValPair> PHIOps,
++Filtered.begin();
// Can't use std::equal here, sadly, because filter.begin moves.
if (llvm::all_of(Filtered, [&](Value *Arg) { return Arg == AllSameValue; })) {
+ // Can't fold phi(undef, X) -> X unless X can't be poison (thus X is undef
+ // in the worst case).
+ if (HasUndef && !isGuaranteedNotToBePoison(AllSameValue, AC, nullptr, DT))
+ return E;
+
// In LLVM's non-standard representation of phi nodes, it's possible to have
// phi nodes with cycles (IE dependent on other phis that are .... dependent
// on the original phi node), especially in weird CFG's where some arguments
@@ -1764,8 +1770,8 @@ NewGVN::performSymbolicPHIEvaluation(ArrayRef<ValPair> PHIOps,
// infinite loops during evaluation. We work around this by not trying to
// really evaluate them independently, but instead using a variable
// expression to say if one is equivalent to the other.
- // We also special case undef, so that if we have an undef, we can't use the
- // common value unless it dominates the phi block.
+ // We also special case undef/poison, so that if we have an undef, we can't
+ // use the common value unless it dominates the phi block.
if (HasPoison || HasUndef) {
// If we have undef and at least one other value, this is really a
// multivalued phi, and we need to know if it's cycle free in order to
@@ -2853,14 +2859,14 @@ NewGVN::makePossiblePHIOfOps(Instruction *I,
}
// The algorithm initially places the values of the routine in the TOP
-// congruence class. The leader of TOP is the undetermined value `undef`.
+// congruence class. The leader of TOP is the undetermined value `poison`.
// When the algorithm has finished, values still in TOP are unreachable.
void NewGVN::initializeCongruenceClasses(Function &F) {
NextCongruenceNum = 0;
// Note that even though we use the live on entry def as a representative
// MemoryAccess, it is *not* the same as the actual live on entry def. We
- // have no real equivalemnt to undef for MemoryAccesses, and so we really
+ // have no real equivalent to poison for MemoryAccesses, and so we really
// should be checking whether the MemoryAccess is top if we want to know if it
// is equivalent to everything. Otherwise, what this really signifies is that
// the access "it reaches all the way back to the beginning of the function"
@@ -3031,7 +3037,7 @@ void NewGVN::valueNumberMemoryPhi(MemoryPhi *MP) {
!isMemoryAccessTOP(cast<MemoryAccess>(U)) &&
ReachableEdges.count({MP->getIncomingBlock(U), PHIBlock});
});
- // If all that is left is nothing, our memoryphi is undef. We keep it as
+ // If all that is left is nothing, our memoryphi is poison. We keep it as
// InitialClass. Note: The only case this should happen is if we have at
// least one self-argument.
if (Filtered.begin() == Filtered.end()) {
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index 3da367341d2a..b795ad3899bc 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -258,6 +258,7 @@ struct GCPtrLivenessData {
// base relation will remain. Internally, we add a mixture of the two
// types, then update all the second type to the first type
using DefiningValueMapTy = MapVector<Value *, Value *>;
+using PointerToBaseTy = MapVector<Value *, Value *>;
using StatepointLiveSetTy = SetVector<Value *>;
using RematerializedValueMapTy =
MapVector<AssertingVH<Instruction>, AssertingVH<Value>>;
@@ -266,9 +267,6 @@ struct PartiallyConstructedSafepointRecord {
/// The set of values known to be live across this safepoint
StatepointLiveSetTy LiveSet;
- /// Mapping from live pointers to a base-defining-value
- MapVector<Value *, Value *> PointerToBase;
-
/// The *new* gc.statepoint instruction itself. This produces the token
/// that normal path gc.relocates and the gc.result are tied to.
GCStatepointInst *StatepointToken;
@@ -1255,10 +1253,9 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
// post condition: PointerToBase contains one (derived, base) pair for every
// pointer in live. Note that derived can be equal to base if the original
// pointer was a base pointer.
-static void
-findBasePointers(const StatepointLiveSetTy &live,
- MapVector<Value *, Value *> &PointerToBase,
- DominatorTree *DT, DefiningValueMapTy &DVCache) {
+static void findBasePointers(const StatepointLiveSetTy &live,
+ PointerToBaseTy &PointerToBase, DominatorTree *DT,
+ DefiningValueMapTy &DVCache) {
for (Value *ptr : live) {
Value *base = findBasePointer(ptr, DVCache);
assert(base && "failed to find base pointer");
@@ -1274,8 +1271,8 @@ findBasePointers(const StatepointLiveSetTy &live,
/// parse point.
static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
CallBase *Call,
- PartiallyConstructedSafepointRecord &result) {
- MapVector<Value *, Value *> PointerToBase;
+ PartiallyConstructedSafepointRecord &result,
+ PointerToBaseTy &PointerToBase) {
StatepointLiveSetTy PotentiallyDerivedPointers = result.LiveSet;
// We assume that all pointers passed to deopt are base pointers; as an
// optimization, we can use this to avoid separately materializing the base
@@ -1290,37 +1287,27 @@ static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
PointerToBase[V] = V;
}
findBasePointers(PotentiallyDerivedPointers, PointerToBase, &DT, DVCache);
-
- if (PrintBasePointers) {
- errs() << "Base Pairs (w/o Relocation):\n";
- for (auto &Pair : PointerToBase) {
- errs() << " derived ";
- Pair.first->printAsOperand(errs(), false);
- errs() << " base ";
- Pair.second->printAsOperand(errs(), false);
- errs() << "\n";;
- }
- }
-
- result.PointerToBase = PointerToBase;
}
/// Given an updated version of the dataflow liveness results, update the
/// liveset and base pointer maps for the call site CS.
static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
CallBase *Call,
- PartiallyConstructedSafepointRecord &result);
+ PartiallyConstructedSafepointRecord &result,
+ PointerToBaseTy &PointerToBase);
static void recomputeLiveInValues(
Function &F, DominatorTree &DT, ArrayRef<CallBase *> toUpdate,
- MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) {
+ MutableArrayRef<struct PartiallyConstructedSafepointRecord> records,
+ PointerToBaseTy &PointerToBase) {
// TODO-PERF: reuse the original liveness, then simply run the dataflow
// again. The old values are still live and will help it stabilize quickly.
GCPtrLivenessData RevisedLivenessData;
computeLiveInValues(DT, F, RevisedLivenessData);
for (size_t i = 0; i < records.size(); i++) {
struct PartiallyConstructedSafepointRecord &info = records[i];
- recomputeLiveInValues(RevisedLivenessData, toUpdate[i], info);
+ recomputeLiveInValues(RevisedLivenessData, toUpdate[i], info,
+ PointerToBase);
}
}
@@ -1537,7 +1524,8 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */
const SmallVectorImpl<Value *> &BasePtrs,
const SmallVectorImpl<Value *> &LiveVariables,
PartiallyConstructedSafepointRecord &Result,
- std::vector<DeferredReplacement> &Replacements) {
+ std::vector<DeferredReplacement> &Replacements,
+ const PointerToBaseTy &PointerToBase) {
assert(BasePtrs.size() == LiveVariables.size());
// Then go ahead and use the builder do actually do the inserts. We insert
@@ -1626,10 +1614,10 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */
auto &Context = Call->getContext();
auto &DL = Call->getModule()->getDataLayout();
auto GetBaseAndOffset = [&](Value *Derived) {
- assert(Result.PointerToBase.count(Derived));
+ assert(PointerToBase.count(Derived));
unsigned AddressSpace = Derived->getType()->getPointerAddressSpace();
unsigned IntPtrSize = DL.getPointerSizeInBits(AddressSpace);
- Value *Base = Result.PointerToBase.find(Derived)->second;
+ Value *Base = PointerToBase.find(Derived)->second;
Value *Base_int = Builder.CreatePtrToInt(
Base, Type::getIntNTy(Context, IntPtrSize));
Value *Derived_int = Builder.CreatePtrToInt(
@@ -1819,9 +1807,9 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */
static void
makeStatepointExplicit(DominatorTree &DT, CallBase *Call,
PartiallyConstructedSafepointRecord &Result,
- std::vector<DeferredReplacement> &Replacements) {
+ std::vector<DeferredReplacement> &Replacements,
+ const PointerToBaseTy &PointerToBase) {
const auto &LiveSet = Result.LiveSet;
- const auto &PointerToBase = Result.PointerToBase;
// Convert to vector for efficient cross referencing.
SmallVector<Value *, 64> BaseVec, LiveVec;
@@ -1836,7 +1824,8 @@ makeStatepointExplicit(DominatorTree &DT, CallBase *Call,
assert(LiveVec.size() == BaseVec.size());
// Do the actual rewriting and delete the old statepoint
- makeStatepointExplicitImpl(Call, BaseVec, LiveVec, Result, Replacements);
+ makeStatepointExplicitImpl(Call, BaseVec, LiveVec, Result, Replacements,
+ PointerToBase);
}
// Helper function for the relocationViaAlloca.
@@ -2238,6 +2227,7 @@ static bool AreEquivalentPhiNodes(PHINode &OrigRootPhi, PHINode &AlternateRootPh
// relocated values we don't do any user adjustments here.
static void rematerializeLiveValues(CallBase *Call,
PartiallyConstructedSafepointRecord &Info,
+ PointerToBaseTy &PointerToBase,
TargetTransformInfo &TTI) {
const unsigned int ChainLengthThreshold = 10;
@@ -2248,7 +2238,7 @@ static void rematerializeLiveValues(CallBase *Call,
for (Value *LiveValue: Info.LiveSet) {
// For each live pointer find its defining chain
SmallVector<Instruction *, 3> ChainToBase;
- assert(Info.PointerToBase.count(LiveValue));
+ assert(PointerToBase.count(LiveValue));
Value *RootOfChain =
findRematerializableChainToBasePointer(ChainToBase,
LiveValue);
@@ -2260,9 +2250,9 @@ static void rematerializeLiveValues(CallBase *Call,
// Handle the scenario where the RootOfChain is not equal to the
// Base Value, but they are essentially the same phi values.
- if (RootOfChain != Info.PointerToBase[LiveValue]) {
+ if (RootOfChain != PointerToBase[LiveValue]) {
PHINode *OrigRootPhi = dyn_cast<PHINode>(RootOfChain);
- PHINode *AlternateRootPhi = dyn_cast<PHINode>(Info.PointerToBase[LiveValue]);
+ PHINode *AlternateRootPhi = dyn_cast<PHINode>(PointerToBase[LiveValue]);
if (!OrigRootPhi || !AlternateRootPhi)
continue;
// PHI nodes that have the same incoming values, and belonging to the same
@@ -2362,7 +2352,7 @@ static void rematerializeLiveValues(CallBase *Call,
Instruction *InsertBefore = Call->getNextNode();
assert(InsertBefore);
Instruction *RematerializedValue = rematerializeChain(
- InsertBefore, RootOfChain, Info.PointerToBase[LiveValue]);
+ InsertBefore, RootOfChain, PointerToBase[LiveValue]);
Info.RematerializedValues[RematerializedValue] = LiveValue;
} else {
auto *Invoke = cast<InvokeInst>(Call);
@@ -2373,9 +2363,9 @@ static void rematerializeLiveValues(CallBase *Call,
&*Invoke->getUnwindDest()->getFirstInsertionPt();
Instruction *NormalRematerializedValue = rematerializeChain(
- NormalInsertBefore, RootOfChain, Info.PointerToBase[LiveValue]);
+ NormalInsertBefore, RootOfChain, PointerToBase[LiveValue]);
Instruction *UnwindRematerializedValue = rematerializeChain(
- UnwindInsertBefore, RootOfChain, Info.PointerToBase[LiveValue]);
+ UnwindInsertBefore, RootOfChain, PointerToBase[LiveValue]);
Info.RematerializedValues[NormalRematerializedValue] = LiveValue;
Info.RematerializedValues[UnwindRematerializedValue] = LiveValue;
@@ -2491,10 +2481,24 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
// site.
findLiveReferences(F, DT, ToUpdate, Records);
+ /// Global mapping from live pointers to a base-defining-value.
+ PointerToBaseTy PointerToBase;
+
// B) Find the base pointers for each live pointer
for (size_t i = 0; i < Records.size(); i++) {
PartiallyConstructedSafepointRecord &info = Records[i];
- findBasePointers(DT, DVCache, ToUpdate[i], info);
+ findBasePointers(DT, DVCache, ToUpdate[i], info, PointerToBase);
+ }
+ if (PrintBasePointers) {
+ errs() << "Base Pairs (w/o Relocation):\n";
+ for (auto &Pair : PointerToBase) {
+ errs() << " derived ";
+ Pair.first->printAsOperand(errs(), false);
+ errs() << " base ";
+ Pair.second->printAsOperand(errs(), false);
+ errs() << "\n";
+ ;
+ }
}
// The base phi insertion logic (for any safepoint) may have inserted new
@@ -2515,8 +2519,10 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
PartiallyConstructedSafepointRecord &Info = Records[i];
SmallVector<Value *, 128> Bases;
- for (auto Pair : Info.PointerToBase)
- Bases.push_back(Pair.second);
+ for (auto *Derived : Info.LiveSet) {
+ assert(PointerToBase.count(Derived) && "Missed base for derived pointer");
+ Bases.push_back(PointerToBase[Derived]);
+ }
insertUseHolderAfter(ToUpdate[i], Bases, Holders);
}
@@ -2524,18 +2530,16 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
// By selecting base pointers, we've effectively inserted new uses. Thus, we
// need to rerun liveness. We may *also* have inserted new defs, but that's
// not the key issue.
- recomputeLiveInValues(F, DT, ToUpdate, Records);
+ recomputeLiveInValues(F, DT, ToUpdate, Records, PointerToBase);
if (PrintBasePointers) {
- for (auto &Info : Records) {
- errs() << "Base Pairs: (w/Relocation)\n";
- for (auto Pair : Info.PointerToBase) {
- errs() << " derived ";
- Pair.first->printAsOperand(errs(), false);
- errs() << " base ";
- Pair.second->printAsOperand(errs(), false);
- errs() << "\n";
- }
+ errs() << "Base Pairs: (w/Relocation)\n";
+ for (auto Pair : PointerToBase) {
+ errs() << " derived ";
+ Pair.first->printAsOperand(errs(), false);
+ errs() << " base ";
+ Pair.second->printAsOperand(errs(), false);
+ errs() << "\n";
}
}
@@ -2547,10 +2551,12 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
// Note that the relocation placement code relies on this filtering for
// correctness as it expects the base to be in the liveset, which isn't true
// if the base is constant.
- for (auto &Info : Records)
- for (auto &BasePair : Info.PointerToBase)
- if (isa<Constant>(BasePair.second))
- Info.LiveSet.remove(BasePair.first);
+ for (auto &Info : Records) {
+ Info.LiveSet.remove_if([&](Value *LiveV) {
+ assert(PointerToBase.count(LiveV) && "Missed base for derived pointer");
+ return isa<Constant>(PointerToBase[LiveV]);
+ });
+ }
for (CallInst *CI : Holders)
CI->eraseFromParent();
@@ -2561,7 +2567,7 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
// some values instead of relocating them. This is purely an optimization and
// does not influence correctness.
for (size_t i = 0; i < Records.size(); i++)
- rematerializeLiveValues(ToUpdate[i], Records[i], TTI);
+ rematerializeLiveValues(ToUpdate[i], Records[i], PointerToBase, TTI);
// We need this to safely RAUW and delete call or invoke return values that
// may themselves be live over a statepoint. For details, please see usage in
@@ -2575,7 +2581,8 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
// previous statepoint cannot be a live variable, thus we can and do remove
// the old statepoint calls as we go.)
for (size_t i = 0; i < Records.size(); i++)
- makeStatepointExplicit(DT, ToUpdate[i], Records[i], Replacements);
+ makeStatepointExplicit(DT, ToUpdate[i], Records[i], Replacements,
+ PointerToBase);
ToUpdate.clear(); // prevent accidental use of invalid calls.
@@ -2594,8 +2601,8 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
// these live sets, and migrate to using that data structure from this point
// onward.
Info.LiveSet.clear();
- Info.PointerToBase.clear();
}
+ PointerToBase.clear();
// Do all the fixups of the original live variables to their relocated selves
SmallVector<Value *, 128> Live;
@@ -3115,35 +3122,15 @@ static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data,
static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
CallBase *Call,
- PartiallyConstructedSafepointRecord &Info) {
+ PartiallyConstructedSafepointRecord &Info,
+ PointerToBaseTy &PointerToBase) {
StatepointLiveSetTy Updated;
findLiveSetAtInst(Call, RevisedLivenessData, Updated);
// We may have base pointers which are now live that weren't before. We need
// to update the PointerToBase structure to reflect this.
for (auto V : Updated)
- Info.PointerToBase.insert({V, V});
-
-#ifndef NDEBUG
- for (auto V : Updated)
- assert(Info.PointerToBase.count(V) &&
- "Must be able to find base for live value!");
-#endif
-
- // Remove any stale base mappings - this can happen since our liveness is
- // more precise then the one inherent in the base pointer analysis.
- DenseSet<Value *> ToErase;
- for (auto KVPair : Info.PointerToBase)
- if (!Updated.count(KVPair.first))
- ToErase.insert(KVPair.first);
-
- for (auto *V : ToErase)
- Info.PointerToBase.erase(V);
-
-#ifndef NDEBUG
- for (auto KVPair : Info.PointerToBase)
- assert(Updated.count(KVPair.first) && "record for non-live value");
-#endif
+ PointerToBase.insert({ V, V });
Info.LiveSet = Updated;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp
index 35497ae5ed9a..8be8946702be 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -48,6 +48,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index ac580b4161f4..b3a445368537 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -276,6 +276,8 @@ class StructurizeCFG {
void insertConditions(bool Loops);
+ void simplifyConditions();
+
void delPhiValues(BasicBlock *From, BasicBlock *To);
void addPhiValues(BasicBlock *From, BasicBlock *To);
@@ -586,6 +588,28 @@ void StructurizeCFG::insertConditions(bool Loops) {
}
}
+/// Simplify any inverted conditions that were built by buildConditions.
+void StructurizeCFG::simplifyConditions() {
+ SmallVector<Instruction *> InstToErase;
+ for (auto &I : concat<PredMap::value_type>(Predicates, LoopPreds)) {
+ auto &Preds = I.second;
+ for (auto &J : Preds) {
+ auto &Cond = J.second;
+ Instruction *Inverted;
+ if (match(Cond, m_Not(m_OneUse(m_Instruction(Inverted)))) &&
+ !Cond->use_empty()) {
+ if (auto *InvertedCmp = dyn_cast<CmpInst>(Inverted)) {
+ InvertedCmp->setPredicate(InvertedCmp->getInversePredicate());
+ Cond->replaceAllUsesWith(InvertedCmp);
+ InstToErase.push_back(cast<Instruction>(Cond));
+ }
+ }
+ }
+ }
+ for (auto *I : InstToErase)
+ I->eraseFromParent();
+}
+
/// Remove all PHI values coming from "From" into "To" and remember
/// them in DeletedPhis
void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
@@ -1065,6 +1089,7 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT) {
createFlow();
insertConditions(false);
insertConditions(true);
+ simplifyConditions();
setPhiValues();
simplifyAffectedPhis();
rebuildSSA();
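simplifyConditions folds a single-use not of a compare by flipping the compare's predicate in place and deleting the negation. The equivalence it relies on, sketched with a toy predicate enum in place of CmpInst's (names here are illustrative, not the IR API):

#include <cstdio>

enum class Pred { LT, GE, GT, LE, EQ, NE };

// Illustrative inverse-predicate table, in the spirit of getInversePredicate.
static Pred inverse(Pred P) {
  switch (P) {
  case Pred::LT: return Pred::GE;
  case Pred::GE: return Pred::LT;
  case Pred::GT: return Pred::LE;
  case Pred::LE: return Pred::GT;
  case Pred::EQ: return Pred::NE;
  case Pred::NE: return Pred::EQ;
  }
  return P;
}

static bool eval(Pred P, int A, int B) {
  switch (P) {
  case Pred::LT: return A < B;
  case Pred::GE: return A >= B;
  case Pred::GT: return A > B;
  case Pred::LE: return A <= B;
  case Pred::EQ: return A == B;
  case Pred::NE: return A != B;
  }
  return false;
}

int main() {
  // "not (A < B)" and "A >= B" are the same condition, so the pass can flip
  // the compare in place and drop the xor/not instruction entirely.
  for (int A : {1, 2, 3})
    std::printf("A=%d: !(A<2)=%d  A>=2=%d\n", A, !eval(Pred::LT, A, 2),
                eval(inverse(Pred::LT), A, 2));
  return 0;
}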
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index d6d6b1a7fa09..15c4a64eb794 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -59,7 +59,7 @@ static cl::opt<unsigned> MaxDeoptOrUnreachableSuccessorCheckDepth(
"is followed by a block that either has a terminating "
"deoptimizing call or is terminated with an unreachable"));
-void llvm::DetatchDeadBlocks(
+void llvm::detachDeadBlocks(
ArrayRef<BasicBlock *> BBs,
SmallVectorImpl<DominatorTree::UpdateType> *Updates,
bool KeepOneInputPHIs) {
@@ -110,7 +110,7 @@ void llvm::DeleteDeadBlocks(ArrayRef <BasicBlock *> BBs, DomTreeUpdater *DTU,
#endif
SmallVector<DominatorTree::UpdateType, 4> Updates;
- DetatchDeadBlocks(BBs, DTU ? &Updates : nullptr, KeepOneInputPHIs);
+ detachDeadBlocks(BBs, DTU ? &Updates : nullptr, KeepOneInputPHIs);
if (DTU)
DTU->applyUpdates(Updates);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 048e691e33cf..86413df664a0 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -694,38 +694,39 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
VMap[OrigV] = I;
}
+ // Simplify conditional branches and switches with a constant operand. We try
+ // to prune these out when cloning, but if the simplification required
+ // looking through PHI nodes, those are only available after forming the full
+ // basic block. That may leave some here, and we still want to prune the dead
+ // code as early as possible.
+ Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB])->getIterator();
+ for (BasicBlock &BB : make_range(Begin, NewFunc->end()))
+ ConstantFoldTerminator(&BB);
+
+ // Some blocks may have become unreachable as a result. Find and delete them.
+ {
+ SmallPtrSet<BasicBlock *, 16> ReachableBlocks;
+ SmallVector<BasicBlock *, 16> Worklist;
+ Worklist.push_back(&*Begin);
+ while (!Worklist.empty()) {
+ BasicBlock *BB = Worklist.pop_back_val();
+ if (ReachableBlocks.insert(BB).second)
+ append_range(Worklist, successors(BB));
+ }
+
+ SmallVector<BasicBlock *, 16> UnreachableBlocks;
+ for (BasicBlock &BB : make_range(Begin, NewFunc->end()))
+ if (!ReachableBlocks.contains(&BB))
+ UnreachableBlocks.push_back(&BB);
+ DeleteDeadBlocks(UnreachableBlocks);
+ }
+
// Now that the inlined function body has been fully constructed, go through
// and zap unconditional fall-through branches. This happens all the time when
// specializing code: code specialization turns conditional branches into
// uncond branches, and this code folds them.
- Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB])->getIterator();
Function::iterator I = Begin;
while (I != NewFunc->end()) {
- // We need to simplify conditional branches and switches with a constant
- // operand. We try to prune these out when cloning, but if the
- // simplification required looking through PHI nodes, those are only
- // available after forming the full basic block. That may leave some here,
- // and we still want to prune the dead code as early as possible.
- //
- // Do the folding before we check if the block is dead since we want code
- // like
- // bb:
- // br i1 undef, label %bb, label %bb
- // to be simplified to
- // bb:
- // br label %bb
- // before we call I->getSinglePredecessor().
- ConstantFoldTerminator(&*I);
-
- // Check if this block has become dead during inlining or other
- // simplifications. Note that the first block will appear dead, as it has
- // not yet been wired up properly.
- if (I != Begin && (pred_empty(&*I) || I->getSinglePredecessor() == &*I)) {
- BasicBlock *DeadBB = &*I++;
- DeleteDeadBlock(DeadBB);
- continue;
- }
-
BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator());
if (!BI || BI->isConditional()) {
++I;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 24cd5747c5a4..cec159f6a448 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -33,6 +33,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
@@ -857,8 +858,8 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
(ParamTy.size() + AggParamTy.size()) ==
(inputs.size() + outputs.size()) &&
"Number of scalar and aggregate params does not match inputs, outputs");
- assert(StructValues.empty() ||
- AggregateArgs && "Expeced StructValues only with AggregateArgs set");
+ assert((StructValues.empty() || AggregateArgs) &&
+ "Expeced StructValues only with AggregateArgs set");
// Concatenate scalar and aggregate params in ParamTy.
size_t NumScalarParams = ParamTy.size();
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/GlobalStatus.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/GlobalStatus.cpp
index f8ec8c6ad426..c1c5f5cc879f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/GlobalStatus.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/GlobalStatus.cpp
@@ -65,15 +65,18 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
for (const Use &U : V->uses()) {
const User *UR = U.getUser();
- if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(UR)) {
- // If the result of the constantexpr isn't pointer type, then we won't
- // know to expect it in various places. Just reject early.
- if (!isa<PointerType>(CE->getType()))
- return true;
-
- // FIXME: Do we need to add constexpr selects to VisitedUsers?
- if (analyzeGlobalAux(CE, GS, VisitedUsers))
- return true;
+ if (const Constant *C = dyn_cast<Constant>(UR)) {
+ const ConstantExpr *CE = dyn_cast<ConstantExpr>(C);
+ if (CE && isa<PointerType>(CE->getType())) {
+ // Recursively analyze pointer-typed constant expressions.
+ // FIXME: Do we need to add constexpr selects to VisitedUsers?
+ if (analyzeGlobalAux(CE, GS, VisitedUsers))
+ return true;
+ } else {
+ // Ignore dead constant users.
+ if (!isSafeToDestroyConstant(C))
+ return true;
+ }
} else if (const Instruction *I = dyn_cast<Instruction>(UR)) {
if (!GS.HasMultipleAccessingFunctions) {
const Function *F = I->getParent()->getParent();
@@ -169,10 +172,6 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
} else {
return true; // Any other non-load instruction might take address!
}
- } else if (const Constant *C = dyn_cast<Constant>(UR)) {
- // We might have a dead and dangling constant hanging off of here.
- if (!isSafeToDestroyConstant(C))
- return true;
} else {
// Otherwise must be some other user.
return true;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp
index c9f872f5b7e1..923bcc781e47 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -39,6 +39,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
@@ -671,12 +672,9 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,
// edge from this block.
SmallVector<Value *, 8> UnwindDestPHIValues;
BasicBlock *InvokeBB = II->getParent();
- for (Instruction &I : *UnwindDest) {
+ for (PHINode &PHI : UnwindDest->phis()) {
// Save the value to use for this edge.
- PHINode *PHI = dyn_cast<PHINode>(&I);
- if (!PHI)
- break;
- UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB));
+ UnwindDestPHIValues.push_back(PHI.getIncomingValueForBlock(InvokeBB));
}
// Add incoming-PHI values to the unwind destination block for the given basic
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp
index 9f33d2f82732..9a10535c9310 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp
@@ -45,6 +45,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp
index 92333408aaef..5b66da1e7082 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -737,7 +737,7 @@ TargetTransformInfo::PeelingPreferences llvm::gatherPeelingPreferences(
/// for the bulk of dynamic execution, can be further simplified by scalar
/// optimizations.
bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
- ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
+ ScalarEvolution *SE, DominatorTree &DT, AssumptionCache *AC,
bool PreserveLCSSA) {
assert(PeelCount > 0 && "Attempt to peel out zero iterations?");
assert(canPeel(L) && "Attempt to peel a loop which is not peelable?");
@@ -756,23 +756,21 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
// routes which can lead to the exit: we can reach it from the peeled
// iterations too.
DenseMap<BasicBlock *, BasicBlock *> NonLoopBlocksIDom;
- if (DT) {
- for (auto *BB : L->blocks()) {
- auto *BBDomNode = DT->getNode(BB);
- SmallVector<BasicBlock *, 16> ChildrenToUpdate;
- for (auto *ChildDomNode : BBDomNode->children()) {
- auto *ChildBB = ChildDomNode->getBlock();
- if (!L->contains(ChildBB))
- ChildrenToUpdate.push_back(ChildBB);
- }
- // The new idom of the block will be the nearest common dominator
- // of all copies of the previous idom. This is equivalent to the
- // nearest common dominator of the previous idom and the first latch,
- // which dominates all copies of the previous idom.
- BasicBlock *NewIDom = DT->findNearestCommonDominator(BB, Latch);
- for (auto *ChildBB : ChildrenToUpdate)
- NonLoopBlocksIDom[ChildBB] = NewIDom;
+ for (auto *BB : L->blocks()) {
+ auto *BBDomNode = DT.getNode(BB);
+ SmallVector<BasicBlock *, 16> ChildrenToUpdate;
+ for (auto *ChildDomNode : BBDomNode->children()) {
+ auto *ChildBB = ChildDomNode->getBlock();
+ if (!L->contains(ChildBB))
+ ChildrenToUpdate.push_back(ChildBB);
}
+ // The new idom of the block will be the nearest common dominator
+ // of all copies of the previous idom. This is equivalent to the
+ // nearest common dominator of the previous idom and the first latch,
+ // which dominates all copies of the previous idom.
+ BasicBlock *NewIDom = DT.findNearestCommonDominator(BB, Latch);
+ for (auto *ChildBB : ChildrenToUpdate)
+ NonLoopBlocksIDom[ChildBB] = NewIDom;
}
Function *F = Header->getParent();
@@ -822,11 +820,11 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
// If (cond) goto Header
// Exit:
- BasicBlock *InsertTop = SplitEdge(PreHeader, Header, DT, LI);
+ BasicBlock *InsertTop = SplitEdge(PreHeader, Header, &DT, LI);
BasicBlock *InsertBot =
- SplitBlock(InsertTop, InsertTop->getTerminator(), DT, LI);
+ SplitBlock(InsertTop, InsertTop->getTerminator(), &DT, LI);
BasicBlock *NewPreHeader =
- SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI);
+ SplitBlock(InsertBot, InsertBot->getTerminator(), &DT, LI);
InsertTop->setName(Header->getName() + ".peel.begin");
InsertBot->setName(Header->getName() + ".peel.next");
@@ -852,23 +850,21 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
ValueToValueMapTy VMap;
cloneLoopBlocks(L, Iter, InsertTop, InsertBot, ExitEdges, NewBlocks,
- LoopBlocks, VMap, LVMap, DT, LI,
+ LoopBlocks, VMap, LVMap, &DT, LI,
LoopLocalNoAliasDeclScopes);
// Remap to use values from the current iteration instead of the
// previous one.
remapInstructionsInBlocks(NewBlocks, VMap);
- if (DT) {
- // Update IDoms of the blocks reachable through exits.
- if (Iter == 0)
- for (auto BBIDom : NonLoopBlocksIDom)
- DT->changeImmediateDominator(BBIDom.first,
- cast<BasicBlock>(LVMap[BBIDom.second]));
+ // Update IDoms of the blocks reachable through exits.
+ if (Iter == 0)
+ for (auto BBIDom : NonLoopBlocksIDom)
+ DT.changeImmediateDominator(BBIDom.first,
+ cast<BasicBlock>(LVMap[BBIDom.second]));
#ifdef EXPENSIVE_CHECKS
- assert(DT->verify(DominatorTree::VerificationLevel::Fast));
+ assert(DT.verify(DominatorTree::VerificationLevel::Fast));
#endif
- }
auto *LatchBRCopy = cast<BranchInst>(VMap[LatchBR]);
updateBranchWeights(InsertBot, LatchBRCopy, ExitWeight, FallThroughWeight);
@@ -877,7 +873,7 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
LatchBRCopy->setMetadata(LLVMContext::MD_loop, nullptr);
InsertTop = InsertBot;
- InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI);
+ InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), &DT, LI);
InsertBot->setName(Header->getName() + ".peel.next");
F->getBasicBlockList().splice(InsertTop->getIterator(),
@@ -912,10 +908,10 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
SE->forgetTopmostLoop(L);
// Finally, the DomTree must be correct.
- assert(DT->verify(DominatorTree::VerificationLevel::Fast));
+ assert(DT.verify(DominatorTree::VerificationLevel::Fast));
// FIXME: Incrementally update loop-simplify
- simplifyLoop(L, DT, LI, SE, AC, nullptr, PreserveLCSSA);
+ simplifyLoop(L, &DT, LI, SE, AC, nullptr, PreserveLCSSA);
NumPeeled++;
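
Since the DominatorTree parameter is now a reference, peelLoop() no longer has an "analysis not available" mode; every caller must supply a maintained tree. A hedged call-site sketch under that assumption (the wrapper function and its freshly built analyses are illustrative, not taken from this change):

    #include "llvm/Analysis/AssumptionCache.h"
    #include "llvm/Analysis/LoopInfo.h"
    #include "llvm/Analysis/ScalarEvolution.h"
    #include "llvm/Analysis/TargetLibraryInfo.h"
    #include "llvm/IR/Dominators.h"
    #include "llvm/IR/Function.h"
    #include "llvm/Transforms/Utils/LoopPeel.h"

    using namespace llvm;

    // Illustrative wrapper: build the analyses locally and peel PeelCount
    // iterations off L, passing the DominatorTree by reference as required by
    // the updated signature.
    static bool peelOnce(Function &F, Loop *L, TargetLibraryInfo &TLI,
                         unsigned PeelCount) {
      DominatorTree DT(F);
      LoopInfo LI(DT);
      AssumptionCache AC(F);
      ScalarEvolution SE(F, TLI, AC, DT, LI);
      if (PeelCount == 0 || !canPeel(L))
        return false;
      return peelLoop(L, PeelCount, &LI, &SE, DT, &AC, /*PreserveLCSSA=*/true);
    }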
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp
index 7c9ab7f6ca2c..d6a6be2762c7 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp
@@ -264,3 +264,16 @@ void VFABI::setVectorVariantNames(
CI->addFnAttr(
Attribute::get(M->getContext(), MappingsAttrName, Buffer.str()));
}
+
+void llvm::embedBufferInModule(Module &M, MemoryBufferRef Buf,
+ StringRef SectionName) {
+ // Embed the buffer into the module.
+ Constant *ModuleConstant = ConstantDataArray::get(
+ M.getContext(), makeArrayRef(Buf.getBufferStart(), Buf.getBufferSize()));
+ GlobalVariable *GV = new GlobalVariable(
+ M, ModuleConstant->getType(), true, GlobalValue::PrivateLinkage,
+ ModuleConstant, "llvm.embedded.object");
+ GV->setSection(SectionName);
+
+ appendToCompilerUsed(M, GV);
+}
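
A short usage sketch for the new helper; the payload bytes, buffer name, and section name below are made-up example values, since the API only dictates that the data ends up in a private constant global placed in the requested section and kept alive through llvm.compiler.used:

    #include "llvm/ADT/StringRef.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/MemoryBuffer.h"
    #include "llvm/Transforms/Utils/ModuleUtils.h"

    using namespace llvm;

    // Wrap raw bytes in a MemoryBuffer and embed them into M under an
    // example section name.
    static void embedPayload(Module &M, StringRef Bytes) {
      std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(
          Bytes, "example.payload", /*RequiresNullTerminator=*/false);
      embedBufferInModule(M, Buf->getMemBufferRef(), ".example.embedded");
    }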
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp
index 7083789267d9..deaee467531d 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
#include "llvm/Support/MD5.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index b35ab57e0d87..01b433b4782a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -25,13 +25,13 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/IteratedDominanceFrontier.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
@@ -45,6 +45,7 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
#include <cassert>
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Utils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Utils.cpp
index 3ca36a1cad91..43eb5c87acee 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Utils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Utils.cpp
@@ -16,6 +16,7 @@
#include "llvm-c/Transforms/Utils.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/VNCoercion.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/VNCoercion.cpp
index bbe6b3dc23b3..637181722f63 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/VNCoercion.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/VNCoercion.cpp
@@ -2,6 +2,7 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "vncoerce"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index d11f4146b590..3290439ecd07 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -632,13 +632,6 @@ protected:
Instruction *EntryVal, VPValue *Def,
VPTransformState &State);
- /// Returns true if an instruction \p I should be scalarized instead of
- /// vectorized for the chosen vectorization factor.
- bool shouldScalarizeInstruction(Instruction *I) const;
-
- /// Returns true if we should generate a scalar version of \p IV.
- bool needsScalarInduction(Instruction *IV) const;
-
/// Returns (and creates if needed) the original loop trip count.
Value *getOrCreateTripCount(Loop *NewLoop);
@@ -2479,21 +2472,6 @@ void InnerLoopVectorizer::createVectorIntOrFpInductionPHI(
VecInd->addIncoming(LastInduction, LoopVectorLatch);
}
-bool InnerLoopVectorizer::shouldScalarizeInstruction(Instruction *I) const {
- return Cost->isScalarAfterVectorization(I, VF) ||
- Cost->isProfitableToScalarize(I, VF);
-}
-
-bool InnerLoopVectorizer::needsScalarInduction(Instruction *IV) const {
- if (shouldScalarizeInstruction(IV))
- return true;
- auto isScalarInst = [&](User *U) -> bool {
- auto *I = cast<Instruction>(U);
- return (OrigLoop->contains(I) && shouldScalarizeInstruction(I));
- };
- return llvm::any_of(IV->users(), isScalarInst);
-}
-
void InnerLoopVectorizer::widenIntOrFpInduction(
PHINode *IV, VPWidenIntOrFpInductionRecipe *Def, VPTransformState &State,
Value *CanonicalIV) {
@@ -2549,27 +2527,6 @@ void InnerLoopVectorizer::widenIntOrFpInduction(
return ScalarIV;
};
- // Create the vector values from the scalar IV, in the absence of creating a
- // vector IV.
- auto CreateSplatIV = [&](Value *ScalarIV, Value *Step) {
- Value *Broadcasted = getBroadcastInstrs(ScalarIV);
- for (unsigned Part = 0; Part < UF; ++Part) {
- Value *StartIdx;
- if (Step->getType()->isFloatingPointTy())
- StartIdx =
- getRuntimeVFAsFloat(Builder, Step->getType(), State.VF * Part);
- else
- StartIdx = getRuntimeVF(Builder, Step->getType(), State.VF * Part);
-
- Value *EntryPart =
- getStepVector(Broadcasted, StartIdx, Step, ID.getInductionOpcode(),
- State.VF, State.Builder);
- State.set(Def, EntryPart, Part);
- if (Trunc)
- addMetadata(EntryPart, Trunc);
- }
- };
-
// Fast-math-flags propagate from the original induction instruction.
IRBuilder<>::FastMathFlagGuard FMFG(Builder);
if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp()))
@@ -2605,36 +2562,18 @@ void InnerLoopVectorizer::widenIntOrFpInduction(
return;
}
- // Determine if we want a scalar version of the induction variable. This is
- // true if the induction variable itself is not widened, or if it has at
- // least one user in the loop that is not widened.
- auto NeedsScalarIV = needsScalarInduction(EntryVal);
- if (!NeedsScalarIV) {
+ // Create a new independent vector induction variable, if one is needed.
+ if (Def->needsVectorIV())
createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal, Def, State);
- return;
- }
- // Try to create a new independent vector induction variable. If we can't
- // create the phi node, we will splat the scalar induction variable in each
- // loop iteration.
- if (!shouldScalarizeInstruction(EntryVal)) {
- createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal, Def, State);
- Value *ScalarIV = CreateScalarIV(Step);
+ if (Def->needsScalarIV()) {
// Create scalar steps that can be used by instructions we will later
// scalarize. Note that the addition of the scalar steps will not increase
// the number of instructions in the loop in the common case prior to
// InstCombine. We will be trading one vector extract for each scalar step.
+ Value *ScalarIV = CreateScalarIV(Step);
buildScalarSteps(ScalarIV, Step, EntryVal, ID, Def, State);
- return;
}
-
- // All IV users are scalar instructions, so only emit a scalar IV, not a
- // vectorised IV. Except when we tail-fold, then the splat IV feeds the
- // predicate used by the masked loads/stores.
- Value *ScalarIV = CreateScalarIV(Step);
- if (!Cost->isScalarEpilogueAllowed())
- CreateSplatIV(ScalarIV, Step);
- buildScalarSteps(ScalarIV, Step, EntryVal, ID, Def, State);
}
void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
@@ -2663,17 +2602,15 @@ void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
}
// Determine the number of scalars we need to generate for each unroll
- // iteration. If EntryVal is uniform, we only need to generate the first
- // lane. Otherwise, we generate all VF values.
- bool IsUniform =
- Cost->isUniformAfterVectorization(cast<Instruction>(EntryVal), State.VF);
- unsigned Lanes = IsUniform ? 1 : State.VF.getKnownMinValue();
+ // iteration.
+ bool FirstLaneOnly = vputils::onlyFirstLaneUsed(Def);
+ unsigned Lanes = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
// Compute the scalar steps and save the results in State.
Type *IntStepTy = IntegerType::get(ScalarIVTy->getContext(),
ScalarIVTy->getScalarSizeInBits());
Type *VecIVTy = nullptr;
Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr;
- if (!IsUniform && State.VF.isScalable()) {
+ if (!FirstLaneOnly && State.VF.isScalable()) {
VecIVTy = VectorType::get(ScalarIVTy, State.VF);
UnitStepVec =
Builder.CreateStepVector(VectorType::get(IntStepTy, State.VF));
@@ -2684,7 +2621,7 @@ void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
for (unsigned Part = 0; Part < State.UF; ++Part) {
Value *StartIdx0 = createStepForVF(Builder, IntStepTy, State.VF, Part);
- if (!IsUniform && State.VF.isScalable()) {
+ if (!FirstLaneOnly && State.VF.isScalable()) {
auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0);
auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec);
if (ScalarIVTy->isFloatingPointTy())
@@ -4565,7 +4502,7 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
// Determine the number of scalars we need to generate for each unroll
// iteration. If the instruction is uniform, we only need to generate the
// first lane. Otherwise, we generate all VF values.
- bool IsUniform = Cost->isUniformAfterVectorization(P, State.VF);
+ bool IsUniform = vputils::onlyFirstLaneUsed(PhiR);
assert((IsUniform || !State.VF.isScalable()) &&
"Cannot scalarize a scalable VF");
unsigned Lanes = IsUniform ? 1 : State.VF.getFixedValue();
@@ -5889,7 +5826,9 @@ bool LoopVectorizationCostModel::isEpilogueVectorizationProfitable(
// consider interleaving beneficial (eg. MVE).
if (TTI.getMaxInterleaveFactor(VF.getKnownMinValue()) <= 1)
return false;
- if (VF.getFixedValue() >= EpilogueVectorizationMinVF)
+ // FIXME: We should consider changing the threshold for scalable
+ // vectors to take VScaleForTuning into account.
+ if (VF.getKnownMinValue() >= EpilogueVectorizationMinVF)
return true;
return false;
}
@@ -5940,29 +5879,21 @@ LoopVectorizationCostModel::selectEpilogueVectorizationFactor(
return Result;
}
- auto FixedMainLoopVF = ElementCount::getFixed(MainLoopVF.getKnownMinValue());
- if (MainLoopVF.isScalable())
- LLVM_DEBUG(
- dbgs() << "LEV: Epilogue vectorization using scalable vectors not "
- "yet supported. Converting to fixed-width (VF="
- << FixedMainLoopVF << ") instead\n");
-
- if (!isEpilogueVectorizationProfitable(FixedMainLoopVF)) {
+ if (!isEpilogueVectorizationProfitable(MainLoopVF)) {
LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization is not profitable for "
"this loop\n");
return Result;
}
for (auto &NextVF : ProfitableVFs)
- if (ElementCount::isKnownLT(NextVF.Width, FixedMainLoopVF) &&
- (Result.Width.getFixedValue() == 1 ||
- isMoreProfitable(NextVF, Result)) &&
+ if (ElementCount::isKnownLT(NextVF.Width, MainLoopVF) &&
+ (Result.Width.isScalar() || isMoreProfitable(NextVF, Result)) &&
LVP.hasPlanWithVF(NextVF.Width))
Result = NextVF;
if (Result != VectorizationFactor::Disabled())
LLVM_DEBUG(dbgs() << "LEV: Vectorizing epilogue loop with VF = "
- << Result.Width.getFixedValue() << "\n";);
+ << Result.Width << "\n";);
return Result;
}
@@ -8546,16 +8477,54 @@ VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I,
Mask, Consecutive, Reverse);
}
-VPWidenIntOrFpInductionRecipe *
-VPRecipeBuilder::tryToOptimizeInductionPHI(PHINode *Phi,
- ArrayRef<VPValue *> Operands) const {
+static VPWidenIntOrFpInductionRecipe *
+createWidenInductionRecipe(PHINode *Phi, Instruction *PhiOrTrunc,
+ VPValue *Start, const InductionDescriptor &IndDesc,
+ LoopVectorizationCostModel &CM, Loop &OrigLoop,
+ VFRange &Range) {
+ // Returns true if an instruction \p I should be scalarized instead of
+ // vectorized for the chosen vectorization factor.
+ auto ShouldScalarizeInstruction = [&CM](Instruction *I, ElementCount VF) {
+ return CM.isScalarAfterVectorization(I, VF) ||
+ CM.isProfitableToScalarize(I, VF);
+ };
+
+ bool NeedsScalarIV = LoopVectorizationPlanner::getDecisionAndClampRange(
+ [&](ElementCount VF) {
+ // Returns true if we should generate a scalar version of \p IV.
+ if (ShouldScalarizeInstruction(PhiOrTrunc, VF))
+ return true;
+ auto isScalarInst = [&](User *U) -> bool {
+ auto *I = cast<Instruction>(U);
+ return OrigLoop.contains(I) && ShouldScalarizeInstruction(I, VF);
+ };
+ return any_of(PhiOrTrunc->users(), isScalarInst);
+ },
+ Range);
+ bool NeedsScalarIVOnly = LoopVectorizationPlanner::getDecisionAndClampRange(
+ [&](ElementCount VF) {
+ return ShouldScalarizeInstruction(PhiOrTrunc, VF);
+ },
+ Range);
+ assert(IndDesc.getStartValue() ==
+ Phi->getIncomingValueForBlock(OrigLoop.getLoopPreheader()));
+ if (auto *TruncI = dyn_cast<TruncInst>(PhiOrTrunc)) {
+ return new VPWidenIntOrFpInductionRecipe(Phi, Start, IndDesc, TruncI,
+ NeedsScalarIV, !NeedsScalarIVOnly);
+ }
+ assert(isa<PHINode>(PhiOrTrunc) && "must be a phi node here");
+ return new VPWidenIntOrFpInductionRecipe(Phi, Start, IndDesc, NeedsScalarIV,
+ !NeedsScalarIVOnly);
+}
+
+VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionPHI(
+ PHINode *Phi, ArrayRef<VPValue *> Operands, VFRange &Range) const {
+
// Check if this is an integer or fp induction. If so, build the recipe that
// produces its scalar and vector values.
- if (auto *II = Legal->getIntOrFpInductionDescriptor(Phi)) {
- assert(II->getStartValue() ==
- Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader()));
- return new VPWidenIntOrFpInductionRecipe(Phi, Operands[0], *II);
- }
+ if (auto *II = Legal->getIntOrFpInductionDescriptor(Phi))
+ return createWidenInductionRecipe(Phi, Phi, Operands[0], *II, CM, *OrigLoop,
+ Range);
return nullptr;
}
@@ -8583,7 +8552,7 @@ VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionTruncate(
auto *Phi = cast<PHINode>(I->getOperand(0));
const InductionDescriptor &II = *Legal->getIntOrFpInductionDescriptor(Phi);
VPValue *Start = Plan.getOrAddVPValue(II.getStartValue());
- return new VPWidenIntOrFpInductionRecipe(Phi, Start, II, I);
+ return createWidenInductionRecipe(Phi, I, Start, II, CM, *OrigLoop, Range);
}
return nullptr;
}
@@ -8865,7 +8834,7 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
if (auto Phi = dyn_cast<PHINode>(Instr)) {
if (Phi->getParent() != OrigLoop->getHeader())
return tryToBlend(Phi, Operands, Plan);
- if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands)))
+ if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands, Range)))
return toVPRecipeResult(Recipe);
VPHeaderPHIRecipe *PhiRecipe = nullptr;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 99c265fc5101..15b349f53fd9 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -471,17 +471,36 @@ static bool isValidForAlternation(unsigned Opcode) {
return true;
}
+static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
+ unsigned BaseIndex = 0);
+
+/// Checks if the provided operands of 2 cmp instructions are compatible, i.e.
+/// compatible instructions or constants, or just some other regular values.
+static bool areCompatibleCmpOps(Value *BaseOp0, Value *BaseOp1, Value *Op0,
+ Value *Op1) {
+ return (isConstant(BaseOp0) && isConstant(Op0)) ||
+ (isConstant(BaseOp1) && isConstant(Op1)) ||
+ (!isa<Instruction>(BaseOp0) && !isa<Instruction>(Op0) &&
+ !isa<Instruction>(BaseOp1) && !isa<Instruction>(Op1)) ||
+ getSameOpcode({BaseOp0, Op0}).getOpcode() ||
+ getSameOpcode({BaseOp1, Op1}).getOpcode();
+}
+
/// \returns analysis of the Instructions in \p VL described in
/// InstructionsState, the Opcode that we suppose the whole list
/// could be vectorized even if its structure is diverse.
static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
- unsigned BaseIndex = 0) {
+ unsigned BaseIndex) {
// Make sure these are all Instructions.
if (llvm::any_of(VL, [](Value *V) { return !isa<Instruction>(V); }))
return InstructionsState(VL[BaseIndex], nullptr, nullptr);
bool IsCastOp = isa<CastInst>(VL[BaseIndex]);
bool IsBinOp = isa<BinaryOperator>(VL[BaseIndex]);
+ bool IsCmpOp = isa<CmpInst>(VL[BaseIndex]);
+ CmpInst::Predicate BasePred =
+ IsCmpOp ? cast<CmpInst>(VL[BaseIndex])->getPredicate()
+ : CmpInst::BAD_ICMP_PREDICATE;
unsigned Opcode = cast<Instruction>(VL[BaseIndex])->getOpcode();
unsigned AltOpcode = Opcode;
unsigned AltIndex = BaseIndex;
@@ -514,6 +533,57 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
continue;
}
}
+ } else if (IsCmpOp && isa<CmpInst>(VL[Cnt])) {
+ auto *BaseInst = cast<Instruction>(VL[BaseIndex]);
+ auto *Inst = cast<Instruction>(VL[Cnt]);
+ Type *Ty0 = BaseInst->getOperand(0)->getType();
+ Type *Ty1 = Inst->getOperand(0)->getType();
+ if (Ty0 == Ty1) {
+ Value *BaseOp0 = BaseInst->getOperand(0);
+ Value *BaseOp1 = BaseInst->getOperand(1);
+ Value *Op0 = Inst->getOperand(0);
+ Value *Op1 = Inst->getOperand(1);
+ CmpInst::Predicate CurrentPred =
+ cast<CmpInst>(VL[Cnt])->getPredicate();
+ CmpInst::Predicate SwappedCurrentPred =
+ CmpInst::getSwappedPredicate(CurrentPred);
+ // Check for compatible operands. If the corresponding operands are not
+ // compatible, we need to perform alternate vectorization.
+ if (InstOpcode == Opcode) {
+ if (BasePred == CurrentPred &&
+ areCompatibleCmpOps(BaseOp0, BaseOp1, Op0, Op1))
+ continue;
+ if (BasePred == SwappedCurrentPred &&
+ areCompatibleCmpOps(BaseOp0, BaseOp1, Op1, Op0))
+ continue;
+ if (E == 2 &&
+ (BasePred == CurrentPred || BasePred == SwappedCurrentPred))
+ continue;
+ auto *AltInst = cast<CmpInst>(VL[AltIndex]);
+ CmpInst::Predicate AltPred = AltInst->getPredicate();
+ Value *AltOp0 = AltInst->getOperand(0);
+ Value *AltOp1 = AltInst->getOperand(1);
+ // Check if operands are compatible with alternate operands.
+ if (AltPred == CurrentPred &&
+ areCompatibleCmpOps(AltOp0, AltOp1, Op0, Op1))
+ continue;
+ if (AltPred == SwappedCurrentPred &&
+ areCompatibleCmpOps(AltOp0, AltOp1, Op1, Op0))
+ continue;
+ }
+ if (BaseIndex == AltIndex) {
+ assert(isValidForAlternation(Opcode) &&
+ isValidForAlternation(InstOpcode) &&
+ "Cast isn't safe for alternation, logic needs to be updated!");
+ AltIndex = Cnt;
+ continue;
+ }
+ auto *AltInst = cast<CmpInst>(VL[AltIndex]);
+ CmpInst::Predicate AltPred = AltInst->getPredicate();
+ if (BasePred == CurrentPred || BasePred == SwappedCurrentPred ||
+ AltPred == CurrentPred || AltPred == SwappedCurrentPred)
+ continue;
+ }
} else if (InstOpcode == Opcode || InstOpcode == AltOpcode)
continue;
return InstructionsState(VL[BaseIndex], nullptr, nullptr);
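
To make the new cmp handling concrete, the following illustration (written for this description, not a test from the change) shows the kind of scalar pattern that getSameOpcode() with areCompatibleCmpOps() now accepts as a single opcode: the second compare uses the swapped predicate with swapped operands, so after commuting its operands it matches the first one instead of forcing the bundle to be rejected.

    // a[i] < b[i] is an icmp slt; b[i] > a[i] is an icmp sgt with the operands
    // swapped, i.e. the same comparison after commuting them. The analysis
    // above now treats the pair as one vectorizable opcode.
    void cmp_pair(const int *a, const int *b, bool *out) {
      out[0] = a[0] < b[0];
      out[1] = b[1] > a[1];
    }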
@@ -3307,9 +3377,14 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
MapVector<OrdersType, unsigned,
DenseMap<OrdersType, unsigned, OrdersTypeDenseMapInfo>>
OrdersUses;
+ // Do the analysis for each tree entry only once, otherwise the order of
+ // the same node may be considered several times, though it might not be
+ // profitable.
SmallPtrSet<const TreeEntry *, 4> VisitedOps;
for (const auto &Op : Data.second) {
TreeEntry *OpTE = Op.second;
+ if (!VisitedOps.insert(OpTE).second)
+ continue;
if (!OpTE->ReuseShuffleIndices.empty() ||
(IgnoreReorder && OpTE == VectorizableTree.front().get()))
continue;
@@ -3333,9 +3408,8 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
} else {
++OrdersUses.insert(std::make_pair(Order, 0)).first->second;
}
- if (VisitedOps.insert(OpTE).second)
- OrdersUses.insert(std::make_pair(OrdersType(), 0)).first->second +=
- OpTE->UserTreeIndices.size();
+ OrdersUses.insert(std::make_pair(OrdersType(), 0)).first->second +=
+ OpTE->UserTreeIndices.size();
assert(OrdersUses[{}] > 0 && "Counter cannot be less than 0.");
--OrdersUses[{}];
}
@@ -4350,9 +4424,41 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
LLVM_DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n");
// Reorder operands if reordering would enable vectorization.
- if (isa<BinaryOperator>(VL0)) {
+ auto *CI = dyn_cast<CmpInst>(VL0);
+ if (isa<BinaryOperator>(VL0) || CI) {
ValueList Left, Right;
- reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE, *this);
+ if (!CI || all_of(VL, [](Value *V) {
+ return cast<CmpInst>(V)->isCommutative();
+ })) {
+ reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE, *this);
+ } else {
+ CmpInst::Predicate P0 = CI->getPredicate();
+ CmpInst::Predicate AltP0 = cast<CmpInst>(S.AltOp)->getPredicate();
+ CmpInst::Predicate AltP0Swapped = CmpInst::getSwappedPredicate(AltP0);
+ Value *BaseOp0 = VL0->getOperand(0);
+ Value *BaseOp1 = VL0->getOperand(1);
+ // Collect operands - commute if it uses the swapped predicate or
+ // alternate operation.
+ for (Value *V : VL) {
+ auto *Cmp = cast<CmpInst>(V);
+ Value *LHS = Cmp->getOperand(0);
+ Value *RHS = Cmp->getOperand(1);
+ CmpInst::Predicate CurrentPred = CI->getPredicate();
+ CmpInst::Predicate CurrentPredSwapped =
+ CmpInst::getSwappedPredicate(CurrentPred);
+ if (P0 == AltP0 || P0 == AltP0Swapped) {
+ if ((P0 == CurrentPred &&
+ !areCompatibleCmpOps(BaseOp0, BaseOp1, LHS, RHS)) ||
+ (P0 == CurrentPredSwapped &&
+ !areCompatibleCmpOps(BaseOp0, BaseOp1, RHS, LHS)))
+ std::swap(LHS, RHS);
+ } else if (!areCompatibleCmpOps(BaseOp0, BaseOp1, LHS, RHS)) {
+ std::swap(LHS, RHS);
+ }
+ Left.push_back(LHS);
+ Right.push_back(RHS);
+ }
+ }
TE->setOperand(0, Left);
TE->setOperand(1, Right);
buildTree_rec(Left, Depth + 1, {TE, 0});
@@ -5284,7 +5390,8 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
((Instruction::isBinaryOp(E->getOpcode()) &&
Instruction::isBinaryOp(E->getAltOpcode())) ||
(Instruction::isCast(E->getOpcode()) &&
- Instruction::isCast(E->getAltOpcode()))) &&
+ Instruction::isCast(E->getAltOpcode())) ||
+ (isa<CmpInst>(VL0) && isa<CmpInst>(E->getAltOp()))) &&
"Invalid Shuffle Vector Operand");
InstructionCost ScalarCost = 0;
if (NeedToShuffleReuses) {
@@ -5332,6 +5439,14 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind);
VecCost += TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy,
CostKind);
+ } else if (auto *CI0 = dyn_cast<CmpInst>(VL0)) {
+ VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), ScalarTy,
+ Builder.getInt1Ty(),
+ CI0->getPredicate(), CostKind, VL0);
+ VecCost += TTI->getCmpSelInstrCost(
+ E->getOpcode(), ScalarTy, Builder.getInt1Ty(),
+ cast<CmpInst>(E->getAltOp())->getPredicate(), CostKind,
+ E->getAltOp());
} else {
Type *Src0SclTy = E->getMainOp()->getOperand(0)->getType();
Type *Src1SclTy = E->getAltOp()->getOperand(0)->getType();
@@ -5348,6 +5463,29 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
E->Scalars, E->ReorderIndices, E->ReuseShuffleIndices,
[E](Instruction *I) {
assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
+ if (auto *CI0 = dyn_cast<CmpInst>(E->getMainOp())) {
+ auto *AltCI0 = cast<CmpInst>(E->getAltOp());
+ auto *CI = cast<CmpInst>(I);
+ CmpInst::Predicate P0 = CI0->getPredicate();
+ CmpInst::Predicate AltP0 = AltCI0->getPredicate();
+ CmpInst::Predicate AltP0Swapped =
+ CmpInst::getSwappedPredicate(AltP0);
+ CmpInst::Predicate CurrentPred = CI->getPredicate();
+ CmpInst::Predicate CurrentPredSwapped =
+ CmpInst::getSwappedPredicate(CurrentPred);
+ if (P0 == AltP0 || P0 == AltP0Swapped) {
+ // Alternate cmps have same/swapped predicate as main cmps but
+ // different order of compatible operands.
+ return !(
+ (P0 == CurrentPred &&
+ areCompatibleCmpOps(CI0->getOperand(0), CI0->getOperand(1),
+ I->getOperand(0), I->getOperand(1))) ||
+ (P0 == CurrentPredSwapped &&
+ areCompatibleCmpOps(CI0->getOperand(0), CI0->getOperand(1),
+ I->getOperand(1), I->getOperand(0))));
+ }
+ return CurrentPred != P0 && CurrentPredSwapped != P0;
+ }
return I->getOpcode() == E->getAltOpcode();
},
Mask);
@@ -6830,11 +6968,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
((Instruction::isBinaryOp(E->getOpcode()) &&
Instruction::isBinaryOp(E->getAltOpcode())) ||
(Instruction::isCast(E->getOpcode()) &&
- Instruction::isCast(E->getAltOpcode()))) &&
+ Instruction::isCast(E->getAltOpcode())) ||
+ (isa<CmpInst>(VL0) && isa<CmpInst>(E->getAltOp()))) &&
"Invalid Shuffle Vector Operand");
Value *LHS = nullptr, *RHS = nullptr;
- if (Instruction::isBinaryOp(E->getOpcode())) {
+ if (Instruction::isBinaryOp(E->getOpcode()) || isa<CmpInst>(VL0)) {
setInsertPointAfterBundle(E);
LHS = vectorizeTree(E->getOperand(0));
RHS = vectorizeTree(E->getOperand(1));
@@ -6854,6 +6993,15 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
static_cast<Instruction::BinaryOps>(E->getOpcode()), LHS, RHS);
V1 = Builder.CreateBinOp(
static_cast<Instruction::BinaryOps>(E->getAltOpcode()), LHS, RHS);
+ } else if (auto *CI0 = dyn_cast<CmpInst>(VL0)) {
+ V0 = Builder.CreateCmp(CI0->getPredicate(), LHS, RHS);
+ auto *AltCI = cast<CmpInst>(E->getAltOp());
+ CmpInst::Predicate AltPred = AltCI->getPredicate();
+ unsigned AltIdx =
+ std::distance(E->Scalars.begin(), find(E->Scalars, AltCI));
+ if (AltCI->getOperand(0) != E->getOperand(0)[AltIdx])
+ AltPred = CmpInst::getSwappedPredicate(AltPred);
+ V1 = Builder.CreateCmp(AltPred, LHS, RHS);
} else {
V0 = Builder.CreateCast(
static_cast<Instruction::CastOps>(E->getOpcode()), LHS, VecTy);
@@ -6878,6 +7026,29 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
E->Scalars, E->ReorderIndices, E->ReuseShuffleIndices,
[E](Instruction *I) {
assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
+ if (auto *CI0 = dyn_cast<CmpInst>(E->getMainOp())) {
+ auto *AltCI0 = cast<CmpInst>(E->getAltOp());
+ auto *CI = cast<CmpInst>(I);
+ CmpInst::Predicate P0 = CI0->getPredicate();
+ CmpInst::Predicate AltP0 = AltCI0->getPredicate();
+ CmpInst::Predicate AltP0Swapped =
+ CmpInst::getSwappedPredicate(AltP0);
+ CmpInst::Predicate CurrentPred = CI->getPredicate();
+ CmpInst::Predicate CurrentPredSwapped =
+ CmpInst::getSwappedPredicate(CurrentPred);
+ if (P0 == AltP0 || P0 == AltP0Swapped) {
+ // Alternate cmps have same/swapped predicate as main cmps but
+ // different order of compatible operands.
+ return !(
+ (P0 == CurrentPred &&
+ areCompatibleCmpOps(CI0->getOperand(0), CI0->getOperand(1),
+ I->getOperand(0), I->getOperand(1))) ||
+ (P0 == CurrentPredSwapped &&
+ areCompatibleCmpOps(CI0->getOperand(0), CI0->getOperand(1),
+ I->getOperand(1), I->getOperand(0))));
+ }
+ return CurrentPred != P0 && CurrentPredSwapped != P0;
+ }
return I->getOpcode() == E->getAltOpcode();
},
Mask, &OpScalars, &AltScalars);
@@ -7676,11 +7847,8 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
for (ScheduleData *BundleMember = picked; BundleMember;
BundleMember = BundleMember->NextInBundle) {
Instruction *pickedInst = BundleMember->Inst;
- if (pickedInst->getNextNode() != LastScheduledInst) {
- BS->BB->getInstList().remove(pickedInst);
- BS->BB->getInstList().insert(LastScheduledInst->getIterator(),
- pickedInst);
- }
+ if (pickedInst->getNextNode() != LastScheduledInst)
+ pickedInst->moveBefore(LastScheduledInst);
LastScheduledInst = pickedInst;
}
@@ -8444,7 +8612,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
if (R.isTreeTinyAndNotFullyVectorizable())
continue;
R.reorderTopToBottom();
- R.reorderBottomToTop();
+ R.reorderBottomToTop(!isa<InsertElementInst>(Ops.front()));
R.buildExternalUses();
R.computeMinimumValueSizes();
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
index e5dded3c0f1e..8822c0004eb2 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -75,7 +75,8 @@ class VPRecipeBuilder {
/// Check if an induction recipe should be constructed for \p I. If so, build and
/// return it. If not, return null.
VPWidenIntOrFpInductionRecipe *
- tryToOptimizeInductionPHI(PHINode *Phi, ArrayRef<VPValue *> Operands) const;
+ tryToOptimizeInductionPHI(PHINode *Phi, ArrayRef<VPValue *> Operands,
+ VFRange &Range) const;
/// Optimize the special case where the operand of \p I is a constant integer
/// induction variable.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp
index a96c122db2a9..342d4a074e10 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -1649,3 +1649,9 @@ void VPSlotTracker::assignSlots(const VPlan &Plan) {
for (VPValue *Def : Recipe.definedValues())
assignSlot(Def);
}
+
+bool vputils::onlyFirstLaneUsed(VPValue *Def) {
+ return all_of(Def->users(), [Def](VPUser *U) {
+ return cast<VPRecipeBase>(U)->onlyFirstLaneUsed(Def);
+ });
+}
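
The new query is what the LoopVectorize changes above use when deciding whether a VPValue must be materialized for all lanes. A minimal sketch of that decision, assuming the surrounding LoopVectorize.cpp context where VPlan.h is already included (the helper name is hypothetical):

    // Number of scalar lanes to generate for Def per unrolled part: one if
    // every user only reads lane 0, otherwise all known-minimum lanes.
    // Mirrors the Lanes computation in buildScalarSteps() above.
    static unsigned lanesToGenerate(VPValue *Def, ElementCount VF) {
      return vputils::onlyFirstLaneUsed(Def) ? 1 : VF.getKnownMinValue();
    }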
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h
index 824440f98a8b..bcaabca692cc 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -759,6 +759,14 @@ public:
bool mayReadOrWriteMemory() const {
return mayReadFromMemory() || mayWriteToMemory();
}
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ /// Conservatively returns false.
+ virtual bool onlyFirstLaneUsed(const VPValue *Op) const {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return false;
+ }
};
inline bool VPUser::classof(const VPDef *Def) {
@@ -893,6 +901,24 @@ public:
/// Set the fast-math flags.
void setFastMathFlags(FastMathFlags FMFNew);
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ if (getOperand(0) != Op)
+ return false;
+ switch (getOpcode()) {
+ default:
+ return false;
+ case VPInstruction::ActiveLaneMask:
+ case VPInstruction::CanonicalIVIncrement:
+ case VPInstruction::CanonicalIVIncrementNUW:
+ case VPInstruction::BranchOnCount:
+ return true;
+ };
+ llvm_unreachable("switch should return");
+ }
};
/// VPWidenRecipe is a recipe for producing a copy of vector type its
@@ -1027,18 +1053,24 @@ public:
class VPWidenIntOrFpInductionRecipe : public VPRecipeBase, public VPValue {
PHINode *IV;
const InductionDescriptor &IndDesc;
+ bool NeedsScalarIV;
+ bool NeedsVectorIV;
public:
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start,
- const InductionDescriptor &IndDesc)
+ const InductionDescriptor &IndDesc,
+ bool NeedsScalarIV, bool NeedsVectorIV)
: VPRecipeBase(VPWidenIntOrFpInductionSC, {Start}), VPValue(IV, this),
- IV(IV), IndDesc(IndDesc) {}
+ IV(IV), IndDesc(IndDesc), NeedsScalarIV(NeedsScalarIV),
+ NeedsVectorIV(NeedsVectorIV) {}
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start,
const InductionDescriptor &IndDesc,
- TruncInst *Trunc)
+ TruncInst *Trunc, bool NeedsScalarIV,
+ bool NeedsVectorIV)
: VPRecipeBase(VPWidenIntOrFpInductionSC, {Start}), VPValue(Trunc, this),
- IV(IV), IndDesc(IndDesc) {}
+ IV(IV), IndDesc(IndDesc), NeedsScalarIV(NeedsScalarIV),
+ NeedsVectorIV(NeedsVectorIV) {}
~VPWidenIntOrFpInductionRecipe() override = default;
@@ -1082,6 +1114,12 @@ public:
const TruncInst *TruncI = getTruncInst();
return TruncI ? TruncI->getType() : IV->getType();
}
+
+ /// Returns true if a scalar phi needs to be created for the induction.
+ bool needsScalarIV() const { return NeedsScalarIV; }
+
+ /// Returns true if a vector phi needs to be created for the induction.
+ bool needsVectorIV() const { return NeedsVectorIV; }
};
/// A pure virtual base class for all recipes modeling header phis, including
@@ -1318,6 +1356,17 @@ public:
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
#endif
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ // Recursing through Blend recipes only; the recursion must terminate at
+ // header phis at the latest.
+ return all_of(users(), [this](VPUser *U) {
+ return cast<VPRecipeBase>(U)->onlyFirstLaneUsed(this);
+ });
+ }
};
/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
@@ -1495,6 +1544,13 @@ public:
bool isPacked() const { return AlsoPack; }
bool isPredicated() const { return IsPredicated; }
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return isUniform();
+ }
};
/// A recipe for generating conditional branches on the bits of a mask.
@@ -1651,6 +1707,16 @@ public:
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
#endif
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+
+ // Widened, consecutive memory operations only demand the first lane of
+ // their address.
+ return Op == getAddr() && isConsecutive();
+ }
};
/// Canonical scalar induction phi of the vector loop. Starting at the specified
@@ -1686,6 +1752,13 @@ public:
const Type *getScalarType() const {
return getOperand(0)->getLiveInIRValue()->getType();
}
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return true;
+ }
};
/// A Recipe for widening the canonical induction variable of the vector loop.
@@ -2766,6 +2839,14 @@ public:
/// Return true if all visited instruction can be combined.
bool isCompletelySLP() const { return CompletelySLP; }
};
+
+namespace vputils {
+
+/// Returns true if only the first lane of \p Def is used.
+bool onlyFirstLaneUsed(VPValue *Def);
+
+} // end namespace vputils
+
} // end namespace llvm
#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index fb5f3d428189..70ce773a8a85 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -47,7 +47,8 @@ void VPlanTransforms::VPInstructionsToVPRecipes(
auto *Phi = cast<PHINode>(VPPhi->getUnderlyingValue());
if (const auto *II = GetIntOrFpInductionDescriptor(Phi)) {
VPValue *Start = Plan->getOrAddVPValue(II->getStartValue());
- NewRecipe = new VPWidenIntOrFpInductionRecipe(Phi, Start, *II);
+ NewRecipe =
+ new VPWidenIntOrFpInductionRecipe(Phi, Start, *II, false, true);
} else {
Plan->addVPValue(Phi, VPPhi);
continue;
@@ -341,10 +342,16 @@ void VPlanTransforms::removeRedundantCanonicalIVs(VPlan &Plan) {
for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
- // If the induction recipe is canonical and the types match, use it
- // directly.
- if (WidenOriginalIV && WidenOriginalIV->isCanonical() &&
- WidenOriginalIV->getScalarType() == WidenNewIV->getScalarType()) {
+ if (!WidenOriginalIV || !WidenOriginalIV->isCanonical() ||
+ WidenOriginalIV->getScalarType() != WidenNewIV->getScalarType())
+ continue;
+
+ // Replace WidenNewIV with WidenOriginalIV if WidenOriginalIV provides
+ // everything WidenNewIV's users need. That is, WidenOriginalIV will
+ // generate a vector phi or all users of WidenNewIV demand the first lane
+ // only.
+ if (WidenOriginalIV->needsVectorIV() ||
+ vputils::onlyFirstLaneUsed(WidenNewIV)) {
WidenNewIV->replaceAllUsesWith(WidenOriginalIV);
WidenNewIV->eraseFromParent();
return;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/Vectorize.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/Vectorize.cpp
index 0296a995ad29..010ca28fc237 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/Vectorize.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/Vectorize.cpp
@@ -18,6 +18,7 @@
#include "llvm/Analysis/Passes.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/InitializePasses.h"
+#include "llvm/PassRegistry.h"
using namespace llvm;