Diffstat (limited to 'contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp')
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp  860
1 file changed, 592 insertions, 268 deletions
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 4b4196edc12b..a4e695497f30 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -21,8 +21,11 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
@@ -64,10 +67,15 @@ using namespace llvm;
#define DEBUG_TYPE "memcpyopt"
+static cl::opt<bool>
+ EnableMemorySSA("enable-memcpyopt-memoryssa", cl::init(false), cl::Hidden,
+ cl::desc("Use MemorySSA-backed MemCpyOpt."));
+
STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
STATISTIC(NumMemSetInfer, "Number of memsets inferred");
STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
+STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
namespace {
@@ -271,11 +279,17 @@ private:
AU.setPreservesCFG();
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<MemoryDependenceWrapperPass>();
- AU.addRequired<AAResultsWrapperPass>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ if (!EnableMemorySSA)
+ AU.addRequired<MemoryDependenceWrapperPass>();
AU.addPreserved<MemoryDependenceWrapperPass>();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ if (EnableMemorySSA)
+ AU.addRequired<MemorySSAWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
}
};
@@ -297,6 +311,56 @@ INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_END(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization",
false, false)
+// Check that V is either not accessible by the caller, or unwinding cannot
+// occur between Start and End.
+static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start,
+ Instruction *End) {
+ assert(Start->getParent() == End->getParent() && "Must be in same block");
+ if (!Start->getFunction()->doesNotThrow() &&
+ !isa<AllocaInst>(getUnderlyingObject(V))) {
+ for (const Instruction &I :
+ make_range(Start->getIterator(), End->getIterator())) {
+ if (I.mayThrow())
+ return true;
+ }
+ }
+ return false;
+}
+
+void MemCpyOptPass::eraseInstruction(Instruction *I) {
+ if (MSSAU)
+ MSSAU->removeMemoryAccess(I);
+ if (MD)
+ MD->removeInstruction(I);
+ I->eraseFromParent();
+}
+
+// Check for mod or ref of Loc between Start and End, excluding both boundaries.
+// Start and End must be in the same block
+static bool accessedBetween(AliasAnalysis &AA, MemoryLocation Loc,
+ const MemoryUseOrDef *Start,
+ const MemoryUseOrDef *End) {
+ assert(Start->getBlock() == End->getBlock() && "Only local supported");
+ for (const MemoryAccess &MA :
+ make_range(++Start->getIterator(), End->getIterator())) {
+ if (isModOrRefSet(AA.getModRefInfo(cast<MemoryUseOrDef>(MA).getMemoryInst(),
+ Loc)))
+ return true;
+ }
+ return false;
+}
+
+// Check for mod of Loc between Start and End, excluding both boundaries.
+// Start and End can be in different blocks.
+static bool writtenBetween(MemorySSA *MSSA, MemoryLocation Loc,
+ const MemoryUseOrDef *Start,
+ const MemoryUseOrDef *End) {
+ // TODO: Only walk until we hit Start.
+ MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
+ End->getDefiningAccess(), Loc);
+ return !MSSA->dominates(Clobber, Start);
+}
+
/// When scanning forward over instructions, we look for some other patterns to
/// fold away. In particular, this looks for stores to neighboring locations of
/// memory. If it sees enough consecutive ones, it attempts to merge them
@@ -313,7 +377,27 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
MemsetRanges Ranges(DL);
BasicBlock::iterator BI(StartInst);
+
+ // Keeps track of the last memory use or def before the insertion point for
+ // the new memset. The new MemoryDef for the inserted memsets will be inserted
+ // after MemInsertPoint. It points to either LastMemDef or to the last user
+ // before the insertion point of the memset, if there are any such users.
+ MemoryUseOrDef *MemInsertPoint = nullptr;
+ // Keeps track of the last MemoryDef between StartInst and the insertion point
+ // for the new memset. This will become the defining access of the inserted
+ // memsets.
+ MemoryDef *LastMemDef = nullptr;
for (++BI; !BI->isTerminator(); ++BI) {
+ if (MSSAU) {
+ auto *CurrentAcc = cast_or_null<MemoryUseOrDef>(
+ MSSAU->getMemorySSA()->getMemoryAccess(&*BI));
+ if (CurrentAcc) {
+ MemInsertPoint = CurrentAcc;
+ if (auto *CurrentDef = dyn_cast<MemoryDef>(CurrentAcc))
+ LastMemDef = CurrentDef;
+ }
+ }
+
if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
// If the instruction is readnone, ignore it, otherwise bail out. We
// don't even allow readonly here because we don't want something like:
@@ -327,8 +411,15 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
// If this is a store, see if we can merge it in.
if (!NextStore->isSimple()) break;
+ Value *StoredVal = NextStore->getValueOperand();
+
+ // Don't convert stores of non-integral pointer types to memsets (which
+ // stores integers).
+ if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
+ break;
+
// Check to see if this stored value is of the same byte-splattable value.
- Value *StoredByte = isBytewiseValue(NextStore->getOperand(0), DL);
+ Value *StoredByte = isBytewiseValue(StoredVal, DL);
if (isa<UndefValue>(ByteVal) && StoredByte)
ByteVal = StoredByte;
if (ByteVal != StoredByte)
@@ -392,15 +483,27 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
: Range.TheStores) dbgs()
<< *SI << '\n';
dbgs() << "With: " << *AMemSet << '\n');
-
if (!Range.TheStores.empty())
AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
- // Zap all the stores.
- for (Instruction *SI : Range.TheStores) {
- MD->removeInstruction(SI);
- SI->eraseFromParent();
+ if (MSSAU) {
+ assert(LastMemDef && MemInsertPoint &&
+ "Both LastMemDef and MemInsertPoint need to be set");
+ auto *NewDef =
+ cast<MemoryDef>(MemInsertPoint->getMemoryInst() == &*BI
+ ? MSSAU->createMemoryAccessBefore(
+ AMemSet, LastMemDef, MemInsertPoint)
+ : MSSAU->createMemoryAccessAfter(
+ AMemSet, LastMemDef, MemInsertPoint));
+ MSSAU->insertDef(NewDef, /*RenameUses=*/true);
+ LastMemDef = NewDef;
+ MemInsertPoint = NewDef;
}
+
+ // Zap all the stores.
+ for (Instruction *SI : Range.TheStores)
+ eraseInstruction(SI);
+
++NumMemSetInfer;
}
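
For illustration only (hypothetical identifiers, not part of the patch): the store merging updated above targets runs of adjacent byte-splattable stores, roughly:

    void zeroFour(int *p) {
      p[0] = 0;
      p[1] = 0;
      p[2] = 0;
      p[3] = 0;   // the four stores are merged into memset(p, 0, 16)
    }
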
@@ -411,11 +514,10 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
// It will lift the store and its arguments, plus anything that
// may alias with these.
// The method returns true if it was successful.
-static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P,
- const LoadInst *LI) {
+bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
// If the store alias this position, early bail out.
MemoryLocation StoreLoc = MemoryLocation::get(SI);
- if (isModOrRefSet(AA.getModRefInfo(P, StoreLoc)))
+ if (isModOrRefSet(AA->getModRefInfo(P, StoreLoc)))
return false;
// Keep track of the arguments of all instruction we plan to lift
@@ -426,7 +528,7 @@ static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P,
Args.insert(Ptr);
// Instruction to lift before P.
- SmallVector<Instruction*, 8> ToLift;
+ SmallVector<Instruction *, 8> ToLift{SI};
// Memory locations of lifted instructions.
SmallVector<MemoryLocation, 8> MemLocs{StoreLoc};
@@ -439,19 +541,24 @@ static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P,
for (auto I = --SI->getIterator(), E = P->getIterator(); I != E; --I) {
auto *C = &*I;
- bool MayAlias = isModOrRefSet(AA.getModRefInfo(C, None));
+ // Make sure hoisting does not perform a store that was not guaranteed to
+ // happen.
+ if (!isGuaranteedToTransferExecutionToSuccessor(C))
+ return false;
+
+ bool MayAlias = isModOrRefSet(AA->getModRefInfo(C, None));
bool NeedLift = false;
if (Args.erase(C))
NeedLift = true;
else if (MayAlias) {
- NeedLift = llvm::any_of(MemLocs, [C, &AA](const MemoryLocation &ML) {
- return isModOrRefSet(AA.getModRefInfo(C, ML));
+ NeedLift = llvm::any_of(MemLocs, [C, this](const MemoryLocation &ML) {
+ return isModOrRefSet(AA->getModRefInfo(C, ML));
});
if (!NeedLift)
- NeedLift = llvm::any_of(Calls, [C, &AA](const CallBase *Call) {
- return isModOrRefSet(AA.getModRefInfo(C, Call));
+ NeedLift = llvm::any_of(Calls, [C, this](const CallBase *Call) {
+ return isModOrRefSet(AA->getModRefInfo(C, Call));
});
}
@@ -461,18 +568,18 @@ static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P,
if (MayAlias) {
// Since LI is implicitly moved downwards past the lifted instructions,
// none of them may modify its source.
- if (isModSet(AA.getModRefInfo(C, LoadLoc)))
+ if (isModSet(AA->getModRefInfo(C, LoadLoc)))
return false;
else if (const auto *Call = dyn_cast<CallBase>(C)) {
// If we can't lift this before P, it's game over.
- if (isModOrRefSet(AA.getModRefInfo(P, Call)))
+ if (isModOrRefSet(AA->getModRefInfo(P, Call)))
return false;
Calls.push_back(Call);
} else if (isa<LoadInst>(C) || isa<StoreInst>(C) || isa<VAArgInst>(C)) {
// If we can't lift this before P, it's game over.
auto ML = MemoryLocation::get(C);
- if (isModOrRefSet(AA.getModRefInfo(P, ML)))
+ if (isModOrRefSet(AA->getModRefInfo(P, ML)))
return false;
MemLocs.push_back(ML);
@@ -492,10 +599,40 @@ static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P,
}
}
- // We made it, we need to lift
+ // Find MSSA insertion point. Normally P will always have a corresponding
+ // memory access before which we can insert. However, with non-standard AA
+ // pipelines, there may be a mismatch between AA and MSSA, in which case we
+ // will scan for a memory access before P. In either case, we know for sure
+ // that at least the load will have a memory access.
+ // TODO: Simplify this once P will be determined by MSSA, in which case the
+ // discrepancy can no longer occur.
+ MemoryUseOrDef *MemInsertPoint = nullptr;
+ if (MSSAU) {
+ if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(P)) {
+ MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator());
+ } else {
+ const Instruction *ConstP = P;
+ for (const Instruction &I : make_range(++ConstP->getReverseIterator(),
+ ++LI->getReverseIterator())) {
+ if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(&I)) {
+ MemInsertPoint = MA;
+ break;
+ }
+ }
+ }
+ }
+
+ // We made it, we need to lift.
for (auto *I : llvm::reverse(ToLift)) {
LLVM_DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n");
I->moveBefore(P);
+ if (MSSAU) {
+ assert(MemInsertPoint && "Must have found insert point");
+ if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(I)) {
+ MSSAU->moveAfter(MA, MemInsertPoint);
+ MemInsertPoint = MA;
+ }
+ }
}
return true;
@@ -515,23 +652,30 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
const DataLayout &DL = SI->getModule()->getDataLayout();
+ Value *StoredVal = SI->getValueOperand();
+
+ // Not all the transforms below are correct for non-integral pointers, bail
+ // until we've audited the individual pieces.
+ if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
+ return false;
+
// Load to store forwarding can be interpreted as memcpy.
- if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
if (LI->isSimple() && LI->hasOneUse() &&
LI->getParent() == SI->getParent()) {
auto *T = LI->getType();
if (T->isAggregateType()) {
- AliasAnalysis &AA = LookupAliasAnalysis();
MemoryLocation LoadLoc = MemoryLocation::get(LI);
// We use alias analysis to check if an instruction may store to
// the memory we load from in between the load and the store. If
// such an instruction is found, we try to promote there instead
// of at the store position.
+ // TODO: Can use MSSA for this.
Instruction *P = SI;
for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) {
- if (isModSet(AA.getModRefInfo(&I, LoadLoc))) {
+ if (isModSet(AA->getModRefInfo(&I, LoadLoc))) {
P = &I;
break;
}
@@ -542,7 +686,7 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// position if nothing alias the store memory after this and the store
// destination is not in the range.
if (P && P != SI) {
- if (!moveUp(AA, SI, P, LI))
+ if (!moveUp(SI, P, LI))
P = nullptr;
}
@@ -553,7 +697,7 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// memmove must be used to preserve semantics. If not, memcpy can
// be used.
bool UseMemMove = false;
- if (!AA.isNoAlias(MemoryLocation::get(SI), LoadLoc))
+ if (!AA->isNoAlias(MemoryLocation::get(SI), LoadLoc))
UseMemMove = true;
uint64_t Size = DL.getTypeStoreSize(T);
@@ -572,10 +716,16 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => "
<< *M << "\n");
- MD->removeInstruction(SI);
- SI->eraseFromParent();
- MD->removeInstruction(LI);
- LI->eraseFromParent();
+ if (MSSAU) {
+ auto *LastDef =
+ cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI));
+ auto *NewAccess =
+ MSSAU->createMemoryAccessAfter(M, LastDef, LastDef);
+ MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
+ }
+
+ eraseInstruction(SI);
+ eraseInstruction(LI);
++NumMemCpyInstr;
// Make sure we do not invalidate the iterator.
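
A hypothetical C++ source pattern behind the load/store promotion above (identifiers are illustrative; the aggregate copy lowers to a load/store pair that processStore turns into a memcpy, or a memmove when the two locations may alias):

    struct S { int a[16]; };

    void copyStruct(S *d, const S *s) {
      *d = *s;   // load + store of the aggregate => memcpy(d, s, sizeof(S))
    }
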
@@ -587,44 +737,50 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// Detect cases where we're performing call slot forwarding, but
// happen to be using a load-store pair to implement it, rather than
// a memcpy.
- MemDepResult ldep = MD->getDependency(LI);
CallInst *C = nullptr;
- if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst()))
- C = dyn_cast<CallInst>(ldep.getInst());
+ if (EnableMemorySSA) {
+ if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
+ MSSA->getWalker()->getClobberingMemoryAccess(LI))) {
+ // The load must post-dominate the call. Limit to the same block for now.
+ // TODO: Support non-local call-slot optimization?
+ if (LoadClobber->getBlock() == SI->getParent())
+ C = dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
+ }
+ } else {
+ MemDepResult ldep = MD->getDependency(LI);
+ if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst()))
+ C = dyn_cast<CallInst>(ldep.getInst());
+ }
if (C) {
// Check that nothing touches the dest of the "copy" between
// the call and the store.
- Value *CpyDest = SI->getPointerOperand()->stripPointerCasts();
- bool CpyDestIsLocal = isa<AllocaInst>(CpyDest);
- AliasAnalysis &AA = LookupAliasAnalysis();
MemoryLocation StoreLoc = MemoryLocation::get(SI);
- for (BasicBlock::iterator I = --SI->getIterator(), E = C->getIterator();
- I != E; --I) {
- if (isModOrRefSet(AA.getModRefInfo(&*I, StoreLoc))) {
+ if (EnableMemorySSA) {
+ if (accessedBetween(*AA, StoreLoc, MSSA->getMemoryAccess(C),
+ MSSA->getMemoryAccess(SI)))
C = nullptr;
- break;
- }
- // The store to dest may never happen if an exception can be thrown
- // between the load and the store.
- if (I->mayThrow() && !CpyDestIsLocal) {
- C = nullptr;
- break;
+ } else {
+ for (BasicBlock::iterator I = --SI->getIterator(),
+ E = C->getIterator();
+ I != E; --I) {
+ if (isModOrRefSet(AA->getModRefInfo(&*I, StoreLoc))) {
+ C = nullptr;
+ break;
+ }
}
}
}
if (C) {
bool changed = performCallSlotOptzn(
- LI, SI->getPointerOperand()->stripPointerCasts(),
+ LI, SI, SI->getPointerOperand()->stripPointerCasts(),
LI->getPointerOperand()->stripPointerCasts(),
DL.getTypeStoreSize(SI->getOperand(0)->getType()),
commonAlignment(SI->getAlign(), LI->getAlign()), C);
if (changed) {
- MD->removeInstruction(SI);
- SI->eraseFromParent();
- MD->removeInstruction(LI);
- LI->eraseFromParent();
+ eraseInstruction(SI);
+ eraseInstruction(LI);
++NumMemCpyInstr;
return true;
}
@@ -658,8 +814,15 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
- MD->removeInstruction(SI);
- SI->eraseFromParent();
+ if (MSSAU) {
+ assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI)));
+ auto *LastDef =
+ cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI));
+ auto *NewAccess = MSSAU->createMemoryAccessAfter(M, LastDef, LastDef);
+ MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
+ }
+
+ eraseInstruction(SI);
NumMemSetInfer++;
// Make sure we do not invalidate the iterator.
@@ -686,7 +849,8 @@ bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
/// Takes a memcpy and a call that it depends on,
/// and checks for the possibility of a call slot optimization by having
/// the call write its result directly into the destination of the memcpy.
-bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest,
+bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
+ Instruction *cpyStore, Value *cpyDest,
Value *cpySrc, uint64_t cpyLen,
Align cpyAlign, CallInst *C) {
// The general transformation to keep in mind is
@@ -717,7 +881,7 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest,
if (!srcArraySize)
return false;
- const DataLayout &DL = cpy->getModule()->getDataLayout();
+ const DataLayout &DL = cpyLoad->getModule()->getDataLayout();
uint64_t srcSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType()) *
srcArraySize->getZExtValue();
@@ -727,43 +891,26 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest,
// Check that accessing the first srcSize bytes of dest will not cause a
// trap. Otherwise the transform is invalid since it might cause a trap
// to occur earlier than it otherwise would.
- if (AllocaInst *A = dyn_cast<AllocaInst>(cpyDest)) {
- // The destination is an alloca. Check it is larger than srcSize.
- ConstantInt *destArraySize = dyn_cast<ConstantInt>(A->getArraySize());
- if (!destArraySize)
- return false;
-
- uint64_t destSize = DL.getTypeAllocSize(A->getAllocatedType()) *
- destArraySize->getZExtValue();
-
- if (destSize < srcSize)
- return false;
- } else if (Argument *A = dyn_cast<Argument>(cpyDest)) {
- // The store to dest may never happen if the call can throw.
- if (C->mayThrow())
- return false;
-
- if (A->getDereferenceableBytes() < srcSize) {
- // If the destination is an sret parameter then only accesses that are
- // outside of the returned struct type can trap.
- if (!A->hasStructRetAttr())
- return false;
-
- Type *StructTy = cast<PointerType>(A->getType())->getElementType();
- if (!StructTy->isSized()) {
- // The call may never return and hence the copy-instruction may never
- // be executed, and therefore it's not safe to say "the destination
- // has at least <cpyLen> bytes, as implied by the copy-instruction",
- return false;
- }
+ if (!isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpyLen),
+ DL, C, DT))
+ return false;
- uint64_t destSize = DL.getTypeAllocSize(StructTy);
- if (destSize < srcSize)
- return false;
- }
- } else {
+ // Make sure that nothing can observe cpyDest being written early. There are
+ // a number of cases to consider:
+ // 1. cpyDest cannot be accessed between C and cpyStore as a precondition of
+ // the transform.
+ // 2. C itself may not access cpyDest (prior to the transform). This is
+ // checked further below.
+ // 3. If cpyDest is accessible to the caller of this function (potentially
+ // captured and not based on an alloca), we need to ensure that we cannot
+ // unwind between C and cpyStore. This is checked here.
+ // 4. If cpyDest is potentially captured, there may be accesses to it from
+ // another thread. In this case, we need to check that cpyStore is
+ // guaranteed to be executed if C is. As it is a non-atomic access, it
+ // renders accesses from other threads undefined.
+ // TODO: This is currently not checked.
+ if (mayBeVisibleThroughUnwinding(cpyDest, C, cpyStore))
return false;
- }
// Check that dest points to memory that is at least as aligned as src.
Align srcAlign = srcAlloca->getAlign();
@@ -777,29 +924,26 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest,
// guarantees that it holds only undefined values when passed in (so the final
// memcpy can be dropped), that it is not read or written between the call and
// the memcpy, and that writing beyond the end of it is undefined.
- SmallVector<User*, 8> srcUseList(srcAlloca->user_begin(),
- srcAlloca->user_end());
+ SmallVector<User *, 8> srcUseList(srcAlloca->users());
while (!srcUseList.empty()) {
User *U = srcUseList.pop_back_val();
if (isa<BitCastInst>(U) || isa<AddrSpaceCastInst>(U)) {
- for (User *UU : U->users())
- srcUseList.push_back(UU);
+ append_range(srcUseList, U->users());
continue;
}
if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(U)) {
if (!G->hasAllZeroIndices())
return false;
- for (User *UU : U->users())
- srcUseList.push_back(UU);
+ append_range(srcUseList, U->users());
continue;
}
if (const IntrinsicInst *IT = dyn_cast<IntrinsicInst>(U))
if (IT->isLifetimeStartOrEnd())
continue;
- if (U != C && U != cpy)
+ if (U != C && U != cpyLoad)
return false;
}
@@ -811,20 +955,24 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest,
// Since we're changing the parameter to the callsite, we need to make sure
// that what would be the new parameter dominates the callsite.
- DominatorTree &DT = LookupDomTree();
- if (Instruction *cpyDestInst = dyn_cast<Instruction>(cpyDest))
- if (!DT.dominates(cpyDestInst, C))
+ if (!DT->dominates(cpyDest, C)) {
+ // Support moving a constant index GEP before the call.
+ auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
+ if (GEP && GEP->hasAllConstantIndices() &&
+ DT->dominates(GEP->getPointerOperand(), C))
+ GEP->moveBefore(C);
+ else
return false;
+ }
// In addition to knowing that the call does not access src in some
// unexpected manner, for example via a global, which we deduce from
// the use analysis, we also need to know that it does not sneakily
// access dest. We rely on AA to figure this out for us.
- AliasAnalysis &AA = LookupAliasAnalysis();
- ModRefInfo MR = AA.getModRefInfo(C, cpyDest, LocationSize::precise(srcSize));
+ ModRefInfo MR = AA->getModRefInfo(C, cpyDest, LocationSize::precise(srcSize));
// If necessary, perform additional analysis.
if (isModOrRefSet(MR))
- MR = AA.callCapturesBefore(C, cpyDest, LocationSize::precise(srcSize), &DT);
+ MR = AA->callCapturesBefore(C, cpyDest, LocationSize::precise(srcSize), DT);
if (isModOrRefSet(MR))
return false;
@@ -866,7 +1014,8 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest,
// Drop any cached information about the call, because we may have changed
// its dependence information by changing its parameter.
- MD->removeInstruction(C);
+ if (MD)
+ MD->removeInstruction(C);
// Update AA metadata
// FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be
@@ -875,12 +1024,9 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest,
LLVMContext::MD_noalias,
LLVMContext::MD_invariant_group,
LLVMContext::MD_access_group};
- combineMetadata(C, cpy, KnownIDs, true);
-
- // Remove the memcpy.
- MD->removeInstruction(cpy);
- ++NumMemCpyInstr;
+ combineMetadata(C, cpyLoad, KnownIDs, true);
+ ++NumCallSlot;
return true;
}
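
A sketch of the source shape the call slot optimization targets (hypothetical names; a call fills a temporary that is then copied into the real destination, and after the transform the call writes into the destination directly):

    struct Big { int data[32]; };

    void producer(Big *ret);          // assumed to fully initialize *ret

    void caller(Big *out) {
      Big tmp;
      producer(&tmp);                 // call writes into the temporary
      *out = tmp;                     // copy into out; after the transform this
                                      // becomes producer(out) and the copy dies,
                                      // subject to the checks above
    }
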
@@ -908,8 +1054,6 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
return false;
- AliasAnalysis &AA = LookupAliasAnalysis();
-
// Verify that the copied-from memory doesn't change in between the two
// transfers. For example, in:
// memcpy(a <- b)
@@ -919,21 +1063,28 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
//
// TODO: If the code between M and MDep is transparent to the destination "c",
// then we could still perform the xform by moving M up to the first memcpy.
- //
- // NOTE: This is conservative, it will stop on any read from the source loc,
- // not just the defining memcpy.
- MemDepResult SourceDep =
- MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false,
- M->getIterator(), M->getParent());
- if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
- return false;
+ if (EnableMemorySSA) {
+ // TODO: It would be sufficient to check the MDep source up to the memcpy
+ // size of M, rather than MDep.
+ if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep),
+ MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M)))
+ return false;
+ } else {
+ // NOTE: This is conservative, it will stop on any read from the source loc,
+ // not just the defining memcpy.
+ MemDepResult SourceDep =
+ MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false,
+ M->getIterator(), M->getParent());
+ if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
+ return false;
+ }
// If the dest of the second might alias the source of the first, then the
// source and dest might overlap. We still want to eliminate the intermediate
// value, but we have to generate a memmove instead of memcpy.
bool UseMemMove = false;
- if (!AA.isNoAlias(MemoryLocation::getForDest(M),
- MemoryLocation::getForSource(MDep)))
+ if (!AA->isNoAlias(MemoryLocation::getForDest(M),
+ MemoryLocation::getForSource(MDep)))
UseMemMove = true;
// If all checks passed, then we can transform M.
@@ -943,18 +1094,25 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
// TODO: Is this worth it if we're creating a less aligned memcpy? For
// example we could be moving from movaps -> movq on x86.
IRBuilder<> Builder(M);
+ Instruction *NewM;
if (UseMemMove)
- Builder.CreateMemMove(M->getRawDest(), M->getDestAlign(),
- MDep->getRawSource(), MDep->getSourceAlign(),
- M->getLength(), M->isVolatile());
+ NewM = Builder.CreateMemMove(M->getRawDest(), M->getDestAlign(),
+ MDep->getRawSource(), MDep->getSourceAlign(),
+ M->getLength(), M->isVolatile());
else
- Builder.CreateMemCpy(M->getRawDest(), M->getDestAlign(),
- MDep->getRawSource(), MDep->getSourceAlign(),
- M->getLength(), M->isVolatile());
+ NewM = Builder.CreateMemCpy(M->getRawDest(), M->getDestAlign(),
+ MDep->getRawSource(), MDep->getSourceAlign(),
+ M->getLength(), M->isVolatile());
+
+ if (MSSAU) {
+ assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)));
+ auto *LastDef = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
+ auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
+ MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
+ }
// Remove the instruction we're replacing.
- MD->removeInstruction(M);
- M->eraseFromParent();
+ eraseInstruction(M);
++NumMemCpyInstr;
return true;
}
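
In hypothetical C terms, the memcpy-memcpy forwarding above performs:

    #include <string.h>

    // memcpy(a <- b); memcpy(c <- a)  ==>  memcpy(a <- b); memcpy(c <- b),
    // valid only while b is not modified between the two copies; a memmove
    // is emitted instead if c may alias b.
    void forward(char *a, char *b, char *c, size_t n) {
      memcpy(a, b, n);
      memcpy(c, a, n);
    }
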
@@ -979,18 +1137,41 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
if (MemSet->getDest() != MemCpy->getDest())
return false;
- // Check that there are no other dependencies on the memset destination.
- MemDepResult DstDepInfo =
- MD->getPointerDependencyFrom(MemoryLocation::getForDest(MemSet), false,
- MemCpy->getIterator(), MemCpy->getParent());
- if (DstDepInfo.getInst() != MemSet)
+ // Check that src and dst of the memcpy aren't the same. While memcpy
+ // operands cannot partially overlap, exact equality is allowed.
+ if (!AA->isNoAlias(MemoryLocation(MemCpy->getSource(),
+ LocationSize::precise(1)),
+ MemoryLocation(MemCpy->getDest(),
+ LocationSize::precise(1))))
return false;
+ if (EnableMemorySSA) {
+ // We know that dst up to src_size is not written. We now need to make sure
+ // that dst up to dst_size is not accessed. (If we did not move the memset,
+ // checking for reads would be sufficient.)
+ if (accessedBetween(*AA, MemoryLocation::getForDest(MemSet),
+ MSSA->getMemoryAccess(MemSet),
+ MSSA->getMemoryAccess(MemCpy))) {
+ return false;
+ }
+ } else {
+ // We have already checked that dst up to src_size is not accessed. We
+ // need to make sure that there are no accesses up to dst_size either.
+ MemDepResult DstDepInfo = MD->getPointerDependencyFrom(
+ MemoryLocation::getForDest(MemSet), false, MemCpy->getIterator(),
+ MemCpy->getParent());
+ if (DstDepInfo.getInst() != MemSet)
+ return false;
+ }
+
// Use the same i8* dest as the memcpy, killing the memset dest if different.
Value *Dest = MemCpy->getRawDest();
Value *DestSize = MemSet->getLength();
Value *SrcSize = MemCpy->getLength();
+ if (mayBeVisibleThroughUnwinding(Dest, MemSet, MemCpy))
+ return false;
+
// By default, create an unaligned memset.
unsigned Align = 1;
// If Dest is aligned, and SrcSize is constant, use the minimum alignment
@@ -1016,13 +1197,25 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize);
Value *MemsetLen = Builder.CreateSelect(
Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff);
- Builder.CreateMemSet(
+ Instruction *NewMemSet = Builder.CreateMemSet(
Builder.CreateGEP(Dest->getType()->getPointerElementType(), Dest,
SrcSize),
MemSet->getOperand(1), MemsetLen, MaybeAlign(Align));
- MD->removeInstruction(MemSet);
- MemSet->eraseFromParent();
+ if (MSSAU) {
+ assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) &&
+ "MemCpy must be a MemoryDef");
+ // The new memset is inserted after the memcpy, but it is known that its
+ // defining access is the memset about to be removed which immediately
+ // precedes the memcpy.
+ auto *LastDef =
+ cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
+ auto *NewAccess = MSSAU->createMemoryAccessBefore(
+ NewMemSet, LastDef->getDefiningAccess(), LastDef);
+ MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
+ }
+
+ eraseInstruction(MemSet);
return true;
}
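
The memset/memcpy reshaping above, expressed as a hypothetical C example:

    #include <string.h>

    // memset(dst, 0, 128); memcpy(dst, src, n) with n <= 128 becomes
    // memcpy(dst, src, n); memset(dst + n, 0, 128 - n), so the memset no
    // longer initializes bytes that the memcpy overwrites anyway.
    void init(char *dst, const char *src, size_t n) {
      memset(dst, 0, 128);
      memcpy(dst, src, n);
    }
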
@@ -1041,6 +1234,24 @@ static bool hasUndefContents(Instruction *I, ConstantInt *Size) {
return false;
}
+static bool hasUndefContentsMSSA(MemorySSA *MSSA, AliasAnalysis *AA, Value *V,
+ MemoryDef *Def, ConstantInt *Size) {
+ if (MSSA->isLiveOnEntryDef(Def))
+ return isa<AllocaInst>(getUnderlyingObject(V));
+
+ if (IntrinsicInst *II =
+ dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
+ ConstantInt *LTSize = cast<ConstantInt>(II->getArgOperand(0));
+ if (AA->isMustAlias(V, II->getArgOperand(1)) &&
+ LTSize->getZExtValue() >= Size->getZExtValue())
+ return true;
+ }
+ }
+
+ return false;
+}
+
/// Transform memcpy to memset when its source was just memset.
/// In other words, turn:
/// \code
@@ -1057,11 +1268,9 @@ static bool hasUndefContents(Instruction *I, ConstantInt *Size) {
/// The \p MemCpy must have a Constant length.
bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
MemSetInst *MemSet) {
- AliasAnalysis &AA = LookupAliasAnalysis();
-
// Make sure that memcpy(..., memset(...), ...), that is we are memsetting and
// memcpying from the same address. Otherwise it is hard to reason about.
- if (!AA.isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
+ if (!AA->isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
return false;
// A known memset size is required.
@@ -1078,17 +1287,37 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
// interested in the bytes from MemSetSize..CopySize here, but as we can't
// easily represent this location, we use the full 0..CopySize range.
MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
- MemDepResult DepInfo = MD->getPointerDependencyFrom(
- MemCpyLoc, true, MemSet->getIterator(), MemSet->getParent());
- if (DepInfo.isDef() && hasUndefContents(DepInfo.getInst(), CopySize))
- CopySize = MemSetSize;
- else
+ bool CanReduceSize = false;
+ if (EnableMemorySSA) {
+ MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
+ MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
+ MemSetAccess->getDefiningAccess(), MemCpyLoc);
+ if (auto *MD = dyn_cast<MemoryDef>(Clobber))
+ if (hasUndefContentsMSSA(MSSA, AA, MemCpy->getSource(), MD, CopySize))
+ CanReduceSize = true;
+ } else {
+ MemDepResult DepInfo = MD->getPointerDependencyFrom(
+ MemCpyLoc, true, MemSet->getIterator(), MemSet->getParent());
+ if (DepInfo.isDef() && hasUndefContents(DepInfo.getInst(), CopySize))
+ CanReduceSize = true;
+ }
+
+ if (!CanReduceSize)
return false;
+ CopySize = MemSetSize;
}
IRBuilder<> Builder(MemCpy);
- Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1), CopySize,
- MaybeAlign(MemCpy->getDestAlignment()));
+ Instruction *NewM =
+ Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
+ CopySize, MaybeAlign(MemCpy->getDestAlignment()));
+ if (MSSAU) {
+ auto *LastDef =
+ cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
+ auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
+ MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
+ }
+
return true;
}
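
A hypothetical snippet for the memcpy-from-memset case handled above:

    #include <string.h>

    // The copy source was just memset and the memset covers the copied range,
    // so the memcpy itself can be rewritten as a memset of the destination.
    void splat(char *a, char *b) {
      memset(a, 0x2a, 64);
      memcpy(b, a, 64);   // => memset(b, 0x2a, 64)
    }
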
@@ -1104,8 +1333,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
// If the source and destination of the memcpy are the same, then zap it.
if (M->getSource() == M->getDest()) {
++BBI;
- MD->removeInstruction(M);
- M->eraseFromParent();
+ eraseInstruction(M);
return true;
}
@@ -1115,73 +1343,156 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
if (Value *ByteVal = isBytewiseValue(GV->getInitializer(),
M->getModule()->getDataLayout())) {
IRBuilder<> Builder(M);
- Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(),
- MaybeAlign(M->getDestAlignment()), false);
- MD->removeInstruction(M);
- M->eraseFromParent();
+ Instruction *NewM =
+ Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(),
+ MaybeAlign(M->getDestAlignment()), false);
+ if (MSSAU) {
+ auto *LastDef =
+ cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
+ auto *NewAccess =
+ MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
+ MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
+ }
+
+ eraseInstruction(M);
++NumCpyToSet;
return true;
}
- MemDepResult DepInfo = MD->getDependency(M);
-
- // Try to turn a partially redundant memset + memcpy into
- // memcpy + smaller memset. We don't need the memcpy size for this.
- if (DepInfo.isClobber())
- if (MemSetInst *MDep = dyn_cast<MemSetInst>(DepInfo.getInst()))
- if (processMemSetMemCpyDependence(M, MDep))
- return true;
+ if (EnableMemorySSA) {
+ MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
+ MemoryAccess *AnyClobber = MSSA->getWalker()->getClobberingMemoryAccess(MA);
+ MemoryLocation DestLoc = MemoryLocation::getForDest(M);
+ const MemoryAccess *DestClobber =
+ MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc);
+
+ // Try to turn a partially redundant memset + memcpy into
+ // memcpy + smaller memset. We don't need the memcpy size for this.
+ // The memcpy most post-dom the memset, so limit this to the same basic
+ // block. A non-local generalization is likely not worthwhile.
+ if (auto *MD = dyn_cast<MemoryDef>(DestClobber))
+ if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
+ if (DestClobber->getBlock() == M->getParent())
+ if (processMemSetMemCpyDependence(M, MDep))
+ return true;
+
+ // The optimizations after this point require the memcpy size.
+ ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
+ if (!CopySize) return false;
+
+ MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
+ AnyClobber, MemoryLocation::getForSource(M));
+
+ // There are four possible optimizations we can do for memcpy:
+ // a) memcpy-memcpy xform which exposes redundance for DSE.
+ // b) call-memcpy xform for return slot optimization.
+ // c) memcpy from freshly alloca'd space or space that has just started
+ // its lifetime copies undefined data, and we can therefore eliminate
+ // the memcpy in favor of the data that was already at the destination.
+ // d) memcpy from a just-memset'd source can be turned into memset.
+ if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
+ if (Instruction *MI = MD->getMemoryInst()) {
+ if (auto *C = dyn_cast<CallInst>(MI)) {
+ // The memcpy must post-dom the call. Limit to the same block for now.
+ // Additionally, we need to ensure that there are no accesses to dest
+ // between the call and the memcpy. Accesses to src will be checked
+ // by performCallSlotOptzn().
+ // TODO: Support non-local call-slot optimization?
+ if (C->getParent() == M->getParent() &&
+ !accessedBetween(*AA, DestLoc, MD, MA)) {
+ // FIXME: Can we pass in either of dest/src alignment here instead
+ // of conservatively taking the minimum?
+ Align Alignment = std::min(M->getDestAlign().valueOrOne(),
+ M->getSourceAlign().valueOrOne());
+ if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
+ CopySize->getZExtValue(), Alignment, C)) {
+ LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
+ << " call: " << *C << "\n"
+ << " memcpy: " << *M << "\n");
+ eraseInstruction(M);
+ ++NumMemCpyInstr;
+ return true;
+ }
+ }
+ }
+ if (auto *MDep = dyn_cast<MemCpyInst>(MI))
+ return processMemCpyMemCpyDependence(M, MDep);
+ if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
+ if (performMemCpyToMemSetOptzn(M, MDep)) {
+ LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
+ eraseInstruction(M);
+ ++NumCpyToSet;
+ return true;
+ }
+ }
+ }
- // The optimizations after this point require the memcpy size.
- ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
- if (!CopySize) return false;
-
- // There are four possible optimizations we can do for memcpy:
- // a) memcpy-memcpy xform which exposes redundance for DSE.
- // b) call-memcpy xform for return slot optimization.
- // c) memcpy from freshly alloca'd space or space that has just started its
- // lifetime copies undefined data, and we can therefore eliminate the
- // memcpy in favor of the data that was already at the destination.
- // d) memcpy from a just-memset'd source can be turned into memset.
- if (DepInfo.isClobber()) {
- if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
- // FIXME: Can we pass in either of dest/src alignment here instead
- // of conservatively taking the minimum?
- Align Alignment = std::min(M->getDestAlign().valueOrOne(),
- M->getSourceAlign().valueOrOne());
- if (performCallSlotOptzn(M, M->getDest(), M->getSource(),
- CopySize->getZExtValue(), Alignment, C)) {
- MD->removeInstruction(M);
- M->eraseFromParent();
+ if (hasUndefContentsMSSA(MSSA, AA, M->getSource(), MD, CopySize)) {
+ LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n");
+ eraseInstruction(M);
+ ++NumMemCpyInstr;
return true;
}
}
- }
+ } else {
+ MemDepResult DepInfo = MD->getDependency(M);
- MemoryLocation SrcLoc = MemoryLocation::getForSource(M);
- MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(
- SrcLoc, true, M->getIterator(), M->getParent());
-
- if (SrcDepInfo.isClobber()) {
- if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
- return processMemCpyMemCpyDependence(M, MDep);
- } else if (SrcDepInfo.isDef()) {
- if (hasUndefContents(SrcDepInfo.getInst(), CopySize)) {
- MD->removeInstruction(M);
- M->eraseFromParent();
- ++NumMemCpyInstr;
- return true;
+ // Try to turn a partially redundant memset + memcpy into
+ // memcpy + smaller memset. We don't need the memcpy size for this.
+ if (DepInfo.isClobber())
+ if (MemSetInst *MDep = dyn_cast<MemSetInst>(DepInfo.getInst()))
+ if (processMemSetMemCpyDependence(M, MDep))
+ return true;
+
+ // The optimizations after this point require the memcpy size.
+ ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
+ if (!CopySize) return false;
+
+ // There are four possible optimizations we can do for memcpy:
+ // a) memcpy-memcpy xform which exposes redundance for DSE.
+ // b) call-memcpy xform for return slot optimization.
+ // c) memcpy from freshly alloca'd space or space that has just started
+ // its lifetime copies undefined data, and we can therefore eliminate
+ // the memcpy in favor of the data that was already at the destination.
+ // d) memcpy from a just-memset'd source can be turned into memset.
+ if (DepInfo.isClobber()) {
+ if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
+ // FIXME: Can we pass in either of dest/src alignment here instead
+ // of conservatively taking the minimum?
+ Align Alignment = std::min(M->getDestAlign().valueOrOne(),
+ M->getSourceAlign().valueOrOne());
+ if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
+ CopySize->getZExtValue(), Alignment, C)) {
+ eraseInstruction(M);
+ ++NumMemCpyInstr;
+ return true;
+ }
+ }
}
- }
- if (SrcDepInfo.isClobber())
- if (MemSetInst *MDep = dyn_cast<MemSetInst>(SrcDepInfo.getInst()))
- if (performMemCpyToMemSetOptzn(M, MDep)) {
- MD->removeInstruction(M);
- M->eraseFromParent();
- ++NumCpyToSet;
+ MemoryLocation SrcLoc = MemoryLocation::getForSource(M);
+ MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(
+ SrcLoc, true, M->getIterator(), M->getParent());
+
+ if (SrcDepInfo.isClobber()) {
+ if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
+ return processMemCpyMemCpyDependence(M, MDep);
+ } else if (SrcDepInfo.isDef()) {
+ if (hasUndefContents(SrcDepInfo.getInst(), CopySize)) {
+ eraseInstruction(M);
+ ++NumMemCpyInstr;
return true;
}
+ }
+
+ if (SrcDepInfo.isClobber())
+ if (MemSetInst *MDep = dyn_cast<MemSetInst>(SrcDepInfo.getInst()))
+ if (performMemCpyToMemSetOptzn(M, MDep)) {
+ eraseInstruction(M);
+ ++NumCpyToSet;
+ return true;
+ }
+ }
return false;
}
@@ -1189,14 +1500,12 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
/// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
/// not to alias.
bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
- AliasAnalysis &AA = LookupAliasAnalysis();
-
if (!TLI->has(LibFunc_memmove))
return false;
// See if the pointers alias.
- if (!AA.isNoAlias(MemoryLocation::getForDest(M),
- MemoryLocation::getForSource(M)))
+ if (!AA->isNoAlias(MemoryLocation::getForDest(M),
+ MemoryLocation::getForSource(M)))
return false;
LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
@@ -1209,9 +1518,13 @@ bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
M->setCalledFunction(Intrinsic::getDeclaration(M->getModule(),
Intrinsic::memcpy, ArgTys));
+ // For MemorySSA nothing really changes (except that memcpy may imply stricter
+ // aliasing guarantees).
+
// MemDep may have over conservative information about this instruction, just
// conservatively flush it from the cache.
- MD->removeInstruction(M);
+ if (MD)
+ MD->removeInstruction(M);
++NumMoveToCpy;
return true;
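
The memmove strengthening applies when alias analysis proves the operands cannot overlap; a hypothetical example:

    #include <string.h>

    // tmp is a non-escaping local, so AA can prove it does not alias dst and
    // processMemMove rewrites the memmove as the stricter memcpy.
    void flush(char *dst) {
      char tmp[64] = {1};
      memmove(dst, tmp, sizeof(tmp));   // => memcpy(dst, tmp, 64)
    }
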
@@ -1224,16 +1537,25 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
Value *ByValArg = CB.getArgOperand(ArgNo);
Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType();
uint64_t ByValSize = DL.getTypeAllocSize(ByValTy);
- MemDepResult DepInfo = MD->getPointerDependencyFrom(
- MemoryLocation(ByValArg, LocationSize::precise(ByValSize)), true,
- CB.getIterator(), CB.getParent());
- if (!DepInfo.isClobber())
- return false;
+ MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize));
+ MemCpyInst *MDep = nullptr;
+ if (EnableMemorySSA) {
+ MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
+ MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
+ CallAccess->getDefiningAccess(), Loc);
+ if (auto *MD = dyn_cast<MemoryDef>(Clobber))
+ MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
+ } else {
+ MemDepResult DepInfo = MD->getPointerDependencyFrom(
+ Loc, true, CB.getIterator(), CB.getParent());
+ if (!DepInfo.isClobber())
+ return false;
+ MDep = dyn_cast<MemCpyInst>(DepInfo.getInst());
+ }
// If the byval argument isn't fed by a memcpy, ignore it. If it is fed by
// a memcpy, see if we can byval from the source of the memcpy instead of the
// result.
- MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst());
if (!MDep || MDep->isVolatile() ||
ByValArg->stripPointerCasts() != MDep->getDest())
return false;
@@ -1250,12 +1572,10 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
// If it is greater than the memcpy, then we check to see if we can force the
// source of the memcpy to the alignment we need. If we fail, we bail out.
- AssumptionCache &AC = LookupAssumptionCache();
- DominatorTree &DT = LookupDomTree();
MaybeAlign MemDepAlign = MDep->getSourceAlign();
if ((!MemDepAlign || *MemDepAlign < *ByValAlign) &&
- getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &CB, &AC,
- &DT) < *ByValAlign)
+ getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &CB, AC,
+ DT) < *ByValAlign)
return false;
// The address space of the memcpy source must match the byval argument
@@ -1269,14 +1589,19 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
// *b = 42;
// foo(*a)
// It would be invalid to transform the second memcpy into foo(*b).
- //
- // NOTE: This is conservative, it will stop on any read from the source loc,
- // not just the defining memcpy.
- MemDepResult SourceDep = MD->getPointerDependencyFrom(
- MemoryLocation::getForSource(MDep), false,
- CB.getIterator(), MDep->getParent());
- if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
- return false;
+ if (EnableMemorySSA) {
+ if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep),
+ MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(&CB)))
+ return false;
+ } else {
+ // NOTE: This is conservative, it will stop on any read from the source loc,
+ // not just the defining memcpy.
+ MemDepResult SourceDep = MD->getPointerDependencyFrom(
+ MemoryLocation::getForSource(MDep), false,
+ CB.getIterator(), MDep->getParent());
+ if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
+ return false;
+ }
Value *TmpCast = MDep->getSource();
if (MDep->getSource()->getType() != ByValArg->getType()) {
@@ -1301,15 +1626,13 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
bool MemCpyOptPass::iterateOnFunction(Function &F) {
bool MadeChange = false;
- DominatorTree &DT = LookupDomTree();
-
// Walk all instruction in the function.
for (BasicBlock &BB : F) {
// Skip unreachable blocks. For example processStore assumes that an
// instruction in a BB can't be dominated by a later instruction in the
// same BB (which is a scenario that can happen for an unreachable BB that
// has itself as a predecessor).
- if (!DT.isReachableFromEntry(&BB))
+ if (!DT->isReachableFromEntry(&BB))
continue;
for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
@@ -1345,43 +1668,43 @@ bool MemCpyOptPass::iterateOnFunction(Function &F) {
}
PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
- auto &MD = AM.getResult<MemoryDependenceAnalysis>(F);
+ auto *MD = !EnableMemorySSA ? &AM.getResult<MemoryDependenceAnalysis>(F)
+ : AM.getCachedResult<MemoryDependenceAnalysis>(F);
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
-
- auto LookupAliasAnalysis = [&]() -> AliasAnalysis & {
- return AM.getResult<AAManager>(F);
- };
- auto LookupAssumptionCache = [&]() -> AssumptionCache & {
- return AM.getResult<AssumptionAnalysis>(F);
- };
- auto LookupDomTree = [&]() -> DominatorTree & {
- return AM.getResult<DominatorTreeAnalysis>(F);
- };
-
- bool MadeChange = runImpl(F, &MD, &TLI, LookupAliasAnalysis,
- LookupAssumptionCache, LookupDomTree);
+ auto *AA = &AM.getResult<AAManager>(F);
+ auto *AC = &AM.getResult<AssumptionAnalysis>(F);
+ auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
+ auto *MSSA = EnableMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F)
+ : AM.getCachedResult<MemorySSAAnalysis>(F);
+
+ bool MadeChange =
+ runImpl(F, MD, &TLI, AA, AC, DT, MSSA ? &MSSA->getMSSA() : nullptr);
if (!MadeChange)
return PreservedAnalyses::all();
PreservedAnalyses PA;
PA.preserveSet<CFGAnalyses>();
PA.preserve<GlobalsAA>();
- PA.preserve<MemoryDependenceAnalysis>();
+ if (MD)
+ PA.preserve<MemoryDependenceAnalysis>();
+ if (MSSA)
+ PA.preserve<MemorySSAAnalysis>();
return PA;
}
-bool MemCpyOptPass::runImpl(
- Function &F, MemoryDependenceResults *MD_, TargetLibraryInfo *TLI_,
- std::function<AliasAnalysis &()> LookupAliasAnalysis_,
- std::function<AssumptionCache &()> LookupAssumptionCache_,
- std::function<DominatorTree &()> LookupDomTree_) {
+bool MemCpyOptPass::runImpl(Function &F, MemoryDependenceResults *MD_,
+ TargetLibraryInfo *TLI_, AliasAnalysis *AA_,
+ AssumptionCache *AC_, DominatorTree *DT_,
+ MemorySSA *MSSA_) {
bool MadeChange = false;
MD = MD_;
TLI = TLI_;
- LookupAliasAnalysis = std::move(LookupAliasAnalysis_);
- LookupAssumptionCache = std::move(LookupAssumptionCache_);
- LookupDomTree = std::move(LookupDomTree_);
-
+ AA = AA_;
+ AC = AC_;
+ DT = DT_;
+ MSSA = MSSA_;
+ MemorySSAUpdater MSSAU_(MSSA_);
+ MSSAU = MSSA_ ? &MSSAU_ : nullptr;
// If we don't have at least memset and memcpy, there is little point of doing
// anything here. These are required by a freestanding implementation, so if
// even they are disabled, there is no point in trying hard.
@@ -1394,6 +1717,9 @@ bool MemCpyOptPass::runImpl(
MadeChange = true;
}
+ if (MSSA_ && VerifyMemorySSA)
+ MSSA_->verifyMemorySSA();
+
MD = nullptr;
return MadeChange;
}
@@ -1403,19 +1729,17 @@ bool MemCpyOptLegacyPass::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
- auto *MD = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
+ auto *MDWP = !EnableMemorySSA
+ ? &getAnalysis<MemoryDependenceWrapperPass>()
+ : getAnalysisIfAvailable<MemoryDependenceWrapperPass>();
auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
-
- auto LookupAliasAnalysis = [this]() -> AliasAnalysis & {
- return getAnalysis<AAResultsWrapperPass>().getAAResults();
- };
- auto LookupAssumptionCache = [this, &F]() -> AssumptionCache & {
- return getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- };
- auto LookupDomTree = [this]() -> DominatorTree & {
- return getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- };
-
- return Impl.runImpl(F, MD, TLI, LookupAliasAnalysis, LookupAssumptionCache,
- LookupDomTree);
+ auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto *MSSAWP = EnableMemorySSA
+ ? &getAnalysis<MemorySSAWrapperPass>()
+ : getAnalysisIfAvailable<MemorySSAWrapperPass>();
+
+ return Impl.runImpl(F, MDWP ? &MDWP->getMemDep() : nullptr, TLI, AA, AC, DT,
+ MSSAWP ? &MSSAWP->getMSSA() : nullptr);
}