aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp')
-rw-r--r--  llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp | 149
1 files changed, 104 insertions, 45 deletions
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 67335a45fb58..6698db26626b 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -20,6 +20,7 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
@@ -171,7 +172,7 @@ public:
bool empty() const { return Ranges.empty(); }
void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
- if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ if (auto *SI = dyn_cast<StoreInst>(Inst))
addStore(OffsetFromFirst, SI);
else
addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
@@ -312,15 +313,21 @@ INITIALIZE_PASS_END(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization",
static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start,
Instruction *End) {
assert(Start->getParent() == End->getParent() && "Must be in same block");
- if (!Start->getFunction()->doesNotThrow() &&
- !isa<AllocaInst>(getUnderlyingObject(V))) {
- for (const Instruction &I :
- make_range(Start->getIterator(), End->getIterator())) {
- if (I.mayThrow())
- return true;
- }
- }
- return false;
+ // Function can't unwind, so it also can't be visible through unwinding.
+ if (Start->getFunction()->doesNotThrow())
+ return false;
+
+ // Object is not visible on unwind.
+ // TODO: Support RequiresNoCaptureBeforeUnwind case.
+ bool RequiresNoCaptureBeforeUnwind;
+ if (isNotVisibleOnUnwind(getUnderlyingObject(V),
+ RequiresNoCaptureBeforeUnwind) &&
+ !RequiresNoCaptureBeforeUnwind)
+ return false;
+
+ // Check whether there are any unwinding instructions in the range.
+ return any_of(make_range(Start->getIterator(), End->getIterator()),
+ [](const Instruction &I) { return I.mayThrow(); });
}
void MemCpyOptPass::eraseInstruction(Instruction *I) {
@@ -364,7 +371,7 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
const DataLayout &DL = StartInst->getModule()->getDataLayout();
// We can't track scalable types
- if (StoreInst *SI = dyn_cast<StoreInst>(StartInst))
+ if (auto *SI = dyn_cast<StoreInst>(StartInst))
if (DL.getTypeStoreSize(SI->getOperand(0)->getType()).isScalable())
return nullptr;
@@ -410,7 +417,7 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
continue;
}
- if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
+ if (auto *NextStore = dyn_cast<StoreInst>(BI)) {
// If this is a store, see if we can merge it in.
if (!NextStore->isSimple()) break;
@@ -440,7 +447,7 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
Ranges.addStore(*Offset, NextStore);
} else {
- MemSetInst *MSI = cast<MemSetInst>(BI);
+ auto *MSI = cast<MemSetInst>(BI);
if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
!isa<ConstantInt>(MSI->getLength()))
@@ -661,7 +668,7 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
return false;
// Load to store forwarding can be interpreted as memcpy.
- if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
+ if (auto *LI = dyn_cast<LoadInst>(StoredVal)) {
if (LI->isSimple() && LI->hasOneUse() &&
LI->getParent() == SI->getParent()) {
@@ -871,7 +878,7 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
return false;
// Require that src be an alloca. This simplifies the reasoning considerably.
- AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
+ auto *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
if (!srcAlloca)
return false;
@@ -890,8 +897,10 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
// trap. Otherwise the transform is invalid since it might cause a trap
// to occur earlier than it otherwise would.
if (!isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpySize),
- DL, C, DT))
+ DL, C, DT)) {
+ LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer not dereferenceable\n");
return false;
+ }
// Make sure that nothing can observe cpyDest being written early. There are
// a number of cases to consider:
@@ -907,8 +916,10 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
// guaranteed to be executed if C is. As it is a non-atomic access, it
// renders accesses from other threads undefined.
// TODO: This is currently not checked.
- if (mayBeVisibleThroughUnwinding(cpyDest, C, cpyStore))
+ if (mayBeVisibleThroughUnwinding(cpyDest, C, cpyStore)) {
+ LLVM_DEBUG(dbgs() << "Call Slot: Dest may be visible through unwinding\n");
return false;
+ }
// Check that dest points to memory that is at least as aligned as src.
Align srcAlign = srcAlloca->getAlign();
@@ -930,14 +941,14 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
append_range(srcUseList, U->users());
continue;
}
- if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(U)) {
+ if (const auto *G = dyn_cast<GetElementPtrInst>(U)) {
if (!G->hasAllZeroIndices())
return false;
append_range(srcUseList, U->users());
continue;
}
- if (const IntrinsicInst *IT = dyn_cast<IntrinsicInst>(U))
+ if (const auto *IT = dyn_cast<IntrinsicInst>(U))
if (IT->isLifetimeStartOrEnd())
continue;
@@ -945,12 +956,57 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
return false;
}
- // Check that src isn't captured by the called function since the
- // transformation can cause aliasing issues in that case.
- for (unsigned ArgI = 0, E = C->arg_size(); ArgI != E; ++ArgI)
- if (C->getArgOperand(ArgI) == cpySrc && !C->doesNotCapture(ArgI))
+ // Check whether src is captured by the called function, in which case there
+ // may be further indirect uses of src.
+ bool SrcIsCaptured = any_of(C->args(), [&](Use &U) {
+ return U->stripPointerCasts() == cpySrc &&
+ !C->doesNotCapture(C->getArgOperandNo(&U));
+ });
+
+ // If src is captured, then check whether there are any potential uses of
+ // src through the captured pointer before the lifetime of src ends, either
+ // due to a lifetime.end or a return from the function.
+ if (SrcIsCaptured) {
+ // Check that dest is not captured before/at the call. We have already
+ // checked that src is not captured before it. If either had been captured,
+ // then the call might be comparing the argument against the captured dest
+ // or src pointer.
+ Value *DestObj = getUnderlyingObject(cpyDest);
+ if (!isIdentifiedFunctionLocal(DestObj) ||
+ PointerMayBeCapturedBefore(DestObj, /* ReturnCaptures */ true,
+ /* StoreCaptures */ true, C, DT,
+ /* IncludeI */ true))
return false;
+ MemoryLocation SrcLoc =
+ MemoryLocation(srcAlloca, LocationSize::precise(srcSize));
+ for (Instruction &I :
+ make_range(++C->getIterator(), C->getParent()->end())) {
+ // Lifetime of srcAlloca ends at lifetime.end.
+ if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_end &&
+ II->getArgOperand(1)->stripPointerCasts() == srcAlloca &&
+ cast<ConstantInt>(II->getArgOperand(0))->uge(srcSize))
+ break;
+ }
+
+ // Lifetime of srcAlloca ends at return.
+ if (isa<ReturnInst>(&I))
+ break;
+
+ // Ignore the direct read of src in the load.
+ if (&I == cpyLoad)
+ continue;
+
+ // Check whether this instruction may mod/ref src through the captured
+ // pointer (we have already checked any direct mod/refs in the loop above).
+ // Also bail if we hit a terminator, as we don't want to scan into other
+ // blocks.
+ if (isModOrRefSet(AA->getModRefInfo(&I, SrcLoc)) || I.isTerminator())
+ return false;
+ }
+ }
+
// Since we're changing the parameter to the callsite, we need to make sure
// that what would be the new parameter dominates the callsite.
if (!DT->dominates(cpyDest, C)) {
@@ -1018,6 +1074,8 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
LLVMContext::MD_invariant_group,
LLVMContext::MD_access_group};
combineMetadata(C, cpyLoad, KnownIDs, true);
+ if (cpyLoad != cpyStore)
+ combineMetadata(C, cpyStore, KnownIDs, true);
++NumCallSlot;
return true;
@@ -1043,8 +1101,8 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
// Second, the length of the memcpy's must be the same, or the preceding one
// must be larger than the following one.
if (MDep->getLength() != M->getLength()) {
- ConstantInt *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
- ConstantInt *MLen = dyn_cast<ConstantInt>(M->getLength());
+ auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
+ auto *MLen = dyn_cast<ConstantInt>(M->getLength());
if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
return false;
}
@@ -1163,7 +1221,7 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
const unsigned DestAlign =
std::max(MemSet->getDestAlignment(), MemCpy->getDestAlignment());
if (DestAlign > 1)
- if (ConstantInt *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
+ if (auto *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
Align = MinAlign(SrcSizeC->getZExtValue(), DestAlign);
IRBuilder<> Builder(MemCpy);
@@ -1211,12 +1269,11 @@ static bool hasUndefContents(MemorySSA *MSSA, AliasAnalysis *AA, Value *V,
if (MSSA->isLiveOnEntryDef(Def))
return isa<AllocaInst>(getUnderlyingObject(V));
- if (IntrinsicInst *II =
- dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) {
+ if (auto *II = dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) {
if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
- ConstantInt *LTSize = cast<ConstantInt>(II->getArgOperand(0));
+ auto *LTSize = cast<ConstantInt>(II->getArgOperand(0));
- if (ConstantInt *CSize = dyn_cast<ConstantInt>(Size)) {
+ if (auto *CSize = dyn_cast<ConstantInt>(Size)) {
if (AA->isMustAlias(V, II->getArgOperand(1)) &&
LTSize->getZExtValue() >= CSize->getZExtValue())
return true;
@@ -1226,12 +1283,14 @@ static bool hasUndefContents(MemorySSA *MSSA, AliasAnalysis *AA, Value *V,
// does) and we're querying a pointer based on that alloca, then we know
// the memory is definitely undef, regardless of how exactly we alias.
// The size also doesn't matter, as an out-of-bounds access would be UB.
- AllocaInst *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(V));
- if (getUnderlyingObject(II->getArgOperand(1)) == Alloca) {
- const DataLayout &DL = Alloca->getModule()->getDataLayout();
- if (Optional<TypeSize> AllocaSize = Alloca->getAllocationSizeInBits(DL))
- if (*AllocaSize == LTSize->getValue() * 8)
- return true;
+ if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(V))) {
+ if (getUnderlyingObject(II->getArgOperand(1)) == Alloca) {
+ const DataLayout &DL = Alloca->getModule()->getDataLayout();
+ if (Optional<TypeSize> AllocaSize =
+ Alloca->getAllocationSizeInBits(DL))
+ if (*AllocaSize == LTSize->getValue() * 8)
+ return true;
+ }
}
}
}
@@ -1266,12 +1325,12 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
// Don't worry about sizes larger than i64.
// A known memset size is required.
- ConstantInt *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
+ auto *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
if (!CMemSetSize)
return false;
// A known memcpy size is also required.
- ConstantInt *CCopySize = dyn_cast<ConstantInt>(CopySize);
+ auto *CCopySize = dyn_cast<ConstantInt>(CopySize);
if (!CCopySize)
return false;
if (CCopySize->getZExtValue() > CMemSetSize->getZExtValue()) {
@@ -1323,7 +1382,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
}
// If copying from a constant, try to turn the memcpy into a memset.
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(M->getSource()))
+ if (auto *GV = dyn_cast<GlobalVariable>(M->getSource()))
if (GV->isConstant() && GV->hasDefinitiveInitializer())
if (Value *ByteVal = isBytewiseValue(GV->getInitializer(),
M->getModule()->getDataLayout())) {
@@ -1370,7 +1429,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
// d) memcpy from a just-memset'd source can be turned into memset.
if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
if (Instruction *MI = MD->getMemoryInst()) {
- if (ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
+ if (auto *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
if (auto *C = dyn_cast<CallInst>(MI)) {
// The memcpy must post-dom the call. Limit to the same block for
// now. Additionally, we need to ensure that there are no accesses
@@ -1469,7 +1528,7 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
return false;
// The length of the memcpy must be larger or equal to the size of the byval.
- ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
+ auto *C1 = dyn_cast<ConstantInt>(MDep->getLength());
if (!C1 || !TypeSize::isKnownGE(
TypeSize::getFixed(C1->getValue().getZExtValue()), ByValSize))
return false;
@@ -1540,13 +1599,13 @@ bool MemCpyOptPass::iterateOnFunction(Function &F) {
bool RepeatInstruction = false;
- if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ if (auto *SI = dyn_cast<StoreInst>(I))
MadeChange |= processStore(SI, BI);
- else if (MemSetInst *M = dyn_cast<MemSetInst>(I))
+ else if (auto *M = dyn_cast<MemSetInst>(I))
RepeatInstruction = processMemSet(M, BI);
- else if (MemCpyInst *M = dyn_cast<MemCpyInst>(I))
+ else if (auto *M = dyn_cast<MemCpyInst>(I))
RepeatInstruction = processMemCpy(M, BI);
- else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I))
+ else if (auto *M = dyn_cast<MemMoveInst>(I))
RepeatInstruction = processMemMove(M);
else if (auto *CB = dyn_cast<CallBase>(I)) {
for (unsigned i = 0, e = CB->arg_size(); i != e; ++i)