diff options
Diffstat (limited to 'llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp')
-rw-r--r-- | llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp | 149 |
1 files changed, 104 insertions, 45 deletions
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 67335a45fb58..6698db26626b 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/MemoryLocation.h" @@ -171,7 +172,7 @@ public: bool empty() const { return Ranges.empty(); } void addInst(int64_t OffsetFromFirst, Instruction *Inst) { - if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) + if (auto *SI = dyn_cast<StoreInst>(Inst)) addStore(OffsetFromFirst, SI); else addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst)); @@ -312,15 +313,21 @@ INITIALIZE_PASS_END(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization", static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start, Instruction *End) { assert(Start->getParent() == End->getParent() && "Must be in same block"); - if (!Start->getFunction()->doesNotThrow() && - !isa<AllocaInst>(getUnderlyingObject(V))) { - for (const Instruction &I : - make_range(Start->getIterator(), End->getIterator())) { - if (I.mayThrow()) - return true; - } - } - return false; + // Function can't unwind, so it also can't be visible through unwinding. + if (Start->getFunction()->doesNotThrow()) + return false; + + // Object is not visible on unwind. + // TODO: Support RequiresNoCaptureBeforeUnwind case. + bool RequiresNoCaptureBeforeUnwind; + if (isNotVisibleOnUnwind(getUnderlyingObject(V), + RequiresNoCaptureBeforeUnwind) && + !RequiresNoCaptureBeforeUnwind) + return false; + + // Check whether there are any unwinding instructions in the range. 
+ return any_of(make_range(Start->getIterator(), End->getIterator()), + [](const Instruction &I) { return I.mayThrow(); }); } void MemCpyOptPass::eraseInstruction(Instruction *I) { @@ -364,7 +371,7 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst, const DataLayout &DL = StartInst->getModule()->getDataLayout(); // We can't track scalable types - if (StoreInst *SI = dyn_cast<StoreInst>(StartInst)) + if (auto *SI = dyn_cast<StoreInst>(StartInst)) if (DL.getTypeStoreSize(SI->getOperand(0)->getType()).isScalable()) return nullptr; @@ -410,7 +417,7 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst, continue; } - if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) { + if (auto *NextStore = dyn_cast<StoreInst>(BI)) { // If this is a store, see if we can merge it in. if (!NextStore->isSimple()) break; @@ -440,7 +447,7 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst, Ranges.addStore(*Offset, NextStore); } else { - MemSetInst *MSI = cast<MemSetInst>(BI); + auto *MSI = cast<MemSetInst>(BI); if (MSI->isVolatile() || ByteVal != MSI->getValue() || !isa<ConstantInt>(MSI->getLength())) @@ -661,7 +668,7 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { return false; // Load to store forwarding can be interpreted as memcpy. - if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) { + if (auto *LI = dyn_cast<LoadInst>(StoredVal)) { if (LI->isSimple() && LI->hasOneUse() && LI->getParent() == SI->getParent()) { @@ -871,7 +878,7 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, return false; // Require that src be an alloca. This simplifies the reasoning considerably. - AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc); + auto *srcAlloca = dyn_cast<AllocaInst>(cpySrc); if (!srcAlloca) return false; @@ -890,8 +897,10 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, // trap. 
Otherwise the transform is invalid since it might cause a trap // to occur earlier than it otherwise would. if (!isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpySize), - DL, C, DT)) + DL, C, DT)) { + LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer not dereferenceable\n"); return false; + } // Make sure that nothing can observe cpyDest being written early. There are // a number of cases to consider: @@ -907,8 +916,10 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, // guaranteed to be executed if C is. As it is a non-atomic access, it // renders accesses from other threads undefined. // TODO: This is currently not checked. - if (mayBeVisibleThroughUnwinding(cpyDest, C, cpyStore)) + if (mayBeVisibleThroughUnwinding(cpyDest, C, cpyStore)) { + LLVM_DEBUG(dbgs() << "Call Slot: Dest may be visible through unwinding"); return false; + } // Check that dest points to memory that is at least as aligned as src. Align srcAlign = srcAlloca->getAlign(); @@ -930,14 +941,14 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, append_range(srcUseList, U->users()); continue; } - if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(U)) { + if (const auto *G = dyn_cast<GetElementPtrInst>(U)) { if (!G->hasAllZeroIndices()) return false; append_range(srcUseList, U->users()); continue; } - if (const IntrinsicInst *IT = dyn_cast<IntrinsicInst>(U)) + if (const auto *IT = dyn_cast<IntrinsicInst>(U)) if (IT->isLifetimeStartOrEnd()) continue; @@ -945,12 +956,57 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, return false; } - // Check that src isn't captured by the called function since the - // transformation can cause aliasing issues in that case. - for (unsigned ArgI = 0, E = C->arg_size(); ArgI != E; ++ArgI) - if (C->getArgOperand(ArgI) == cpySrc && !C->doesNotCapture(ArgI)) + // Check whether src is captured by the called function, in which case there + // may be further indirect uses of src. 
+ bool SrcIsCaptured = any_of(C->args(), [&](Use &U) { + return U->stripPointerCasts() == cpySrc && + !C->doesNotCapture(C->getArgOperandNo(&U)); + }); + + // If src is captured, then check whether there are any potential uses of + // src through the captured pointer before the lifetime of src ends, either + // due to a lifetime.end or a return from the function. + if (SrcIsCaptured) { + // Check that dest is not captured before/at the call. We have already + // checked that src is not captured before it. If either had been captured, + // then the call might be comparing the argument against the captured dest + // or src pointer. + Value *DestObj = getUnderlyingObject(cpyDest); + if (!isIdentifiedFunctionLocal(DestObj) || + PointerMayBeCapturedBefore(DestObj, /* ReturnCaptures */ true, + /* StoreCaptures */ true, C, DT, + /* IncludeI */ true)) return false; + MemoryLocation SrcLoc = + MemoryLocation(srcAlloca, LocationSize::precise(srcSize)); + for (Instruction &I : + make_range(++C->getIterator(), C->getParent()->end())) { + // Lifetime of srcAlloca ends at lifetime.end. + if (auto *II = dyn_cast<IntrinsicInst>(&I)) { + if (II->getIntrinsicID() == Intrinsic::lifetime_end && + II->getArgOperand(1)->stripPointerCasts() == srcAlloca && + cast<ConstantInt>(II->getArgOperand(0))->uge(srcSize)) + break; + } + + // Lifetime of srcAlloca ends at return. + if (isa<ReturnInst>(&I)) + break; + + // Ignore the direct read of src in the load. + if (&I == cpyLoad) + continue; + + // Check whether this instruction may mod/ref src through the captured + // pointer (we have already checked any direct mod/refs in the loop above). + // Also bail if we hit a terminator, as we don't want to scan into other + // blocks. + if (isModOrRefSet(AA->getModRefInfo(&I, SrcLoc)) || I.isTerminator()) + return false; + } + } + // Since we're changing the parameter to the callsite, we need to make sure // that what would be the new parameter dominates the callsite. 
if (!DT->dominates(cpyDest, C)) { @@ -1018,6 +1074,8 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, LLVMContext::MD_invariant_group, LLVMContext::MD_access_group}; combineMetadata(C, cpyLoad, KnownIDs, true); + if (cpyLoad != cpyStore) + combineMetadata(C, cpyStore, KnownIDs, true); ++NumCallSlot; return true; @@ -1043,8 +1101,8 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M, // Second, the length of the memcpy's must be the same, or the preceding one // must be larger than the following one. if (MDep->getLength() != M->getLength()) { - ConstantInt *MDepLen = dyn_cast<ConstantInt>(MDep->getLength()); - ConstantInt *MLen = dyn_cast<ConstantInt>(M->getLength()); + auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength()); + auto *MLen = dyn_cast<ConstantInt>(M->getLength()); if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue()) return false; } @@ -1163,7 +1221,7 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy, const unsigned DestAlign = std::max(MemSet->getDestAlignment(), MemCpy->getDestAlignment()); if (DestAlign > 1) - if (ConstantInt *SrcSizeC = dyn_cast<ConstantInt>(SrcSize)) + if (auto *SrcSizeC = dyn_cast<ConstantInt>(SrcSize)) Align = MinAlign(SrcSizeC->getZExtValue(), DestAlign); IRBuilder<> Builder(MemCpy); @@ -1211,12 +1269,11 @@ static bool hasUndefContents(MemorySSA *MSSA, AliasAnalysis *AA, Value *V, if (MSSA->isLiveOnEntryDef(Def)) return isa<AllocaInst>(getUnderlyingObject(V)); - if (IntrinsicInst *II = - dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) { + if (auto *II = dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) { if (II->getIntrinsicID() == Intrinsic::lifetime_start) { - ConstantInt *LTSize = cast<ConstantInt>(II->getArgOperand(0)); + auto *LTSize = cast<ConstantInt>(II->getArgOperand(0)); - if (ConstantInt *CSize = dyn_cast<ConstantInt>(Size)) { + if (auto *CSize = dyn_cast<ConstantInt>(Size)) { if (AA->isMustAlias(V, II->getArgOperand(1)) && 
LTSize->getZExtValue() >= CSize->getZExtValue()) return true; @@ -1226,12 +1283,14 @@ static bool hasUndefContents(MemorySSA *MSSA, AliasAnalysis *AA, Value *V, // does) and we're querying a pointer based on that alloca, then we know // the memory is definitely undef, regardless of how exactly we alias. // The size also doesn't matter, as an out-of-bounds access would be UB. - AllocaInst *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(V)); - if (getUnderlyingObject(II->getArgOperand(1)) == Alloca) { - const DataLayout &DL = Alloca->getModule()->getDataLayout(); - if (Optional<TypeSize> AllocaSize = Alloca->getAllocationSizeInBits(DL)) - if (*AllocaSize == LTSize->getValue() * 8) - return true; + if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(V))) { + if (getUnderlyingObject(II->getArgOperand(1)) == Alloca) { + const DataLayout &DL = Alloca->getModule()->getDataLayout(); + if (Optional<TypeSize> AllocaSize = + Alloca->getAllocationSizeInBits(DL)) + if (*AllocaSize == LTSize->getValue() * 8) + return true; + } } } } @@ -1266,12 +1325,12 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy, // Don't worry about sizes larger than i64. // A known memset size is required. - ConstantInt *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize); + auto *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize); if (!CMemSetSize) return false; // A known memcpy size is also required. - ConstantInt *CCopySize = dyn_cast<ConstantInt>(CopySize); + auto *CCopySize = dyn_cast<ConstantInt>(CopySize); if (!CCopySize) return false; if (CCopySize->getZExtValue() > CMemSetSize->getZExtValue()) { @@ -1323,7 +1382,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) { } // If copying from a constant, try to turn the memcpy into a memset. 
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(M->getSource())) + if (auto *GV = dyn_cast<GlobalVariable>(M->getSource())) if (GV->isConstant() && GV->hasDefinitiveInitializer()) if (Value *ByteVal = isBytewiseValue(GV->getInitializer(), M->getModule()->getDataLayout())) { @@ -1370,7 +1429,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) { // d) memcpy from a just-memset'd source can be turned into memset. if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) { if (Instruction *MI = MD->getMemoryInst()) { - if (ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength())) { + if (auto *CopySize = dyn_cast<ConstantInt>(M->getLength())) { if (auto *C = dyn_cast<CallInst>(MI)) { // The memcpy must post-dom the call. Limit to the same block for // now. Additionally, we need to ensure that there are no accesses @@ -1469,7 +1528,7 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) { return false; // The length of the memcpy must be larger or equal to the size of the byval. 
- ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength()); + auto *C1 = dyn_cast<ConstantInt>(MDep->getLength()); if (!C1 || !TypeSize::isKnownGE( TypeSize::getFixed(C1->getValue().getZExtValue()), ByValSize)) return false; @@ -1540,13 +1599,13 @@ bool MemCpyOptPass::iterateOnFunction(Function &F) { bool RepeatInstruction = false; - if (StoreInst *SI = dyn_cast<StoreInst>(I)) + if (auto *SI = dyn_cast<StoreInst>(I)) MadeChange |= processStore(SI, BI); - else if (MemSetInst *M = dyn_cast<MemSetInst>(I)) + else if (auto *M = dyn_cast<MemSetInst>(I)) RepeatInstruction = processMemSet(M, BI); - else if (MemCpyInst *M = dyn_cast<MemCpyInst>(I)) + else if (auto *M = dyn_cast<MemCpyInst>(I)) RepeatInstruction = processMemCpy(M, BI); - else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I)) + else if (auto *M = dyn_cast<MemMoveInst>(I)) RepeatInstruction = processMemMove(M); else if (auto *CB = dyn_cast<CallBase>(I)) { for (unsigned i = 0, e = CB->arg_size(); i != e; ++i) |