aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Scalar/SROA.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Scalar/SROA.cpp')
-rw-r--r--llvm/lib/Transforms/Scalar/SROA.cpp206
1 files changed, 173 insertions, 33 deletions
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index c738a2a6f39a..29240aaaa21b 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -43,6 +43,7 @@
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/PtrUseVisitor.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
@@ -55,7 +56,6 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
@@ -84,6 +84,7 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
@@ -198,7 +199,9 @@ class SROA {
SmallSetVector<AllocaInst *, 16> PostPromotionWorklist;
/// A collection of alloca instructions we can directly promote.
- std::vector<AllocaInst *> PromotableAllocas;
+ SetVector<AllocaInst *, SmallVector<AllocaInst *>,
+ SmallPtrSet<AllocaInst *, 16>, 16>
+ PromotableAllocas;
/// A worklist of PHIs to speculate prior to promoting allocas.
///
@@ -245,6 +248,7 @@ private:
bool presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS);
AllocaInst *rewritePartition(AllocaInst &AI, AllocaSlices &AS, Partition &P);
bool splitAlloca(AllocaInst &AI, AllocaSlices &AS);
+ bool propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS);
std::pair<bool /*Changed*/, bool /*CFGChanged*/> runOnAlloca(AllocaInst &AI);
void clobberUse(Use &U);
bool deleteDeadInstructions(SmallPtrSetImpl<AllocaInst *> &DeletedAllocas);
@@ -427,7 +431,7 @@ static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit,
// discard the value component of this dbg.assign as the value cannot
// be computed with the new fragment.
Expr = *DIExpression::createFragmentExpression(
- DIExpression::get(Expr->getContext(), std::nullopt),
+ DIExpression::get(Expr->getContext(), {}),
NewFragment.OffsetInBits, NewFragment.SizeInBits);
SetKillLocation = true;
}
@@ -443,8 +447,7 @@ static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit,
::Value *NewValue = Value ? Value : DbgAssign->getValue();
auto *NewAssign = UnwrapDbgInstPtr(
DIB.insertDbgAssign(Inst, NewValue, DbgAssign->getVariable(), Expr,
- Dest,
- DIExpression::get(Expr->getContext(), std::nullopt),
+ Dest, DIExpression::get(Expr->getContext(), {}),
DbgAssign->getDebugLoc()),
DbgAssign);
@@ -477,7 +480,7 @@ static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit,
// noted as slightly offset (in code) from the store. In practice this
// should have little effect on the debugging experience due to the fact
// that all the split stores should get the same line number.
- NewAssign->moveBefore(DbgAssign);
+ NewAssign->moveBefore(DbgAssign->getIterator());
NewAssign->setDebugLoc(DbgAssign->getDebugLoc());
LLVM_DEBUG(dbgs() << "Created new assign: " << *NewAssign << "\n");
@@ -598,6 +601,7 @@ public:
/// If this is true, the slices are never fully built and should be
/// ignored.
bool isEscaped() const { return PointerEscapingInstr; }
+ bool isEscapedReadOnly() const { return PointerEscapingInstrReadOnly; }
/// Support for iterating over the slices.
/// @{
@@ -680,6 +684,7 @@ private:
/// store a pointer to that here and abort trying to form slices of the
/// alloca. This will be null if the alloca slices are analyzed successfully.
Instruction *PointerEscapingInstr;
+ Instruction *PointerEscapingInstrReadOnly;
/// The slices of the alloca.
///
@@ -1390,6 +1395,19 @@ private:
/// Disable SROA entirely if there are unhandled users of the alloca.
void visitInstruction(Instruction &I) { PI.setAborted(&I); }
+
+ void visitCallBase(CallBase &CB) {
+ // If the call operand is NoCapture ReadOnly, then we mark it as
+ // EscapedReadOnly.
+ if (CB.isDataOperand(U) &&
+ CB.doesNotCapture(U->getOperandNo()) &&
+ CB.onlyReadsMemory(U->getOperandNo())) {
+ PI.setEscapedReadOnly(&CB);
+ return;
+ }
+
+ Base::visitCallBase(CB);
+ }
};
AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
@@ -1397,7 +1415,7 @@ AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
AI(AI),
#endif
- PointerEscapingInstr(nullptr) {
+ PointerEscapingInstr(nullptr), PointerEscapingInstrReadOnly(nullptr) {
SliceBuilder PB(DL, AI, *this);
SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI);
if (PtrI.isEscaped() || PtrI.isAborted()) {
@@ -1408,6 +1426,7 @@ AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
assert(PointerEscapingInstr && "Did not track a bad instruction");
return;
}
+ PointerEscapingInstrReadOnly = PtrI.getEscapedReadOnlyInst();
llvm::erase_if(Slices, [](const Slice &S) { return S.isDead(); });
@@ -1445,6 +1464,9 @@ void AllocaSlices::print(raw_ostream &OS) const {
return;
}
+ if (PointerEscapingInstrReadOnly)
+ OS << "Escapes into ReadOnly: " << *PointerEscapingInstrReadOnly << "\n";
+
OS << "Slices of alloca: " << AI << "\n";
for (const_iterator I = begin(), E = end(); I != E; ++I)
print(OS, I);
@@ -1821,7 +1843,7 @@ static void rewriteMemOpOfSelect(SelectInst &SI, T &I,
CondMemOp.dropUBImplyingAttrsAndMetadata();
++NumLoadsSpeculated;
}
- CondMemOp.insertBefore(NewMemOpBB->getTerminator());
+ CondMemOp.insertBefore(NewMemOpBB->getTerminator()->getIterator());
Value *Ptr = SI.getOperand(1 + SuccIdx);
CondMemOp.setOperand(I.getPointerOperandIndex(), Ptr);
if (isa<LoadInst>(I)) {
@@ -3802,6 +3824,12 @@ private:
struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> {
AAMDNodes AATags;
+ // A vector to hold the split components that we want to emit
+ // separate fake uses for.
+ SmallVector<Value *, 4> Components;
+ // A vector to hold all the fake uses of the struct that we are splitting.
+ // Usually there should only be one, but we are handling the general case.
+ SmallVector<Instruction *, 1> FakeUses;
LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
AAMDNodes AATags, Align BaseAlign, const DataLayout &DL,
@@ -3826,10 +3854,32 @@ private:
GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset))
Load->setAAMetadata(
AATags.adjustForAccess(Offset.getZExtValue(), Load->getType(), DL));
+ // Record the load so we can generate a fake use for this aggregate
+ // component.
+ Components.push_back(Load);
Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
LLVM_DEBUG(dbgs() << " to: " << *Load << "\n");
}
+
+ // Stash the fake uses that use the value generated by this instruction.
+ void recordFakeUses(LoadInst &LI) {
+ for (Use &U : LI.uses())
+ if (auto *II = dyn_cast<IntrinsicInst>(U.getUser()))
+ if (II->getIntrinsicID() == Intrinsic::fake_use)
+ FakeUses.push_back(II);
+ }
+
+ // Replace all fake uses of the aggregate with a series of fake uses, one
+ // for each split component.
+ void emitFakeUses() {
+ for (Instruction *I : FakeUses) {
+ IRB.SetInsertPoint(I);
+ for (auto *V : Components)
+ IRB.CreateIntrinsic(Intrinsic::fake_use, {}, {V});
+ I->eraseFromParent();
+ }
+ }
};
bool visitLoadInst(LoadInst &LI) {
@@ -3841,8 +3891,10 @@ private:
LLVM_DEBUG(dbgs() << " original: " << LI << "\n");
LoadOpSplitter Splitter(&LI, *U, LI.getType(), LI.getAAMetadata(),
getAdjustedAlignment(&LI, 0), DL, IRB);
+ Splitter.recordFakeUses(LI);
Value *V = PoisonValue::get(LI.getType());
Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca");
+ Splitter.emitFakeUses();
Visited.erase(&LI);
LI.replaceAllUsesWith(V);
LI.eraseFromParent();
@@ -4769,9 +4821,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
// Finally, don't try to promote any allocas that new require re-splitting.
// They have already been added to the worklist above.
- llvm::erase_if(PromotableAllocas, [&](AllocaInst *AI) {
- return ResplitPromotableAllocas.count(AI);
- });
+ PromotableAllocas.set_subtract(ResplitPromotableAllocas);
return true;
}
@@ -4933,7 +4983,7 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
}
if (PHIUsers.empty() && SelectUsers.empty()) {
// Promote the alloca.
- PromotableAllocas.push_back(NewAI);
+ PromotableAllocas.insert(NewAI);
} else {
// If we have either PHIs or Selects to speculate, add them to those
// worklists and re-queue the new alloca so that we promote in on the
@@ -4977,8 +5027,6 @@ const Value *getAddress(const DbgVariableIntrinsic *DVI) {
}
const Value *getAddress(const DbgVariableRecord *DVR) {
- assert(DVR->getType() == DbgVariableRecord::LocationType::Declare ||
- DVR->getType() == DbgVariableRecord::LocationType::Assign);
return DVR->getAddress();
}
@@ -4989,8 +5037,6 @@ bool isKillAddress(const DbgVariableIntrinsic *DVI) {
}
bool isKillAddress(const DbgVariableRecord *DVR) {
- assert(DVR->getType() == DbgVariableRecord::LocationType::Declare ||
- DVR->getType() == DbgVariableRecord::LocationType::Assign);
if (DVR->getType() == DbgVariableRecord::LocationType::Assign)
return DVR->isKillAddress();
return DVR->isKillLocation();
@@ -5003,8 +5049,6 @@ const DIExpression *getAddressExpression(const DbgVariableIntrinsic *DVI) {
}
const DIExpression *getAddressExpression(const DbgVariableRecord *DVR) {
- assert(DVR->getType() == DbgVariableRecord::LocationType::Declare ||
- DVR->getType() == DbgVariableRecord::LocationType::Assign);
if (DVR->getType() == DbgVariableRecord::LocationType::Assign)
return DVR->getAddressExpression();
return DVR->getExpression();
@@ -5144,11 +5188,9 @@ insertNewDbgInst(DIBuilder &DIB, DbgAssignIntrinsic *Orig, AllocaInst *NewAddr,
DIAssignID::getDistinct(NewAddr->getContext()));
}
- Instruction *NewAssign =
- DIB.insertDbgAssign(NewAddr, Orig->getValue(), Orig->getVariable(),
- NewFragmentExpr, NewAddr, NewAddrExpr,
- Orig->getDebugLoc())
- .get<Instruction *>();
+ Instruction *NewAssign = cast<Instruction *>(DIB.insertDbgAssign(
+ NewAddr, Orig->getValue(), Orig->getVariable(), NewFragmentExpr, NewAddr,
+ NewAddrExpr, Orig->getDebugLoc()));
LLVM_DEBUG(dbgs() << "Created new assign intrinsic: " << *NewAssign << "\n");
(void)NewAssign;
}
@@ -5187,6 +5229,19 @@ insertNewDbgInst(DIBuilder &DIB, DbgVariableRecord *Orig, AllocaInst *NewAddr,
return;
}
+ if (Orig->isDbgValue()) {
+ DbgVariableRecord *DVR = DbgVariableRecord::createDbgVariableRecord(
+ NewAddr, Orig->getVariable(), NewFragmentExpr, Orig->getDebugLoc());
+ // Drop debug information if the expression doesn't start with a
+ // DW_OP_deref. This is because without a DW_OP_deref, the #dbg_value
+ // describes the address of alloca rather than the value inside the alloca.
+ if (!NewFragmentExpr->startsWithDeref())
+ DVR->setKillAddress();
+ BeforeInst->getParent()->insertDbgRecordBefore(DVR,
+ BeforeInst->getIterator());
+ return;
+ }
+
// Apply a DIAssignID to the store if it doesn't already have it.
if (!NewAddr->hasMetadata(LLVMContext::MD_DIAssignID)) {
NewAddr->setMetadata(LLVMContext::MD_DIAssignID,
@@ -5389,7 +5444,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
};
for_each(findDbgDeclares(Fragment.Alloca), RemoveOne);
for_each(findDVRDeclares(Fragment.Alloca), RemoveOne);
-
+ for_each(findDVRValues(Fragment.Alloca), RemoveOne);
insertNewDbgInst(DIB, DbgVariable, Fragment.Alloca, NewExpr, &AI,
NewDbgFragment, BitExtractOffset);
}
@@ -5399,6 +5454,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
// and the individual partitions.
for_each(findDbgDeclares(&AI), MigrateOne);
for_each(findDVRDeclares(&AI), MigrateOne);
+ for_each(findDVRValues(&AI), MigrateOne);
for_each(at::getAssignmentMarkers(&AI), MigrateOne);
for_each(at::getDVRAssignmentMarkers(&AI), MigrateOne);
@@ -5420,6 +5476,88 @@ void SROA::clobberUse(Use &U) {
}
}
+/// A basic LoadAndStorePromoter that does not remove store nodes.
+class BasicLoadAndStorePromoter : public LoadAndStorePromoter {
+public:
+ BasicLoadAndStorePromoter(ArrayRef<const Instruction *> Insts, SSAUpdater &S,
+ Type *ZeroType)
+ : LoadAndStorePromoter(Insts, S), ZeroType(ZeroType) {}
+ bool shouldDelete(Instruction *I) const override {
+ return !isa<StoreInst>(I) && !isa<AllocaInst>(I);
+ }
+
+ Value *getValueToUseForAlloca(Instruction *I) const override {
+ return UndefValue::get(ZeroType);
+ }
+
+private:
+ Type *ZeroType;
+};
+
+bool SROA::propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS) {
+ // Look through each "partition", looking for slices with the same start/end
+ // that do not overlap with any before them. The slices are sorted by
+ // increasing beginOffset. We don't use AS.partitions(), as it will use a more
+ // sophisticated algorithm that takes splittable slices into account.
+ auto PartitionBegin = AS.begin();
+ auto PartitionEnd = PartitionBegin;
+ uint64_t BeginOffset = PartitionBegin->beginOffset();
+ uint64_t EndOffset = PartitionBegin->endOffset();
+ while (PartitionBegin != AS.end()) {
+ bool AllSameAndValid = true;
+ SmallVector<Instruction *> Insts;
+ Type *PartitionType = nullptr;
+ while (PartitionEnd != AS.end() &&
+ (PartitionEnd->beginOffset() < EndOffset ||
+ PartitionEnd->endOffset() <= EndOffset)) {
+ if (AllSameAndValid) {
+ AllSameAndValid &= PartitionEnd->beginOffset() == BeginOffset &&
+ PartitionEnd->endOffset() == EndOffset;
+ Instruction *User =
+ cast<Instruction>(PartitionEnd->getUse()->getUser());
+ if (auto *LI = dyn_cast<LoadInst>(User)) {
+ Type *UserTy = LI->getType();
+ // LoadAndStorePromoter requires all the types to be the same.
+ if (!LI->isSimple() || (PartitionType && UserTy != PartitionType))
+ AllSameAndValid = false;
+ PartitionType = UserTy;
+ Insts.push_back(User);
+ } else if (auto *SI = dyn_cast<StoreInst>(User)) {
+ Type *UserTy = SI->getValueOperand()->getType();
+ if (!SI->isSimple() || (PartitionType && UserTy != PartitionType))
+ AllSameAndValid = false;
+ PartitionType = UserTy;
+ Insts.push_back(User);
+ } else if (!isAssumeLikeIntrinsic(User)) {
+ AllSameAndValid = false;
+ }
+ }
+ EndOffset = std::max(EndOffset, PartitionEnd->endOffset());
+ ++PartitionEnd;
+ }
+
+ // So long as all the slices start and end offsets matched, update loads to
+ // the values stored in the partition.
+ if (AllSameAndValid && !Insts.empty()) {
+ LLVM_DEBUG(dbgs() << "Propagate values on slice [" << BeginOffset << ", "
+ << EndOffset << ")\n");
+ SmallVector<PHINode *, 4> NewPHIs;
+ SSAUpdater SSA(&NewPHIs);
+ Insts.push_back(&AI);
+ BasicLoadAndStorePromoter Promoter(Insts, SSA, PartitionType);
+ Promoter.run(Insts);
+ }
+
+ // Step on to the next partition.
+ PartitionBegin = PartitionEnd;
+ if (PartitionBegin == AS.end())
+ break;
+ BeginOffset = PartitionBegin->beginOffset();
+ EndOffset = PartitionBegin->endOffset();
+ }
+ return true;
+}
+
/// Analyze an alloca for SROA.
///
/// This analyzes the alloca to ensure we can reason about it, builds
@@ -5460,6 +5598,11 @@ SROA::runOnAlloca(AllocaInst &AI) {
if (AS.isEscaped())
return {Changed, CFGChanged};
+ if (AS.isEscapedReadOnly()) {
+ Changed |= propagateStoredValuesToLoads(AI, AS);
+ return {Changed, CFGChanged};
+ }
+
// Delete all the dead users of this alloca before splitting and rewriting it.
for (Instruction *DeadUser : AS.getDeadUsers()) {
// Free up everything used by this instruction.
@@ -5545,7 +5688,6 @@ bool SROA::deleteDeadInstructions(
}
return Changed;
}
-
/// Promote the allocas, using the best available technique.
///
/// This attempts to promote whatever allocas have been identified as viable in
@@ -5555,13 +5697,12 @@ bool SROA::promoteAllocas(Function &F) {
if (PromotableAllocas.empty())
return false;
- NumPromoted += PromotableAllocas.size();
-
if (SROASkipMem2Reg) {
LLVM_DEBUG(dbgs() << "Not promoting allocas with mem2reg!\n");
} else {
LLVM_DEBUG(dbgs() << "Promoting allocas with mem2reg...\n");
- PromoteMemToReg(PromotableAllocas, DTU->getDomTree(), AC);
+ NumPromoted += PromotableAllocas.size();
+ PromoteMemToReg(PromotableAllocas.getArrayRef(), DTU->getDomTree(), AC);
}
PromotableAllocas.clear();
@@ -5578,7 +5719,7 @@ std::pair<bool /*Changed*/, bool /*CFGChanged*/> SROA::runSROA(Function &F) {
if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
if (DL.getTypeAllocSize(AI->getAllocatedType()).isScalable() &&
isAllocaPromotable(AI))
- PromotableAllocas.push_back(AI);
+ PromotableAllocas.insert(AI);
else
Worklist.insert(AI);
}
@@ -5602,10 +5743,9 @@ std::pair<bool /*Changed*/, bool /*CFGChanged*/> SROA::runSROA(Function &F) {
// Remove the deleted allocas from various lists so that we don't try to
// continue processing them.
if (!DeletedAllocas.empty()) {
- auto IsInSet = [&](AllocaInst *AI) { return DeletedAllocas.count(AI); };
- Worklist.remove_if(IsInSet);
- PostPromotionWorklist.remove_if(IsInSet);
- llvm::erase_if(PromotableAllocas, IsInSet);
+ Worklist.set_subtract(DeletedAllocas);
+ PostPromotionWorklist.set_subtract(DeletedAllocas);
+ PromotableAllocas.set_subtract(DeletedAllocas);
DeletedAllocas.clear();
}
}