diff options
Diffstat (limited to 'llvm/lib/Transforms/Scalar/SROA.cpp')
| -rw-r--r-- | llvm/lib/Transforms/Scalar/SROA.cpp | 206 |
1 files changed, 173 insertions, 33 deletions
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index c738a2a6f39a..29240aaaa21b 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -43,6 +43,7 @@ #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/PtrUseVisitor.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" @@ -55,7 +56,6 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" -#include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstVisitor.h" @@ -84,6 +84,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" #include <algorithm> #include <cassert> #include <cstddef> @@ -198,7 +199,9 @@ class SROA { SmallSetVector<AllocaInst *, 16> PostPromotionWorklist; /// A collection of alloca instructions we can directly promote. - std::vector<AllocaInst *> PromotableAllocas; + SetVector<AllocaInst *, SmallVector<AllocaInst *>, + SmallPtrSet<AllocaInst *, 16>, 16> + PromotableAllocas; /// A worklist of PHIs to speculate prior to promoting allocas. /// @@ -245,6 +248,7 @@ private: bool presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS); AllocaInst *rewritePartition(AllocaInst &AI, AllocaSlices &AS, Partition &P); bool splitAlloca(AllocaInst &AI, AllocaSlices &AS); + bool propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS); std::pair<bool /*Changed*/, bool /*CFGChanged*/> runOnAlloca(AllocaInst &AI); void clobberUse(Use &U); bool deleteDeadInstructions(SmallPtrSetImpl<AllocaInst *> &DeletedAllocas); @@ -427,7 +431,7 @@ static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit, // discard the value component of this dbg.assign as the value cannot // be computed with the new fragment. Expr = *DIExpression::createFragmentExpression( - DIExpression::get(Expr->getContext(), std::nullopt), + DIExpression::get(Expr->getContext(), {}), NewFragment.OffsetInBits, NewFragment.SizeInBits); SetKillLocation = true; } @@ -443,8 +447,7 @@ static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit, ::Value *NewValue = Value ? Value : DbgAssign->getValue(); auto *NewAssign = UnwrapDbgInstPtr( DIB.insertDbgAssign(Inst, NewValue, DbgAssign->getVariable(), Expr, - Dest, - DIExpression::get(Expr->getContext(), std::nullopt), + Dest, DIExpression::get(Expr->getContext(), {}), DbgAssign->getDebugLoc()), DbgAssign); @@ -477,7 +480,7 @@ static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit, // noted as slightly offset (in code) from the store. In practice this // should have little effect on the debugging experience due to the fact // that all the split stores should get the same line number. - NewAssign->moveBefore(DbgAssign); + NewAssign->moveBefore(DbgAssign->getIterator()); NewAssign->setDebugLoc(DbgAssign->getDebugLoc()); LLVM_DEBUG(dbgs() << "Created new assign: " << *NewAssign << "\n"); @@ -598,6 +601,7 @@ public: /// If this is true, the slices are never fully built and should be /// ignored. bool isEscaped() const { return PointerEscapingInstr; } + bool isEscapedReadOnly() const { return PointerEscapingInstrReadOnly; } /// Support for iterating over the slices. /// @{ @@ -680,6 +684,7 @@ private: /// store a pointer to that here and abort trying to form slices of the /// alloca. This will be null if the alloca slices are analyzed successfully. Instruction *PointerEscapingInstr; + Instruction *PointerEscapingInstrReadOnly; /// The slices of the alloca. /// @@ -1390,6 +1395,19 @@ private: /// Disable SROA entirely if there are unhandled users of the alloca. void visitInstruction(Instruction &I) { PI.setAborted(&I); } + + void visitCallBase(CallBase &CB) { + // If the call operand is NoCapture ReadOnly, then we mark it as + // EscapedReadOnly. + if (CB.isDataOperand(U) && + CB.doesNotCapture(U->getOperandNo()) && + CB.onlyReadsMemory(U->getOperandNo())) { + PI.setEscapedReadOnly(&CB); + return; + } + + Base::visitCallBase(CB); + } }; AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI) @@ -1397,7 +1415,7 @@ AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI) #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) AI(AI), #endif - PointerEscapingInstr(nullptr) { + PointerEscapingInstr(nullptr), PointerEscapingInstrReadOnly(nullptr) { SliceBuilder PB(DL, AI, *this); SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI); if (PtrI.isEscaped() || PtrI.isAborted()) { @@ -1408,6 +1426,7 @@ AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI) assert(PointerEscapingInstr && "Did not track a bad instruction"); return; } + PointerEscapingInstrReadOnly = PtrI.getEscapedReadOnlyInst(); llvm::erase_if(Slices, [](const Slice &S) { return S.isDead(); }); @@ -1445,6 +1464,9 @@ void AllocaSlices::print(raw_ostream &OS) const { return; } + if (PointerEscapingInstrReadOnly) + OS << "Escapes into ReadOnly: " << *PointerEscapingInstrReadOnly << "\n"; + OS << "Slices of alloca: " << AI << "\n"; for (const_iterator I = begin(), E = end(); I != E; ++I) print(OS, I); @@ -1821,7 +1843,7 @@ static void rewriteMemOpOfSelect(SelectInst &SI, T &I, CondMemOp.dropUBImplyingAttrsAndMetadata(); ++NumLoadsSpeculated; } - CondMemOp.insertBefore(NewMemOpBB->getTerminator()); + CondMemOp.insertBefore(NewMemOpBB->getTerminator()->getIterator()); Value *Ptr = SI.getOperand(1 + SuccIdx); CondMemOp.setOperand(I.getPointerOperandIndex(), Ptr); if (isa<LoadInst>(I)) { @@ -3802,6 +3824,12 @@ private: struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> { AAMDNodes AATags; + // A vector to hold the split components that we want to emit + // separate fake uses for. + SmallVector<Value *, 4> Components; + // A vector to hold all the fake uses of the struct that we are splitting. + // Usually there should only be one, but we are handling the general case. + SmallVector<Instruction *, 1> FakeUses; LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy, AAMDNodes AATags, Align BaseAlign, const DataLayout &DL, @@ -3826,10 +3854,32 @@ private: GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset)) Load->setAAMetadata( AATags.adjustForAccess(Offset.getZExtValue(), Load->getType(), DL)); + // Record the load so we can generate a fake use for this aggregate + // component. + Components.push_back(Load); Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert"); LLVM_DEBUG(dbgs() << " to: " << *Load << "\n"); } + + // Stash the fake uses that use the value generated by this instruction. + void recordFakeUses(LoadInst &LI) { + for (Use &U : LI.uses()) + if (auto *II = dyn_cast<IntrinsicInst>(U.getUser())) + if (II->getIntrinsicID() == Intrinsic::fake_use) + FakeUses.push_back(II); + } + + // Replace all fake uses of the aggregate with a series of fake uses, one + // for each split component. + void emitFakeUses() { + for (Instruction *I : FakeUses) { + IRB.SetInsertPoint(I); + for (auto *V : Components) + IRB.CreateIntrinsic(Intrinsic::fake_use, {}, {V}); + I->eraseFromParent(); + } + } }; bool visitLoadInst(LoadInst &LI) { @@ -3841,8 +3891,10 @@ private: LLVM_DEBUG(dbgs() << " original: " << LI << "\n"); LoadOpSplitter Splitter(&LI, *U, LI.getType(), LI.getAAMetadata(), getAdjustedAlignment(&LI, 0), DL, IRB); + Splitter.recordFakeUses(LI); Value *V = PoisonValue::get(LI.getType()); Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca"); + Splitter.emitFakeUses(); Visited.erase(&LI); LI.replaceAllUsesWith(V); LI.eraseFromParent(); @@ -4769,9 +4821,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { // Finally, don't try to promote any allocas that new require re-splitting. // They have already been added to the worklist above. - llvm::erase_if(PromotableAllocas, [&](AllocaInst *AI) { - return ResplitPromotableAllocas.count(AI); - }); + PromotableAllocas.set_subtract(ResplitPromotableAllocas); return true; } @@ -4933,7 +4983,7 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, } if (PHIUsers.empty() && SelectUsers.empty()) { // Promote the alloca. - PromotableAllocas.push_back(NewAI); + PromotableAllocas.insert(NewAI); } else { // If we have either PHIs or Selects to speculate, add them to those // worklists and re-queue the new alloca so that we promote in on the @@ -4977,8 +5027,6 @@ const Value *getAddress(const DbgVariableIntrinsic *DVI) { } const Value *getAddress(const DbgVariableRecord *DVR) { - assert(DVR->getType() == DbgVariableRecord::LocationType::Declare || - DVR->getType() == DbgVariableRecord::LocationType::Assign); return DVR->getAddress(); } @@ -4989,8 +5037,6 @@ bool isKillAddress(const DbgVariableIntrinsic *DVI) { } bool isKillAddress(const DbgVariableRecord *DVR) { - assert(DVR->getType() == DbgVariableRecord::LocationType::Declare || - DVR->getType() == DbgVariableRecord::LocationType::Assign); if (DVR->getType() == DbgVariableRecord::LocationType::Assign) return DVR->isKillAddress(); return DVR->isKillLocation(); @@ -5003,8 +5049,6 @@ const DIExpression *getAddressExpression(const DbgVariableIntrinsic *DVI) { } const DIExpression *getAddressExpression(const DbgVariableRecord *DVR) { - assert(DVR->getType() == DbgVariableRecord::LocationType::Declare || - DVR->getType() == DbgVariableRecord::LocationType::Assign); if (DVR->getType() == DbgVariableRecord::LocationType::Assign) return DVR->getAddressExpression(); return DVR->getExpression(); @@ -5144,11 +5188,9 @@ insertNewDbgInst(DIBuilder &DIB, DbgAssignIntrinsic *Orig, AllocaInst *NewAddr, DIAssignID::getDistinct(NewAddr->getContext())); } - Instruction *NewAssign = - DIB.insertDbgAssign(NewAddr, Orig->getValue(), Orig->getVariable(), - NewFragmentExpr, NewAddr, NewAddrExpr, - Orig->getDebugLoc()) - .get<Instruction *>(); + Instruction *NewAssign = cast<Instruction *>(DIB.insertDbgAssign( + NewAddr, Orig->getValue(), Orig->getVariable(), NewFragmentExpr, NewAddr, + NewAddrExpr, Orig->getDebugLoc())); LLVM_DEBUG(dbgs() << "Created new assign intrinsic: " << *NewAssign << "\n"); (void)NewAssign; } @@ -5187,6 +5229,19 @@ insertNewDbgInst(DIBuilder &DIB, DbgVariableRecord *Orig, AllocaInst *NewAddr, return; } + if (Orig->isDbgValue()) { + DbgVariableRecord *DVR = DbgVariableRecord::createDbgVariableRecord( + NewAddr, Orig->getVariable(), NewFragmentExpr, Orig->getDebugLoc()); + // Drop debug information if the expression doesn't start with a + // DW_OP_deref. This is because without a DW_OP_deref, the #dbg_value + // describes the address of alloca rather than the value inside the alloca. + if (!NewFragmentExpr->startsWithDeref()) + DVR->setKillAddress(); + BeforeInst->getParent()->insertDbgRecordBefore(DVR, + BeforeInst->getIterator()); + return; + } + // Apply a DIAssignID to the store if it doesn't already have it. if (!NewAddr->hasMetadata(LLVMContext::MD_DIAssignID)) { NewAddr->setMetadata(LLVMContext::MD_DIAssignID, @@ -5389,7 +5444,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) { }; for_each(findDbgDeclares(Fragment.Alloca), RemoveOne); for_each(findDVRDeclares(Fragment.Alloca), RemoveOne); - + for_each(findDVRValues(Fragment.Alloca), RemoveOne); insertNewDbgInst(DIB, DbgVariable, Fragment.Alloca, NewExpr, &AI, NewDbgFragment, BitExtractOffset); } @@ -5399,6 +5454,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) { // and the individual partitions. for_each(findDbgDeclares(&AI), MigrateOne); for_each(findDVRDeclares(&AI), MigrateOne); + for_each(findDVRValues(&AI), MigrateOne); for_each(at::getAssignmentMarkers(&AI), MigrateOne); for_each(at::getDVRAssignmentMarkers(&AI), MigrateOne); @@ -5420,6 +5476,88 @@ void SROA::clobberUse(Use &U) { } } +/// A basic LoadAndStorePromoter that does not remove store nodes. +class BasicLoadAndStorePromoter : public LoadAndStorePromoter { +public: + BasicLoadAndStorePromoter(ArrayRef<const Instruction *> Insts, SSAUpdater &S, + Type *ZeroType) + : LoadAndStorePromoter(Insts, S), ZeroType(ZeroType) {} + bool shouldDelete(Instruction *I) const override { + return !isa<StoreInst>(I) && !isa<AllocaInst>(I); + } + + Value *getValueToUseForAlloca(Instruction *I) const override { + return UndefValue::get(ZeroType); + } + +private: + Type *ZeroType; +}; + +bool SROA::propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS) { + // Look through each "partition", looking for slices with the same start/end + // that do not overlap with any before them. The slices are sorted by + // increasing beginOffset. We don't use AS.partitions(), as it will use a more + // sophisticated algorithm that takes splittable slices into account. + auto PartitionBegin = AS.begin(); + auto PartitionEnd = PartitionBegin; + uint64_t BeginOffset = PartitionBegin->beginOffset(); + uint64_t EndOffset = PartitionBegin->endOffset(); + while (PartitionBegin != AS.end()) { + bool AllSameAndValid = true; + SmallVector<Instruction *> Insts; + Type *PartitionType = nullptr; + while (PartitionEnd != AS.end() && + (PartitionEnd->beginOffset() < EndOffset || + PartitionEnd->endOffset() <= EndOffset)) { + if (AllSameAndValid) { + AllSameAndValid &= PartitionEnd->beginOffset() == BeginOffset && + PartitionEnd->endOffset() == EndOffset; + Instruction *User = + cast<Instruction>(PartitionEnd->getUse()->getUser()); + if (auto *LI = dyn_cast<LoadInst>(User)) { + Type *UserTy = LI->getType(); + // LoadAndStorePromoter requires all the types to be the same. + if (!LI->isSimple() || (PartitionType && UserTy != PartitionType)) + AllSameAndValid = false; + PartitionType = UserTy; + Insts.push_back(User); + } else if (auto *SI = dyn_cast<StoreInst>(User)) { + Type *UserTy = SI->getValueOperand()->getType(); + if (!SI->isSimple() || (PartitionType && UserTy != PartitionType)) + AllSameAndValid = false; + PartitionType = UserTy; + Insts.push_back(User); + } else if (!isAssumeLikeIntrinsic(User)) { + AllSameAndValid = false; + } + } + EndOffset = std::max(EndOffset, PartitionEnd->endOffset()); + ++PartitionEnd; + } + + // So long as all the slices start and end offsets matched, update loads to + // the values stored in the partition. + if (AllSameAndValid && !Insts.empty()) { + LLVM_DEBUG(dbgs() << "Propagate values on slice [" << BeginOffset << ", " + << EndOffset << ")\n"); + SmallVector<PHINode *, 4> NewPHIs; + SSAUpdater SSA(&NewPHIs); + Insts.push_back(&AI); + BasicLoadAndStorePromoter Promoter(Insts, SSA, PartitionType); + Promoter.run(Insts); + } + + // Step on to the next partition. + PartitionBegin = PartitionEnd; + if (PartitionBegin == AS.end()) + break; + BeginOffset = PartitionBegin->beginOffset(); + EndOffset = PartitionBegin->endOffset(); + } + return true; +} + /// Analyze an alloca for SROA. /// /// This analyzes the alloca to ensure we can reason about it, builds @@ -5460,6 +5598,11 @@ SROA::runOnAlloca(AllocaInst &AI) { if (AS.isEscaped()) return {Changed, CFGChanged}; + if (AS.isEscapedReadOnly()) { + Changed |= propagateStoredValuesToLoads(AI, AS); + return {Changed, CFGChanged}; + } + // Delete all the dead users of this alloca before splitting and rewriting it. for (Instruction *DeadUser : AS.getDeadUsers()) { // Free up everything used by this instruction. @@ -5545,7 +5688,6 @@ bool SROA::deleteDeadInstructions( } return Changed; } - /// Promote the allocas, using the best available technique. /// /// This attempts to promote whatever allocas have been identified as viable in @@ -5555,13 +5697,12 @@ bool SROA::promoteAllocas(Function &F) { if (PromotableAllocas.empty()) return false; - NumPromoted += PromotableAllocas.size(); - if (SROASkipMem2Reg) { LLVM_DEBUG(dbgs() << "Not promoting allocas with mem2reg!\n"); } else { LLVM_DEBUG(dbgs() << "Promoting allocas with mem2reg...\n"); - PromoteMemToReg(PromotableAllocas, DTU->getDomTree(), AC); + NumPromoted += PromotableAllocas.size(); + PromoteMemToReg(PromotableAllocas.getArrayRef(), DTU->getDomTree(), AC); } PromotableAllocas.clear(); @@ -5578,7 +5719,7 @@ std::pair<bool /*Changed*/, bool /*CFGChanged*/> SROA::runSROA(Function &F) { if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) { if (DL.getTypeAllocSize(AI->getAllocatedType()).isScalable() && isAllocaPromotable(AI)) - PromotableAllocas.push_back(AI); + PromotableAllocas.insert(AI); else Worklist.insert(AI); } @@ -5602,10 +5743,9 @@ std::pair<bool /*Changed*/, bool /*CFGChanged*/> SROA::runSROA(Function &F) { // Remove the deleted allocas from various lists so that we don't try to // continue processing them. if (!DeletedAllocas.empty()) { - auto IsInSet = [&](AllocaInst *AI) { return DeletedAllocas.count(AI); }; - Worklist.remove_if(IsInSet); - PostPromotionWorklist.remove_if(IsInSet); - llvm::erase_if(PromotableAllocas, IsInSet); + Worklist.set_subtract(DeletedAllocas); + PostPromotionWorklist.set_subtract(DeletedAllocas); + PromotableAllocas.set_subtract(DeletedAllocas); DeletedAllocas.clear(); } } |
