Diffstat (limited to 'contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp | 1518 |
1 file changed, 1063 insertions, 455 deletions
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp index f55501a05d85..e1e0d50979dc 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -20,16 +20,18 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallString.h" #include "llvm/Analysis/PtrUseVisitor.h" +#include "llvm/Analysis/StackLifetime.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/CFG.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/circular_raw_ostream.h" #include "llvm/Support/OptimizedStructLayout.h" +#include "llvm/Support/circular_raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" @@ -41,6 +43,13 @@ using namespace llvm; // "coro-frame", which results in leaner debug spew. #define DEBUG_TYPE "coro-suspend-crossing" +static cl::opt<bool> EnableReuseStorageInFrame( + "reuse-storage-in-coroutine-frame", cl::Hidden, + cl::desc( + "Enable the optimization which would reuse the storage in the coroutine \ + frame for allocas whose liferanges are not overlapped, for testing purposes"), + llvm::cl::init(false)); + enum { SmallVectorThreshold = 32 }; // Provides two way mapping between the blocks and numbers. @@ -126,10 +135,10 @@ struct SuspendCrossingInfo { BasicBlock *UseBB = I->getParent(); - // As a special case, treat uses by an llvm.coro.suspend.retcon - // as if they were uses in the suspend's single predecessor: the - // uses conceptually occur before the suspend. - if (isa<CoroSuspendRetconInst>(I)) { + // As a special case, treat uses by an llvm.coro.suspend.retcon or an + // llvm.coro.suspend.async as if they were uses in the suspend's single + // predecessor: the uses conceptually occur before the suspend. + if (isa<CoroSuspendRetconInst>(I) || isa<CoroSuspendAsyncInst>(I)) { UseBB = UseBB->getSinglePredecessor(); assert(UseBB && "should have split coro.suspend into its own block"); } @@ -283,71 +292,96 @@ SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape) #undef DEBUG_TYPE // "coro-suspend-crossing" #define DEBUG_TYPE "coro-frame" -// We build up the list of spills for every case where a use is separated -// from the definition by a suspend point. - -static const unsigned InvalidFieldIndex = ~0U; - namespace { -class Spill { - Value *Def = nullptr; - Instruction *User = nullptr; - unsigned FieldNo = InvalidFieldIndex; - -public: - Spill(Value *Def, llvm::User *U) : Def(Def), User(cast<Instruction>(U)) {} - - Value *def() const { return Def; } - Instruction *user() const { return User; } - BasicBlock *userBlock() const { return User->getParent(); } +class FrameTypeBuilder; +// Mapping from the to-be-spilled value to all the users that need reload. 
+using SpillInfo = SmallMapVector<Value *, SmallVector<Instruction *, 2>, 8>; +struct AllocaInfo { + AllocaInst *Alloca; + DenseMap<Instruction *, llvm::Optional<APInt>> Aliases; + bool MayWriteBeforeCoroBegin; + AllocaInfo(AllocaInst *Alloca, + DenseMap<Instruction *, llvm::Optional<APInt>> Aliases, + bool MayWriteBeforeCoroBegin) + : Alloca(Alloca), Aliases(std::move(Aliases)), + MayWriteBeforeCoroBegin(MayWriteBeforeCoroBegin) {} +}; +struct FrameDataInfo { + // All the values (that are not allocas) that needs to be spilled to the + // frame. + SpillInfo Spills; + // Allocas contains all values defined as allocas that need to live in the + // frame. + SmallVector<AllocaInfo, 8> Allocas; + + SmallVector<Value *, 8> getAllDefs() const { + SmallVector<Value *, 8> Defs; + for (const auto &P : Spills) + Defs.push_back(P.first); + for (const auto &A : Allocas) + Defs.push_back(A.Alloca); + return Defs; + } - // Note that field index is stored in the first SpillEntry for a particular - // definition. Subsequent mentions of a defintion do not have fieldNo - // assigned. This works out fine as the users of Spills capture the info about - // the definition the first time they encounter it. Consider refactoring - // SpillInfo into two arrays to normalize the spill representation. - unsigned fieldIndex() const { - assert(FieldNo != InvalidFieldIndex && "Accessing unassigned field"); - return FieldNo; + uint32_t getFieldIndex(Value *V) const { + auto Itr = FieldIndexMap.find(V); + assert(Itr != FieldIndexMap.end() && + "Value does not have a frame field index"); + return Itr->second; } - void setFieldIndex(unsigned FieldNumber) { - assert(FieldNo == InvalidFieldIndex && "Reassigning field number"); - FieldNo = FieldNumber; + + void setFieldIndex(Value *V, uint32_t Index) { + assert((LayoutIndexUpdateStarted || FieldIndexMap.count(V) == 0) && + "Cannot set the index for the same field twice."); + FieldIndexMap[V] = Index; } + + // Remap the index of every field in the frame, using the final layout index. + void updateLayoutIndex(FrameTypeBuilder &B); + +private: + // LayoutIndexUpdateStarted is used to avoid updating the index of any field + // twice by mistake. + bool LayoutIndexUpdateStarted = false; + // Map from values to their slot indexes on the frame. They will be first set + // with their original insertion field index. After the frame is built, their + // indexes will be updated into the final layout index. + DenseMap<Value *, uint32_t> FieldIndexMap; }; } // namespace -// Note that there may be more than one record with the same value of Def in -// the SpillInfo vector. 
-using SpillInfo = SmallVector<Spill, 8>; - #ifndef NDEBUG -static void dump(StringRef Title, SpillInfo const &Spills) { +static void dumpSpills(StringRef Title, const SpillInfo &Spills) { dbgs() << "------------- " << Title << "--------------\n"; - Value *CurrentValue = nullptr; - for (auto const &E : Spills) { - if (CurrentValue != E.def()) { - CurrentValue = E.def(); - CurrentValue->dump(); - } + for (const auto &E : Spills) { + E.first->dump(); dbgs() << " user: "; - E.user()->dump(); + for (auto *I : E.second) + I->dump(); + } +} + +static void dumpAllocas(const SmallVectorImpl<AllocaInfo> &Allocas) { + dbgs() << "------------- Allocas --------------\n"; + for (const auto &A : Allocas) { + A.Alloca->dump(); } } #endif namespace { +using FieldIDType = size_t; // We cannot rely solely on natural alignment of a type when building a // coroutine frame and if the alignment specified on the Alloca instruction // differs from the natural alignment of the alloca type we will need to insert // padding. class FrameTypeBuilder { +private: struct Field { uint64_t Size; uint64_t Offset; - Spill *ForSpill; Type *Ty; - unsigned FieldIndex; + FieldIDType LayoutFieldIndex; Align Alignment; Align TyAlignment; }; @@ -363,19 +397,12 @@ class FrameTypeBuilder { public: FrameTypeBuilder(LLVMContext &Context, DataLayout const &DL) - : DL(DL), Context(Context) {} - - class FieldId { - size_t Value; - explicit FieldId(size_t Value) : Value(Value) {} - - friend class FrameTypeBuilder; - }; + : DL(DL), Context(Context) {} /// Add a field to this structure for the storage of an `alloca` /// instruction. - FieldId addFieldForAlloca(AllocaInst *AI, Spill *ForSpill = nullptr, - bool IsHeader = false) { + LLVM_NODISCARD FieldIDType addFieldForAlloca(AllocaInst *AI, + bool IsHeader = false) { Type *Ty = AI->getAllocatedType(); // Make an array type if this is a static array allocation. @@ -386,13 +413,42 @@ public: report_fatal_error("Coroutines cannot handle non static allocas yet"); } - return addField(Ty, AI->getAlign(), ForSpill, IsHeader); + return addField(Ty, AI->getAlign(), IsHeader); } + /// We want to put the allocas whose lifetime-ranges are not overlapped + /// into one slot of coroutine frame. + /// Consider the example at:https://bugs.llvm.org/show_bug.cgi?id=45566 + /// + /// cppcoro::task<void> alternative_paths(bool cond) { + /// if (cond) { + /// big_structure a; + /// process(a); + /// co_await something(); + /// } else { + /// big_structure b; + /// process2(b); + /// co_await something(); + /// } + /// } + /// + /// We want to put variable a and variable b in the same slot to + /// reduce the size of coroutine frame. + /// + /// This function use StackLifetime algorithm to partition the AllocaInsts in + /// Spills to non-overlapped sets in order to put Alloca in the same + /// non-overlapped set into the same slot in the Coroutine Frame. Then add + /// field for the allocas in the same non-overlapped set by using the largest + /// type as the field type. + /// + /// Side Effects: Because We sort the allocas, the order of allocas in the + /// frame may be different with the order in the source code. + void addFieldForAllocas(const Function &F, FrameDataInfo &FrameData, + coro::Shape &Shape); + /// Add a field to this structure. 
- FieldId addField(Type *Ty, MaybeAlign FieldAlignment, - Spill *ForSpill = nullptr, - bool IsHeader = false) { + LLVM_NODISCARD FieldIDType addField(Type *Ty, MaybeAlign FieldAlignment, + bool IsHeader = false) { assert(!IsFinished && "adding fields to a finished builder"); assert(Ty && "must provide a type for a field"); @@ -415,9 +471,8 @@ public: Offset = OptimizedStructLayoutField::FlexibleOffset; } - Fields.push_back({FieldSize, Offset, ForSpill, Ty, 0, - *FieldAlignment, TyAlignment}); - return FieldId(Fields.size() - 1); + Fields.push_back({FieldSize, Offset, Ty, 0, *FieldAlignment, TyAlignment}); + return Fields.size() - 1; } /// Finish the layout and set the body on the given type. @@ -433,13 +488,162 @@ public: return StructAlign; } - unsigned getFieldIndex(FieldId Id) const { + FieldIDType getLayoutFieldIndex(FieldIDType Id) const { assert(IsFinished && "not yet finished!"); - return Fields[Id.Value].FieldIndex; + return Fields[Id].LayoutFieldIndex; } }; } // namespace +void FrameDataInfo::updateLayoutIndex(FrameTypeBuilder &B) { + auto Updater = [&](Value *I) { + setFieldIndex(I, B.getLayoutFieldIndex(getFieldIndex(I))); + }; + LayoutIndexUpdateStarted = true; + for (auto &S : Spills) + Updater(S.first); + for (const auto &A : Allocas) + Updater(A.Alloca); + LayoutIndexUpdateStarted = false; +} + +void FrameTypeBuilder::addFieldForAllocas(const Function &F, + FrameDataInfo &FrameData, + coro::Shape &Shape) { + DenseMap<AllocaInst *, unsigned int> AllocaIndex; + using AllocaSetType = SmallVector<AllocaInst *, 4>; + SmallVector<AllocaSetType, 4> NonOverlapedAllocas; + + // We need to add field for allocas at the end of this function. However, this + // function has multiple exits, so we use this helper to avoid redundant code. + struct RTTIHelper { + std::function<void()> func; + RTTIHelper(std::function<void()> &&func) : func(func) {} + ~RTTIHelper() { func(); } + } Helper([&]() { + for (auto AllocaList : NonOverlapedAllocas) { + auto *LargestAI = *AllocaList.begin(); + FieldIDType Id = addFieldForAlloca(LargestAI); + for (auto *Alloca : AllocaList) + FrameData.setFieldIndex(Alloca, Id); + } + }); + + if (!Shape.ReuseFrameSlot && !EnableReuseStorageInFrame) { + for (const auto &A : FrameData.Allocas) { + AllocaInst *Alloca = A.Alloca; + AllocaIndex[Alloca] = NonOverlapedAllocas.size(); + NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca)); + } + return; + } + + // Because there are pathes from the lifetime.start to coro.end + // for each alloca, the liferanges for every alloca is overlaped + // in the blocks who contain coro.end and the successor blocks. + // So we choose to skip there blocks when we calculates the liferange + // for each alloca. It should be reasonable since there shouldn't be uses + // in these blocks and the coroutine frame shouldn't be used outside the + // coroutine body. + // + // Note that the user of coro.suspend may not be SwitchInst. However, this + // case seems too complex to handle. And it is harmless to skip these + // patterns since it just prevend putting the allocas to live in the same + // slot. 
+ DenseMap<SwitchInst *, BasicBlock *> DefaultSuspendDest; + for (auto CoroSuspendInst : Shape.CoroSuspends) { + for (auto U : CoroSuspendInst->users()) { + if (auto *ConstSWI = dyn_cast<SwitchInst>(U)) { + auto *SWI = const_cast<SwitchInst *>(ConstSWI); + DefaultSuspendDest[SWI] = SWI->getDefaultDest(); + SWI->setDefaultDest(SWI->getSuccessor(1)); + } + } + } + + auto ExtractAllocas = [&]() { + AllocaSetType Allocas; + Allocas.reserve(FrameData.Allocas.size()); + for (const auto &A : FrameData.Allocas) + Allocas.push_back(A.Alloca); + return Allocas; + }; + StackLifetime StackLifetimeAnalyzer(F, ExtractAllocas(), + StackLifetime::LivenessType::May); + StackLifetimeAnalyzer.run(); + auto IsAllocaInferenre = [&](const AllocaInst *AI1, const AllocaInst *AI2) { + return StackLifetimeAnalyzer.getLiveRange(AI1).overlaps( + StackLifetimeAnalyzer.getLiveRange(AI2)); + }; + auto GetAllocaSize = [&](const AllocaInfo &A) { + Optional<TypeSize> RetSize = A.Alloca->getAllocationSizeInBits(DL); + assert(RetSize && "Variable Length Arrays (VLA) are not supported.\n"); + assert(!RetSize->isScalable() && "Scalable vectors are not yet supported"); + return RetSize->getFixedSize(); + }; + // Put larger allocas in the front. So the larger allocas have higher + // priority to merge, which can save more space potentially. Also each + // AllocaSet would be ordered. So we can get the largest Alloca in one + // AllocaSet easily. + sort(FrameData.Allocas, [&](const auto &Iter1, const auto &Iter2) { + return GetAllocaSize(Iter1) > GetAllocaSize(Iter2); + }); + for (const auto &A : FrameData.Allocas) { + AllocaInst *Alloca = A.Alloca; + bool Merged = false; + // Try to find if the Alloca is not inferenced with any existing + // NonOverlappedAllocaSet. If it is true, insert the alloca to that + // NonOverlappedAllocaSet. + for (auto &AllocaSet : NonOverlapedAllocas) { + assert(!AllocaSet.empty() && "Processing Alloca Set is not empty.\n"); + bool NoInference = none_of(AllocaSet, [&](auto Iter) { + return IsAllocaInferenre(Alloca, Iter); + }); + // If the alignment of A is multiple of the alignment of B, the address + // of A should satisfy the requirement for aligning for B. + // + // There may be other more fine-grained strategies to handle the alignment + // infomation during the merging process. But it seems hard to handle + // these strategies and benefit little. + bool Alignable = [&]() -> bool { + auto *LargestAlloca = *AllocaSet.begin(); + return LargestAlloca->getAlign().value() % Alloca->getAlign().value() == + 0; + }(); + bool CouldMerge = NoInference && Alignable; + if (!CouldMerge) + continue; + AllocaIndex[Alloca] = AllocaIndex[*AllocaSet.begin()]; + AllocaSet.push_back(Alloca); + Merged = true; + break; + } + if (!Merged) { + AllocaIndex[Alloca] = NonOverlapedAllocas.size(); + NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca)); + } + } + // Recover the default target destination for each Switch statement + // reserved. + for (auto SwitchAndDefaultDest : DefaultSuspendDest) { + SwitchInst *SWI = SwitchAndDefaultDest.first; + BasicBlock *DestBB = SwitchAndDefaultDest.second; + SWI->setDefaultDest(DestBB); + } + // This Debug Info could tell us which allocas are merged into one slot. 
+ LLVM_DEBUG(for (auto &AllocaSet + : NonOverlapedAllocas) { + if (AllocaSet.size() > 1) { + dbgs() << "In Function:" << F.getName() << "\n"; + dbgs() << "Find Union Set " + << "\n"; + dbgs() << "\tAllocas are \n"; + for (auto Alloca : AllocaSet) + dbgs() << "\t\t" << *Alloca << "\n"; + } + }); +} + void FrameTypeBuilder::finish(StructType *Ty) { assert(!IsFinished && "already finished!"); @@ -491,13 +695,8 @@ void FrameTypeBuilder::finish(StructType *Ty) { Offset - LastOffset)); } - // Record the layout information into both the Field and the - // original Spill, if there is one. F.Offset = Offset; - F.FieldIndex = FieldTypes.size(); - if (F.ForSpill) { - F.ForSpill->setFieldIndex(F.FieldIndex); - } + F.LayoutFieldIndex = FieldTypes.size(); FieldTypes.push_back(F.Ty); LastOffset = Offset + F.Size; @@ -509,8 +708,8 @@ void FrameTypeBuilder::finish(StructType *Ty) { // Check that the IR layout matches the offsets we expect. auto Layout = DL.getStructLayout(Ty); for (auto &F : Fields) { - assert(Ty->getElementType(F.FieldIndex) == F.Ty); - assert(Layout->getElementOffset(F.FieldIndex) == F.Offset); + assert(Ty->getElementType(F.LayoutFieldIndex) == F.Ty); + assert(Layout->getElementOffset(F.LayoutFieldIndex) == F.Offset); } #endif @@ -526,7 +725,7 @@ void FrameTypeBuilder::finish(StructType *Ty) { // ... spills ... // }; static StructType *buildFrameType(Function &F, coro::Shape &Shape, - SpillInfo &Spills) { + FrameDataInfo &FrameData) { LLVMContext &C = F.getContext(); const DataLayout &DL = F.getParent()->getDataLayout(); StructType *FrameTy = [&] { @@ -538,8 +737,7 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape, FrameTypeBuilder B(C, DL); AllocaInst *PromiseAlloca = Shape.getPromiseAlloca(); - Optional<FrameTypeBuilder::FieldId> PromiseFieldId; - Optional<FrameTypeBuilder::FieldId> SwitchIndexFieldId; + Optional<FieldIDType> SwitchIndexFieldId; if (Shape.ABI == coro::ABI::Switch) { auto *FramePtrTy = FrameTy->getPointerTo(); @@ -549,14 +747,15 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape, // Add header fields for the resume and destroy functions. // We can rely on these being perfectly packed. - B.addField(FnPtrTy, None, nullptr, /*header*/ true); - B.addField(FnPtrTy, None, nullptr, /*header*/ true); + (void)B.addField(FnPtrTy, None, /*header*/ true); + (void)B.addField(FnPtrTy, None, /*header*/ true); - // Add a header field for the promise if there is one. - if (PromiseAlloca) { - PromiseFieldId = - B.addFieldForAlloca(PromiseAlloca, nullptr, /*header*/ true); - } + // PromiseAlloca field needs to be explicitly added here because it's + // a header field with a fixed offset based on its alignment. Hence it + // needs special handling and cannot be added to FrameData.Allocas. + if (PromiseAlloca) + FrameData.setFieldIndex( + PromiseAlloca, B.addFieldForAlloca(PromiseAlloca, /*header*/ true)); // Add a field to store the suspend index. This doesn't need to // be in the header. @@ -568,40 +767,40 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape, assert(PromiseAlloca == nullptr && "lowering doesn't support promises"); } - Value *CurrentDef = nullptr; - + // Because multiple allocas may own the same field slot, + // we add allocas to field here. + B.addFieldForAllocas(F, FrameData, Shape); + // Add PromiseAlloca to Allocas list so that + // 1. updateLayoutIndex could update its index after + // `performOptimizedStructLayout` + // 2. it is processed in insertSpills. 
+ if (Shape.ABI == coro::ABI::Switch && PromiseAlloca) + // We assume that the promise alloca won't be modified before + // CoroBegin and no alias will be create before CoroBegin. + FrameData.Allocas.emplace_back( + PromiseAlloca, DenseMap<Instruction *, llvm::Optional<APInt>>{}, false); // Create an entry for every spilled value. - for (auto &S : Spills) { - // We can have multiple entries in Spills for a single value, but - // they should form a contiguous run. Ignore all but the first. - if (CurrentDef == S.def()) - continue; - - CurrentDef = S.def(); - - assert(CurrentDef != PromiseAlloca && - "recorded spill use of promise alloca?"); - - if (auto *AI = dyn_cast<AllocaInst>(CurrentDef)) { - B.addFieldForAlloca(AI, &S); - } else { - Type *Ty = CurrentDef->getType(); - B.addField(Ty, None, &S); - } + for (auto &S : FrameData.Spills) { + Type *FieldType = S.first->getType(); + // For byval arguments, we need to store the pointed value in the frame, + // instead of the pointer itself. + if (const Argument *A = dyn_cast<Argument>(S.first)) + if (A->hasByValAttr()) + FieldType = FieldType->getPointerElementType(); + FieldIDType Id = B.addField(FieldType, None); + FrameData.setFieldIndex(S.first, Id); } B.finish(FrameTy); + FrameData.updateLayoutIndex(B); Shape.FrameAlign = B.getStructAlign(); Shape.FrameSize = B.getStructSize(); switch (Shape.ABI) { - // In the switch ABI, remember the field indices for the promise and - // switch-index fields. case coro::ABI::Switch: + // In the switch ABI, remember the switch-index field. Shape.SwitchLowering.IndexField = - B.getFieldIndex(*SwitchIndexFieldId); - Shape.SwitchLowering.PromiseField = - (PromiseAlloca ? B.getFieldIndex(*PromiseFieldId) : 0); + B.getLayoutFieldIndex(*SwitchIndexFieldId); // Also round the frame size up to a multiple of its alignment, as is // generally expected in C/C++. @@ -617,65 +816,246 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape, B.getStructAlign() <= Id->getStorageAlignment()); break; } + case coro::ABI::Async: { + Shape.AsyncLowering.FrameOffset = + alignTo(Shape.AsyncLowering.ContextHeaderSize, Shape.FrameAlign); + // Also make the final context size a multiple of the context alignment to + // make allocation easier for allocators. + Shape.AsyncLowering.ContextSize = + alignTo(Shape.AsyncLowering.FrameOffset + Shape.FrameSize, + Shape.AsyncLowering.getContextAlignment()); + if (Shape.AsyncLowering.getContextAlignment() < Shape.FrameAlign) { + report_fatal_error( + "The alignment requirment of frame variables cannot be higher than " + "the alignment of the async function context"); + } + break; + } } return FrameTy; } -// We use a pointer use visitor to discover if there are any writes into an -// alloca that dominates CoroBegin. If that is the case, insertSpills will copy -// the value from the alloca into the coroutine frame spill slot corresponding -// to that alloca. +// We use a pointer use visitor to track how an alloca is being used. +// The goal is to be able to answer the following three questions: +// 1. Should this alloca be allocated on the frame instead. +// 2. Could the content of the alloca be modified prior to CoroBegn, which would +// require copying the data from alloca to the frame after CoroBegin. +// 3. Is there any alias created for this alloca prior to CoroBegin, but used +// after CoroBegin. In that case, we will need to recreate the alias after +// CoroBegin based off the frame. To answer question 1, we track two things: +// a. 
List of all BasicBlocks that use this alloca or any of the aliases of +// the alloca. In the end, we check if there exists any two basic blocks that +// cross suspension points. If so, this alloca must be put on the frame. b. +// Whether the alloca or any alias of the alloca is escaped at some point, +// either by storing the address somewhere, or the address is used in a +// function call that might capture. If it's ever escaped, this alloca must be +// put on the frame conservatively. +// To answer quetion 2, we track through the variable MayWriteBeforeCoroBegin. +// Whenever a potential write happens, either through a store instruction, a +// function call or any of the memory intrinsics, we check whether this +// instruction is prior to CoroBegin. To answer question 3, we track the offsets +// of all aliases created for the alloca prior to CoroBegin but used after +// CoroBegin. llvm::Optional is used to be able to represent the case when the +// offset is unknown (e.g. when you have a PHINode that takes in different +// offset values). We cannot handle unknown offsets and will assert. This is the +// potential issue left out. An ideal solution would likely require a +// significant redesign. namespace { struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> { using Base = PtrUseVisitor<AllocaUseVisitor>; AllocaUseVisitor(const DataLayout &DL, const DominatorTree &DT, - const CoroBeginInst &CB) - : PtrUseVisitor(DL), DT(DT), CoroBegin(CB) {} + const CoroBeginInst &CB, const SuspendCrossingInfo &Checker) + : PtrUseVisitor(DL), DT(DT), CoroBegin(CB), Checker(Checker) {} - // We are only interested in uses that dominate coro.begin. void visit(Instruction &I) { - if (DT.dominates(&I, &CoroBegin)) - Base::visit(I); + UserBBs.insert(I.getParent()); + Base::visit(I); + // If the pointer is escaped prior to CoroBegin, we have to assume it would + // be written into before CoroBegin as well. + if (PI.isEscaped() && !DT.dominates(&CoroBegin, PI.getEscapingInst())) { + MayWriteBeforeCoroBegin = true; + } } // We need to provide this overload as PtrUseVisitor uses a pointer based // visiting function. void visit(Instruction *I) { return visit(*I); } - void visitLoadInst(LoadInst &) {} // Good. Nothing to do. + void visitPHINode(PHINode &I) { + enqueueUsers(I); + handleAlias(I); + } + + void visitSelectInst(SelectInst &I) { + enqueueUsers(I); + handleAlias(I); + } + + void visitStoreInst(StoreInst &SI) { + // Regardless whether the alias of the alloca is the value operand or the + // pointer operand, we need to assume the alloca is been written. + handleMayWrite(SI); + + if (SI.getValueOperand() != U->get()) + return; + + // We are storing the pointer into a memory location, potentially escaping. + // As an optimization, we try to detect simple cases where it doesn't + // actually escape, for example: + // %ptr = alloca .. + // %addr = alloca .. + // store %ptr, %addr + // %x = load %addr + // .. + // If %addr is only used by loading from it, we could simply treat %x as + // another alias of %ptr, and not considering %ptr being escaped. + auto IsSimpleStoreThenLoad = [&]() { + auto *AI = dyn_cast<AllocaInst>(SI.getPointerOperand()); + // If the memory location we are storing to is not an alloca, it + // could be an alias of some other memory locations, which is difficult + // to analyze. + if (!AI) + return false; + // StoreAliases contains aliases of the memory location stored into. 
+ SmallVector<Instruction *, 4> StoreAliases = {AI}; + while (!StoreAliases.empty()) { + Instruction *I = StoreAliases.pop_back_val(); + for (User *U : I->users()) { + // If we are loading from the memory location, we are creating an + // alias of the original pointer. + if (auto *LI = dyn_cast<LoadInst>(U)) { + enqueueUsers(*LI); + handleAlias(*LI); + continue; + } + // If we are overriding the memory location, the pointer certainly + // won't escape. + if (auto *S = dyn_cast<StoreInst>(U)) + if (S->getPointerOperand() == I) + continue; + if (auto *II = dyn_cast<IntrinsicInst>(U)) + if (II->isLifetimeStartOrEnd()) + continue; + // BitCastInst creats aliases of the memory location being stored + // into. + if (auto *BI = dyn_cast<BitCastInst>(U)) { + StoreAliases.push_back(BI); + continue; + } + return false; + } + } + + return true; + }; + + if (!IsSimpleStoreThenLoad()) + PI.setEscaped(&SI); + } - // If the use is an operand, the pointer escaped and anything can write into - // that memory. If the use is the pointer, we are definitely writing into the - // alloca and therefore we need to copy. - void visitStoreInst(StoreInst &SI) { PI.setAborted(&SI); } + // All mem intrinsics modify the data. + void visitMemIntrinsic(MemIntrinsic &MI) { handleMayWrite(MI); } - // Any other instruction that is not filtered out by PtrUseVisitor, will - // result in the copy. - void visitInstruction(Instruction &I) { PI.setAborted(&I); } + void visitBitCastInst(BitCastInst &BC) { + Base::visitBitCastInst(BC); + handleAlias(BC); + } + + void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) { + Base::visitAddrSpaceCastInst(ASC); + handleAlias(ASC); + } + + void visitGetElementPtrInst(GetElementPtrInst &GEPI) { + // The base visitor will adjust Offset accordingly. + Base::visitGetElementPtrInst(GEPI); + handleAlias(GEPI); + } + + void visitCallBase(CallBase &CB) { + for (unsigned Op = 0, OpCount = CB.getNumArgOperands(); Op < OpCount; ++Op) + if (U->get() == CB.getArgOperand(Op) && !CB.doesNotCapture(Op)) + PI.setEscaped(&CB); + handleMayWrite(CB); + } + + bool getShouldLiveOnFrame() const { + if (!ShouldLiveOnFrame) + ShouldLiveOnFrame = computeShouldLiveOnFrame(); + return ShouldLiveOnFrame.getValue(); + } + + bool getMayWriteBeforeCoroBegin() const { return MayWriteBeforeCoroBegin; } + + DenseMap<Instruction *, llvm::Optional<APInt>> getAliasesCopy() const { + assert(getShouldLiveOnFrame() && "This method should only be called if the " + "alloca needs to live on the frame."); + for (const auto &P : AliasOffetMap) + if (!P.second) + report_fatal_error("Unable to handle an alias with unknown offset " + "created before CoroBegin."); + return AliasOffetMap; + } private: const DominatorTree &DT; const CoroBeginInst &CoroBegin; -}; -} // namespace -static bool mightWriteIntoAllocaPtr(AllocaInst &A, const DominatorTree &DT, - const CoroBeginInst &CB) { - const DataLayout &DL = A.getModule()->getDataLayout(); - AllocaUseVisitor Visitor(DL, DT, CB); - auto PtrI = Visitor.visitPtr(A); - if (PtrI.isEscaped() || PtrI.isAborted()) { - auto *PointerEscapingInstr = PtrI.getEscapingInst() - ? PtrI.getEscapingInst() - : PtrI.getAbortingInst(); - if (PointerEscapingInstr) { - LLVM_DEBUG( - dbgs() << "AllocaInst copy was triggered by instruction: " - << *PointerEscapingInstr << "\n"); + const SuspendCrossingInfo &Checker; + // All alias to the original AllocaInst, created before CoroBegin and used + // after CoroBegin. Each entry contains the instruction and the offset in the + // original Alloca. 
They need to be recreated after CoroBegin off the frame. + DenseMap<Instruction *, llvm::Optional<APInt>> AliasOffetMap{}; + SmallPtrSet<BasicBlock *, 2> UserBBs{}; + bool MayWriteBeforeCoroBegin{false}; + + mutable llvm::Optional<bool> ShouldLiveOnFrame{}; + + bool computeShouldLiveOnFrame() const { + if (PI.isEscaped()) + return true; + + for (auto *BB1 : UserBBs) + for (auto *BB2 : UserBBs) + if (Checker.hasPathCrossingSuspendPoint(BB1, BB2)) + return true; + + return false; + } + + void handleMayWrite(const Instruction &I) { + if (!DT.dominates(&CoroBegin, &I)) + MayWriteBeforeCoroBegin = true; + } + + bool usedAfterCoroBegin(Instruction &I) { + for (auto &U : I.uses()) + if (DT.dominates(&CoroBegin, U)) + return true; + return false; + } + + void handleAlias(Instruction &I) { + // We track all aliases created prior to CoroBegin but used after. + // These aliases may need to be recreated after CoroBegin if the alloca + // need to live on the frame. + if (DT.dominates(&CoroBegin, &I) || !usedAfterCoroBegin(I)) + return; + + if (!IsOffsetKnown) { + AliasOffetMap[&I].reset(); + } else { + auto Itr = AliasOffetMap.find(&I); + if (Itr == AliasOffetMap.end()) { + AliasOffetMap[&I] = Offset; + } else if (Itr->second.hasValue() && Itr->second.getValue() != Offset) { + // If we have seen two different possible values for this alias, we set + // it to empty. + AliasOffetMap[&I].reset(); + } } - return true; } - return false; -} +}; +} // namespace // We need to make room to insert a spill after initial PHIs, but before // catchswitch instruction. Placing it before violates the requirement that @@ -720,7 +1100,8 @@ static Instruction *splitBeforeCatchSwitch(CatchSwitchInst *CatchSwitch) { // whatever // // -static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) { +static Instruction *insertSpills(const FrameDataInfo &FrameData, + coro::Shape &Shape) { auto *CB = Shape.CoroBegin; LLVMContext &C = CB->getContext(); IRBuilder<> Builder(CB->getNextNode()); @@ -729,32 +1110,13 @@ static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) { auto *FramePtr = cast<Instruction>(Builder.CreateBitCast(CB, FramePtrTy, "FramePtr")); DominatorTree DT(*CB->getFunction()); - - Value *CurrentValue = nullptr; - BasicBlock *CurrentBlock = nullptr; - Value *CurrentReload = nullptr; - - // Proper field number will be read from field definition. - unsigned Index = InvalidFieldIndex; - - // We need to keep track of any allocas that need "spilling" - // since they will live in the coroutine frame now, all access to them - // need to be changed, not just the access across suspend points - // we remember allocas and their indices to be handled once we processed - // all the spills. - SmallVector<std::pair<AllocaInst *, unsigned>, 4> Allocas; - - // Promise alloca (if present) doesn't show in the spills and has a - // special field number. - if (auto *PromiseAlloca = Shape.getPromiseAlloca()) { - assert(Shape.ABI == coro::ABI::Switch); - Allocas.emplace_back(PromiseAlloca, Shape.getPromiseField()); - } + SmallDenseMap<llvm::Value *, llvm::AllocaInst *, 4> DbgPtrAllocaCache; // Create a GEP with the given index into the coroutine frame for the original // value Orig. Appends an extra 0 index for array-allocas, preserving the // original type. 
- auto GetFramePointer = [&](uint32_t Index, Value *Orig) -> Value * { + auto GetFramePointer = [&](Value *Orig) -> Value * { + FieldIDType Index = FrameData.getFieldIndex(Orig); SmallVector<Value *, 3> Indices = { ConstantInt::get(Type::getInt32Ty(C), 0), ConstantInt::get(Type::getInt32Ty(C), Index), @@ -771,147 +1133,160 @@ static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) { } } - return Builder.CreateInBoundsGEP(FrameTy, FramePtr, Indices); - }; - - // Create a load instruction to reload the spilled value from the coroutine - // frame. Populates the Value pointer reference provided with the frame GEP. - auto CreateReload = [&](Instruction *InsertBefore, Value *&G) { - assert(Index != InvalidFieldIndex && "accessing unassigned field number"); - Builder.SetInsertPoint(InsertBefore); - - G = GetFramePointer(Index, CurrentValue); - G->setName(CurrentValue->getName() + Twine(".reload.addr")); - - return isa<AllocaInst>(CurrentValue) - ? G - : Builder.CreateLoad(FrameTy->getElementType(Index), G, - CurrentValue->getName() + Twine(".reload")); + auto GEP = cast<GetElementPtrInst>( + Builder.CreateInBoundsGEP(FrameTy, FramePtr, Indices)); + if (isa<AllocaInst>(Orig)) { + // If the type of GEP is not equal to the type of AllocaInst, it implies + // that the AllocaInst may be reused in the Frame slot of other + // AllocaInst. So We cast GEP to the AllocaInst here to re-use + // the Frame storage. + // + // Note: If we change the strategy dealing with alignment, we need to refine + // this casting. + if (GEP->getResultElementType() != Orig->getType()) + return Builder.CreateBitCast(GEP, Orig->getType(), + Orig->getName() + Twine(".cast")); + } + return GEP; }; - Value *GEP = nullptr, *CurrentGEP = nullptr; - for (auto const &E : Spills) { - // If we have not seen the value, generate a spill. - if (CurrentValue != E.def()) { - CurrentValue = E.def(); - CurrentBlock = nullptr; - CurrentReload = nullptr; - - Index = E.fieldIndex(); - - if (auto *AI = dyn_cast<AllocaInst>(CurrentValue)) { - // Spilled AllocaInst will be replaced with GEP from the coroutine frame - // there is no spill required. - Allocas.emplace_back(AI, Index); - if (!AI->isStaticAlloca()) - report_fatal_error("Coroutines cannot handle non static allocas yet"); + for (auto const &E : FrameData.Spills) { + Value *Def = E.first; + // Create a store instruction storing the value into the + // coroutine frame. + Instruction *InsertPt = nullptr; + bool NeedToCopyArgPtrValue = false; + if (auto *Arg = dyn_cast<Argument>(Def)) { + // For arguments, we will place the store instruction right after + // the coroutine frame pointer instruction, i.e. bitcast of + // coro.begin from i8* to %f.frame*. + InsertPt = FramePtr->getNextNode(); + + // If we're spilling an Argument, make sure we clear 'nocapture' + // from the coroutine function. + Arg->getParent()->removeParamAttr(Arg->getArgNo(), Attribute::NoCapture); + + if (Arg->hasByValAttr()) + NeedToCopyArgPtrValue = true; + + } else if (auto *CSI = dyn_cast<AnyCoroSuspendInst>(Def)) { + // Don't spill immediately after a suspend; splitting assumes + // that the suspend will be followed by a branch. + InsertPt = CSI->getParent()->getSingleSuccessor()->getFirstNonPHI(); + } else { + auto *I = cast<Instruction>(Def); + if (!DT.dominates(CB, I)) { + // If it is not dominated by CoroBegin, then spill should be + // inserted immediately after CoroFrame is computed. 
+ InsertPt = FramePtr->getNextNode(); + } else if (auto *II = dyn_cast<InvokeInst>(I)) { + // If we are spilling the result of the invoke instruction, split + // the normal edge and insert the spill in the new block. + auto *NewBB = SplitEdge(II->getParent(), II->getNormalDest()); + InsertPt = NewBB->getTerminator(); + } else if (isa<PHINode>(I)) { + // Skip the PHINodes and EH pads instructions. + BasicBlock *DefBlock = I->getParent(); + if (auto *CSI = dyn_cast<CatchSwitchInst>(DefBlock->getTerminator())) + InsertPt = splitBeforeCatchSwitch(CSI); + else + InsertPt = &*DefBlock->getFirstInsertionPt(); } else { - // Otherwise, create a store instruction storing the value into the - // coroutine frame. - - Instruction *InsertPt = nullptr; - if (auto Arg = dyn_cast<Argument>(CurrentValue)) { - // For arguments, we will place the store instruction right after - // the coroutine frame pointer instruction, i.e. bitcast of - // coro.begin from i8* to %f.frame*. - InsertPt = FramePtr->getNextNode(); - - // If we're spilling an Argument, make sure we clear 'nocapture' - // from the coroutine function. - Arg->getParent()->removeParamAttr(Arg->getArgNo(), - Attribute::NoCapture); - - } else if (auto *II = dyn_cast<InvokeInst>(CurrentValue)) { - // If we are spilling the result of the invoke instruction, split the - // normal edge and insert the spill in the new block. - auto NewBB = SplitEdge(II->getParent(), II->getNormalDest()); - InsertPt = NewBB->getTerminator(); - } else if (isa<PHINode>(CurrentValue)) { - // Skip the PHINodes and EH pads instructions. - BasicBlock *DefBlock = cast<Instruction>(E.def())->getParent(); - if (auto *CSI = dyn_cast<CatchSwitchInst>(DefBlock->getTerminator())) - InsertPt = splitBeforeCatchSwitch(CSI); - else - InsertPt = &*DefBlock->getFirstInsertionPt(); - } else if (auto CSI = dyn_cast<AnyCoroSuspendInst>(CurrentValue)) { - // Don't spill immediately after a suspend; splitting assumes - // that the suspend will be followed by a branch. - InsertPt = CSI->getParent()->getSingleSuccessor()->getFirstNonPHI(); - } else { - auto *I = cast<Instruction>(E.def()); - assert(!I->isTerminator() && "unexpected terminator"); - // For all other values, the spill is placed immediately after - // the definition. - if (DT.dominates(CB, I)) { - InsertPt = I->getNextNode(); - } else { - // Unless, it is not dominated by CoroBegin, then it will be - // inserted immediately after CoroFrame is computed. - InsertPt = FramePtr->getNextNode(); - } - } - - Builder.SetInsertPoint(InsertPt); - auto *G = Builder.CreateConstInBoundsGEP2_32( - FrameTy, FramePtr, 0, Index, - CurrentValue->getName() + Twine(".spill.addr")); - Builder.CreateStore(CurrentValue, G); + assert(!I->isTerminator() && "unexpected terminator"); + // For all other values, the spill is placed immediately after + // the definition. + InsertPt = I->getNextNode(); } } - // If we have not seen the use block, generate a reload in it. - if (CurrentBlock != E.userBlock()) { - CurrentBlock = E.userBlock(); - CurrentReload = CreateReload(&*CurrentBlock->getFirstInsertionPt(), GEP); + auto Index = FrameData.getFieldIndex(Def); + Builder.SetInsertPoint(InsertPt); + auto *G = Builder.CreateConstInBoundsGEP2_32( + FrameTy, FramePtr, 0, Index, Def->getName() + Twine(".spill.addr")); + if (NeedToCopyArgPtrValue) { + // For byval arguments, we need to store the pointed value in the frame, + // instead of the pointer itself. 
+ auto *Value = + Builder.CreateLoad(Def->getType()->getPointerElementType(), Def); + Builder.CreateStore(Value, G); + } else { + Builder.CreateStore(Def, G); } - // If we have a single edge PHINode, remove it and replace it with a reload - // from the coroutine frame. (We already took care of multi edge PHINodes - // by rewriting them in the rewritePHIs function). - if (auto *PN = dyn_cast<PHINode>(E.user())) { - assert(PN->getNumIncomingValues() == 1 && "unexpected number of incoming " - "values in the PHINode"); - PN->replaceAllUsesWith(CurrentReload); - PN->eraseFromParent(); - continue; - } + BasicBlock *CurrentBlock = nullptr; + Value *CurrentReload = nullptr; + for (auto *U : E.second) { + // If we have not seen the use block, create a load instruction to reload + // the spilled value from the coroutine frame. Populates the Value pointer + // reference provided with the frame GEP. + if (CurrentBlock != U->getParent()) { + CurrentBlock = U->getParent(); + Builder.SetInsertPoint(&*CurrentBlock->getFirstInsertionPt()); + + auto *GEP = GetFramePointer(E.first); + GEP->setName(E.first->getName() + Twine(".reload.addr")); + if (NeedToCopyArgPtrValue) + CurrentReload = GEP; + else + CurrentReload = Builder.CreateLoad( + FrameTy->getElementType(FrameData.getFieldIndex(E.first)), GEP, + E.first->getName() + Twine(".reload")); + + TinyPtrVector<DbgDeclareInst *> DIs = FindDbgDeclareUses(Def); + for (DbgDeclareInst *DDI : DIs) { + bool AllowUnresolved = false; + // This dbg.declare is preserved for all coro-split function + // fragments. It will be unreachable in the main function, and + // processed by coro::salvageDebugInfo() by CoroCloner. + DIBuilder(*CurrentBlock->getParent()->getParent(), AllowUnresolved) + .insertDeclare(CurrentReload, DDI->getVariable(), + DDI->getExpression(), DDI->getDebugLoc(), + &*Builder.GetInsertPoint()); + // This dbg.declare is for the main function entry point. It + // will be deleted in all coro-split functions. + coro::salvageDebugInfo(DbgPtrAllocaCache, DDI); + } + } - // If we have not seen this GEP instruction, migrate any dbg.declare from - // the alloca to it. - if (CurrentGEP != GEP) { - CurrentGEP = GEP; - TinyPtrVector<DbgDeclareInst *> DIs = FindDbgDeclareUses(CurrentValue); - if (!DIs.empty()) - DIBuilder(*CurrentBlock->getParent()->getParent(), - /*AllowUnresolved*/ false) - .insertDeclare(CurrentGEP, DIs.front()->getVariable(), - DIs.front()->getExpression(), - DIs.front()->getDebugLoc(), DIs.front()); - } + // If we have a single edge PHINode, remove it and replace it with a + // reload from the coroutine frame. (We already took care of multi edge + // PHINodes by rewriting them in the rewritePHIs function). + if (auto *PN = dyn_cast<PHINode>(U)) { + assert(PN->getNumIncomingValues() == 1 && + "unexpected number of incoming " + "values in the PHINode"); + PN->replaceAllUsesWith(CurrentReload); + PN->eraseFromParent(); + continue; + } - // Replace all uses of CurrentValue in the current instruction with reload. - E.user()->replaceUsesOfWith(CurrentValue, CurrentReload); + // Replace all uses of CurrentValue in the current instruction with + // reload. 
+ U->replaceUsesOfWith(Def, CurrentReload); + } } BasicBlock *FramePtrBB = FramePtr->getParent(); auto SpillBlock = - FramePtrBB->splitBasicBlock(FramePtr->getNextNode(), "AllocaSpillBB"); + FramePtrBB->splitBasicBlock(FramePtr->getNextNode(), "AllocaSpillBB"); SpillBlock->splitBasicBlock(&SpillBlock->front(), "PostSpill"); Shape.AllocaSpillBlock = SpillBlock; // retcon and retcon.once lowering assumes all uses have been sunk. - if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce) { + if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce || + Shape.ABI == coro::ABI::Async) { // If we found any allocas, replace all of their remaining uses with Geps. Builder.SetInsertPoint(&SpillBlock->front()); - for (auto &P : Allocas) { - auto *G = GetFramePointer(P.second, P.first); + for (const auto &P : FrameData.Allocas) { + AllocaInst *Alloca = P.Alloca; + auto *G = GetFramePointer(Alloca); // We are not using ReplaceInstWithInst(P.first, cast<Instruction>(G)) // here, as we are changing location of the instruction. - G->takeName(P.first); - P.first->replaceAllUsesWith(G); - P.first->eraseFromParent(); + G->takeName(Alloca); + Alloca->replaceAllUsesWith(G); + Alloca->eraseFromParent(); } return FramePtr; } @@ -921,49 +1296,63 @@ static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) { // we also delete the original dbg.declare and replace other uses with undef. // Note: We cannot replace the alloca with GEP instructions indiscriminately, // as some of the uses may not be dominated by CoroBegin. - bool MightNeedToCopy = false; Builder.SetInsertPoint(&Shape.AllocaSpillBlock->front()); SmallVector<Instruction *, 4> UsersToUpdate; - for (auto &P : Allocas) { - AllocaInst *const A = P.first; - - for (auto *DI : FindDbgDeclareUses(A)) - DI->eraseFromParent(); - replaceDbgUsesWithUndef(A); - + for (const auto &A : FrameData.Allocas) { + AllocaInst *Alloca = A.Alloca; UsersToUpdate.clear(); - for (User *U : A->users()) { + for (User *U : Alloca->users()) { auto *I = cast<Instruction>(U); if (DT.dominates(CB, I)) UsersToUpdate.push_back(I); - else - MightNeedToCopy = true; - } - if (!UsersToUpdate.empty()) { - auto *G = GetFramePointer(P.second, A); - G->takeName(A); - for (Instruction *I : UsersToUpdate) - I->replaceUsesOfWith(A, G); } - } - // If we discovered such uses not dominated by CoroBegin, see if any of them - // preceed coro begin and have instructions that can modify the - // value of the alloca and therefore would require a copying the value into - // the spill slot in the coroutine frame. 
- if (MightNeedToCopy) { - Builder.SetInsertPoint(FramePtr->getNextNode()); - - for (auto &P : Allocas) { - AllocaInst *const A = P.first; - if (mightWriteIntoAllocaPtr(*A, DT, *CB)) { - if (A->isArrayAllocation()) - report_fatal_error( - "Coroutines cannot handle copying of array allocas yet"); + if (UsersToUpdate.empty()) + continue; + auto *G = GetFramePointer(Alloca); + G->setName(Alloca->getName() + Twine(".reload.addr")); + + SmallPtrSet<BasicBlock *, 4> SeenDbgBBs; + TinyPtrVector<DbgDeclareInst *> DIs = FindDbgDeclareUses(Alloca); + if (!DIs.empty()) + DIBuilder(*Alloca->getModule(), + /*AllowUnresolved*/ false) + .insertDeclare(G, DIs.front()->getVariable(), + DIs.front()->getExpression(), + DIs.front()->getDebugLoc(), DIs.front()); + for (auto *DI : FindDbgDeclareUses(Alloca)) + DI->eraseFromParent(); + replaceDbgUsesWithUndef(Alloca); - auto *G = GetFramePointer(P.second, A); - auto *Value = Builder.CreateLoad(A->getAllocatedType(), A); - Builder.CreateStore(Value, G); - } + for (Instruction *I : UsersToUpdate) + I->replaceUsesOfWith(Alloca, G); + } + Builder.SetInsertPoint(FramePtr->getNextNode()); + for (const auto &A : FrameData.Allocas) { + AllocaInst *Alloca = A.Alloca; + if (A.MayWriteBeforeCoroBegin) { + // isEscaped really means potentially modified before CoroBegin. + if (Alloca->isArrayAllocation()) + report_fatal_error( + "Coroutines cannot handle copying of array allocas yet"); + + auto *G = GetFramePointer(Alloca); + auto *Value = Builder.CreateLoad(Alloca->getAllocatedType(), Alloca); + Builder.CreateStore(Value, G); + } + // For each alias to Alloca created before CoroBegin but used after + // CoroBegin, we recreate them after CoroBegin by appplying the offset + // to the pointer in the frame. + for (const auto &Alias : A.Aliases) { + auto *FramePtr = GetFramePointer(Alloca); + auto *FramePtrRaw = + Builder.CreateBitCast(FramePtr, Type::getInt8PtrTy(C)); + auto *AliasPtr = Builder.CreateGEP( + FramePtrRaw, + ConstantInt::get(Type::getInt64Ty(C), Alias.second.getValue())); + auto *AliasPtrTyped = + Builder.CreateBitCast(AliasPtr, Alias.first->getType()); + Alias.first->replaceUsesWithIf( + AliasPtrTyped, [&](Use &U) { return DT.dominates(CB, U); }); } } return FramePtr; @@ -984,15 +1373,14 @@ static void setUnwindEdgeTo(Instruction *TI, BasicBlock *Succ) { // Replaces all uses of OldPred with the NewPred block in all PHINodes in a // block. static void updatePhiNodes(BasicBlock *DestBB, BasicBlock *OldPred, - BasicBlock *NewPred, - PHINode *LandingPadReplacement) { + BasicBlock *NewPred, PHINode *Until = nullptr) { unsigned BBIdx = 0; for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) { PHINode *PN = cast<PHINode>(I); // We manually update the LandingPadReplacement PHINode and it is the last // PHI Node. So, if we find it, we are done. - if (LandingPadReplacement == PN) + if (Until == PN) break; // Reuse the previous value of BBIdx if it lines up. In cases where we @@ -1041,6 +1429,101 @@ static BasicBlock *ehAwareSplitEdge(BasicBlock *BB, BasicBlock *Succ, return NewBB; } +// Moves the values in the PHIs in SuccBB that correspong to PredBB into a new +// PHI in InsertedBB. 
+static void movePHIValuesToInsertedBlock(BasicBlock *SuccBB, + BasicBlock *InsertedBB, + BasicBlock *PredBB, + PHINode *UntilPHI = nullptr) { + auto *PN = cast<PHINode>(&SuccBB->front()); + do { + int Index = PN->getBasicBlockIndex(InsertedBB); + Value *V = PN->getIncomingValue(Index); + PHINode *InputV = PHINode::Create( + V->getType(), 1, V->getName() + Twine(".") + SuccBB->getName(), + &InsertedBB->front()); + InputV->addIncoming(V, PredBB); + PN->setIncomingValue(Index, InputV); + PN = dyn_cast<PHINode>(PN->getNextNode()); + } while (PN != UntilPHI); +} + +// Rewrites the PHI Nodes in a cleanuppad. +static void rewritePHIsForCleanupPad(BasicBlock *CleanupPadBB, + CleanupPadInst *CleanupPad) { + // For every incoming edge to a CleanupPad we will create a new block holding + // all incoming values in single-value PHI nodes. We will then create another + // block to act as a dispather (as all unwind edges for related EH blocks + // must be the same). + // + // cleanuppad: + // %2 = phi i32[%0, %catchswitch], [%1, %catch.1] + // %3 = cleanuppad within none [] + // + // It will create: + // + // cleanuppad.corodispatch + // %2 = phi i8[0, %catchswitch], [1, %catch.1] + // %3 = cleanuppad within none [] + // switch i8 % 2, label %unreachable + // [i8 0, label %cleanuppad.from.catchswitch + // i8 1, label %cleanuppad.from.catch.1] + // cleanuppad.from.catchswitch: + // %4 = phi i32 [%0, %catchswitch] + // br %label cleanuppad + // cleanuppad.from.catch.1: + // %6 = phi i32 [%1, %catch.1] + // br %label cleanuppad + // cleanuppad: + // %8 = phi i32 [%4, %cleanuppad.from.catchswitch], + // [%6, %cleanuppad.from.catch.1] + + // Unreachable BB, in case switching on an invalid value in the dispatcher. + auto *UnreachBB = BasicBlock::Create( + CleanupPadBB->getContext(), "unreachable", CleanupPadBB->getParent()); + IRBuilder<> Builder(UnreachBB); + Builder.CreateUnreachable(); + + // Create a new cleanuppad which will be the dispatcher. + auto *NewCleanupPadBB = + BasicBlock::Create(CleanupPadBB->getContext(), + CleanupPadBB->getName() + Twine(".corodispatch"), + CleanupPadBB->getParent(), CleanupPadBB); + Builder.SetInsertPoint(NewCleanupPadBB); + auto *SwitchType = Builder.getInt8Ty(); + auto *SetDispatchValuePN = + Builder.CreatePHI(SwitchType, pred_size(CleanupPadBB)); + CleanupPad->removeFromParent(); + CleanupPad->insertAfter(SetDispatchValuePN); + auto *SwitchOnDispatch = Builder.CreateSwitch(SetDispatchValuePN, UnreachBB, + pred_size(CleanupPadBB)); + + int SwitchIndex = 0; + SmallVector<BasicBlock *, 8> Preds(predecessors(CleanupPadBB)); + for (BasicBlock *Pred : Preds) { + // Create a new cleanuppad and move the PHI values to there. + auto *CaseBB = BasicBlock::Create(CleanupPadBB->getContext(), + CleanupPadBB->getName() + + Twine(".from.") + Pred->getName(), + CleanupPadBB->getParent(), CleanupPadBB); + updatePhiNodes(CleanupPadBB, Pred, CaseBB); + CaseBB->setName(CleanupPadBB->getName() + Twine(".from.") + + Pred->getName()); + Builder.SetInsertPoint(CaseBB); + Builder.CreateBr(CleanupPadBB); + movePHIValuesToInsertedBlock(CleanupPadBB, CaseBB, NewCleanupPadBB); + + // Update this Pred to the new unwind point. + setUnwindEdgeTo(Pred->getTerminator(), NewCleanupPadBB); + + // Setup the switch in the dispatcher. 
+ auto *SwitchConstant = ConstantInt::get(SwitchType, SwitchIndex); + SetDispatchValuePN->addIncoming(SwitchConstant, Pred); + SwitchOnDispatch->addCase(SwitchConstant, CaseBB); + SwitchIndex++; + } +} + static void rewritePHIs(BasicBlock &BB) { // For every incoming edge we will create a block holding all // incoming values in a single PHI nodes. @@ -1063,6 +1546,24 @@ static void rewritePHIs(BasicBlock &BB) { // TODO: Simplify PHINodes in the basic block to remove duplicate // predecessors. + // Special case for CleanupPad: all EH blocks must have the same unwind edge + // so we need to create an additional "dispatcher" block. + if (auto *CleanupPad = + dyn_cast_or_null<CleanupPadInst>(BB.getFirstNonPHI())) { + SmallVector<BasicBlock *, 8> Preds(predecessors(&BB)); + for (BasicBlock *Pred : Preds) { + if (CatchSwitchInst *CS = + dyn_cast<CatchSwitchInst>(Pred->getTerminator())) { + // CleanupPad with a CatchSwitch predecessor: therefore this is an + // unwind destination that needs to be handle specially. + assert(CS->getUnwindDest() == &BB); + (void)CS; + rewritePHIsForCleanupPad(&BB, CleanupPad); + return; + } + } + } + LandingPadInst *LandingPad = nullptr; PHINode *ReplPHI = nullptr; if ((LandingPad = dyn_cast_or_null<LandingPadInst>(BB.getFirstNonPHI()))) { @@ -1076,22 +1577,14 @@ static void rewritePHIs(BasicBlock &BB) { // ehAwareSplitEdge cloned it in the transition blocks. } - SmallVector<BasicBlock *, 8> Preds(pred_begin(&BB), pred_end(&BB)); + SmallVector<BasicBlock *, 8> Preds(predecessors(&BB)); for (BasicBlock *Pred : Preds) { auto *IncomingBB = ehAwareSplitEdge(Pred, &BB, LandingPad, ReplPHI); IncomingBB->setName(BB.getName() + Twine(".from.") + Pred->getName()); - auto *PN = cast<PHINode>(&BB.front()); - do { - int Index = PN->getBasicBlockIndex(IncomingBB); - Value *V = PN->getIncomingValue(Index); - PHINode *InputV = PHINode::Create( - V->getType(), 1, V->getName() + Twine(".") + BB.getName(), - &IncomingBB->front()); - InputV->addIncoming(V, Pred); - PN->setIncomingValue(Index, InputV); - PN = dyn_cast<PHINode>(PN->getNextNode()); - } while (PN != ReplPHI); // ReplPHI is either null or the PHI that replaced - // the landing pad. + + // Stop the moving of values at ReplPHI, as this is either null or the PHI + // that replaced the landing pad. + movePHIValuesToInsertedBlock(&BB, IncomingBB, Pred, ReplPHI); } if (LandingPad) { @@ -1130,39 +1623,32 @@ static bool isCoroutineStructureIntrinsic(Instruction &I) { // For every use of the value that is across suspend point, recreate that value // after a suspend point. static void rewriteMaterializableInstructions(IRBuilder<> &IRB, - SpillInfo const &Spills) { - BasicBlock *CurrentBlock = nullptr; - Instruction *CurrentMaterialization = nullptr; - Instruction *CurrentDef = nullptr; - - for (auto const &E : Spills) { - // If it is a new definition, update CurrentXXX variables. - if (CurrentDef != E.def()) { - CurrentDef = cast<Instruction>(E.def()); - CurrentBlock = nullptr; - CurrentMaterialization = nullptr; - } - - // If we have not seen this block, materialize the value. 
- if (CurrentBlock != E.userBlock()) { - CurrentBlock = E.userBlock(); - CurrentMaterialization = cast<Instruction>(CurrentDef)->clone(); - CurrentMaterialization->setName(CurrentDef->getName()); - CurrentMaterialization->insertBefore( - &*CurrentBlock->getFirstInsertionPt()); - } - - if (auto *PN = dyn_cast<PHINode>(E.user())) { - assert(PN->getNumIncomingValues() == 1 && "unexpected number of incoming " - "values in the PHINode"); - PN->replaceAllUsesWith(CurrentMaterialization); - PN->eraseFromParent(); - continue; + const SpillInfo &Spills) { + for (const auto &E : Spills) { + Value *Def = E.first; + BasicBlock *CurrentBlock = nullptr; + Instruction *CurrentMaterialization = nullptr; + for (Instruction *U : E.second) { + // If we have not seen this block, materialize the value. + if (CurrentBlock != U->getParent()) { + CurrentBlock = U->getParent(); + CurrentMaterialization = cast<Instruction>(Def)->clone(); + CurrentMaterialization->setName(Def->getName()); + CurrentMaterialization->insertBefore( + &*CurrentBlock->getFirstInsertionPt()); + } + if (auto *PN = dyn_cast<PHINode>(U)) { + assert(PN->getNumIncomingValues() == 1 && + "unexpected number of incoming " + "values in the PHINode"); + PN->replaceAllUsesWith(CurrentMaterialization); + PN->eraseFromParent(); + continue; + } + // Replace all uses of Def in the current instruction with the + // CurrentMaterialization for the block. + U->replaceUsesOfWith(Def, CurrentMaterialization); } - - // Replace all uses of CurrentDef in the current instruction with the - // CurrentMaterialization for the block. - E.user()->replaceUsesOfWith(CurrentDef, CurrentMaterialization); } } @@ -1501,7 +1987,8 @@ static void eliminateSwiftError(Function &F, coro::Shape &Shape) { /// retcon and retcon.once conventions assume that all spill uses can be sunk /// after the coro.begin intrinsic. -static void sinkSpillUsesAfterCoroBegin(Function &F, const SpillInfo &Spills, +static void sinkSpillUsesAfterCoroBegin(Function &F, + const FrameDataInfo &FrameData, CoroBeginInst *CoroBegin) { DominatorTree Dom(F); @@ -1509,9 +1996,8 @@ static void sinkSpillUsesAfterCoroBegin(Function &F, const SpillInfo &Spills, SmallVector<Instruction *, 32> Worklist; // Collect all users that precede coro.begin. - for (auto const &Entry : Spills) { - auto *SpillDef = Entry.def(); - for (User *U : SpillDef->users()) { + for (auto *Def : FrameData.getAllDefs()) { + for (User *U : Def->users()) { auto Inst = cast<Instruction>(U); if (Inst->getParent() != CoroBegin->getParent() || Dom.dominates(CoroBegin, Inst)) @@ -1522,8 +2008,7 @@ static void sinkSpillUsesAfterCoroBegin(Function &F, const SpillInfo &Spills, } // Recursively collect users before coro.begin. while (!Worklist.empty()) { - auto *Def = Worklist.back(); - Worklist.pop_back(); + auto *Def = Worklist.pop_back_val(); for (User *U : Def->users()) { auto Inst = cast<Instruction>(U); if (Dom.dominates(CoroBegin, Inst)) @@ -1535,17 +2020,14 @@ static void sinkSpillUsesAfterCoroBegin(Function &F, const SpillInfo &Spills, // Sort by dominance. SmallVector<Instruction *, 64> InsertionList(ToMove.begin(), ToMove.end()); - std::sort(InsertionList.begin(), InsertionList.end(), - [&Dom](Instruction *A, Instruction *B) -> bool { - // If a dominates b it should preceed (<) b. - return Dom.dominates(A, B); - }); + llvm::sort(InsertionList, [&Dom](Instruction *A, Instruction *B) -> bool { + // If a dominates b it should preceed (<) b. 
+ return Dom.dominates(A, B); + }); Instruction *InsertPt = CoroBegin->getNextNode(); for (Instruction *Inst : InsertionList) Inst->moveBefore(InsertPt); - - return; } /// For each local variable that all of its user are only used inside one of @@ -1567,56 +2049,186 @@ static void sinkLifetimeStartMarkers(Function &F, coro::Shape &Shape, } for (Instruction &I : instructions(F)) { - if (!isa<AllocaInst>(&I)) + AllocaInst* AI = dyn_cast<AllocaInst>(&I); + if (!AI) continue; for (BasicBlock *DomBB : DomSet) { bool Valid = true; - SmallVector<Instruction *, 1> BCInsts; + SmallVector<Instruction *, 1> Lifetimes; - auto isUsedByLifetimeStart = [&](Instruction *I) { - if (isa<BitCastInst>(I) && I->hasOneUse()) - if (auto *IT = dyn_cast<IntrinsicInst>(I->user_back())) - return IT->getIntrinsicID() == Intrinsic::lifetime_start; + auto isLifetimeStart = [](Instruction* I) { + if (auto* II = dyn_cast<IntrinsicInst>(I)) + return II->getIntrinsicID() == Intrinsic::lifetime_start; return false; }; - for (User *U : I.users()) { + auto collectLifetimeStart = [&](Instruction *U, AllocaInst *AI) { + if (isLifetimeStart(U)) { + Lifetimes.push_back(U); + return true; + } + if (!U->hasOneUse() || U->stripPointerCasts() != AI) + return false; + if (isLifetimeStart(U->user_back())) { + Lifetimes.push_back(U->user_back()); + return true; + } + return false; + }; + + for (User *U : AI->users()) { Instruction *UI = cast<Instruction>(U); // For all users except lifetime.start markers, if they are all // dominated by one of the basic blocks and do not cross // suspend points as well, then there is no need to spill the // instruction. if (!DT.dominates(DomBB, UI->getParent()) || - Checker.isDefinitionAcrossSuspend(DomBB, U)) { - // Skip bitcast used by lifetime.start markers. - if (isUsedByLifetimeStart(UI)) { - BCInsts.push_back(UI); + Checker.isDefinitionAcrossSuspend(DomBB, UI)) { + // Skip lifetime.start, GEP and bitcast used by lifetime.start + // markers. + if (collectLifetimeStart(UI, AI)) continue; - } Valid = false; break; } } // Sink lifetime.start markers to dominate block when they are // only used outside the region. - if (Valid && BCInsts.size() != 0) { - auto *NewBitcast = BCInsts[0]->clone(); - auto *NewLifetime = cast<Instruction>(BCInsts[0]->user_back())->clone(); - NewLifetime->replaceUsesOfWith(BCInsts[0], NewBitcast); - NewBitcast->insertBefore(DomBB->getTerminator()); + if (Valid && Lifetimes.size() != 0) { + // May be AI itself, when the type of AI is i8* + auto *NewBitCast = [&](AllocaInst *AI) -> Value* { + if (isa<AllocaInst>(Lifetimes[0]->getOperand(1))) + return AI; + auto *Int8PtrTy = Type::getInt8PtrTy(F.getContext()); + return CastInst::Create(Instruction::BitCast, AI, Int8PtrTy, "", + DomBB->getTerminator()); + }(AI); + + auto *NewLifetime = Lifetimes[0]->clone(); + NewLifetime->replaceUsesOfWith(NewLifetime->getOperand(1), NewBitCast); NewLifetime->insertBefore(DomBB->getTerminator()); // All the outsided lifetime.start markers are no longer necessary. - for (Instruction *S : BCInsts) { - S->user_back()->eraseFromParent(); - } + for (Instruction *S : Lifetimes) + S->eraseFromParent(); + break; } } } } +static void collectFrameAllocas(Function &F, coro::Shape &Shape, + const SuspendCrossingInfo &Checker, + SmallVectorImpl<AllocaInfo> &Allocas) { + // Collect lifetime.start info for each alloca. 
+ using LifetimeStart = SmallPtrSet<Instruction *, 2>; + llvm::DenseMap<AllocaInst *, std::unique_ptr<LifetimeStart>> LifetimeMap; + for (Instruction &I : instructions(F)) { + auto *II = dyn_cast<IntrinsicInst>(&I); + if (!II || II->getIntrinsicID() != Intrinsic::lifetime_start) + continue; + + if (auto *OpInst = dyn_cast<Instruction>(II->getOperand(1))) { + if (auto *AI = dyn_cast<AllocaInst>(OpInst->stripPointerCasts())) { + + if (LifetimeMap.find(AI) == LifetimeMap.end()) + LifetimeMap[AI] = std::make_unique<LifetimeStart>(); + LifetimeMap[AI]->insert(isa<AllocaInst>(OpInst) ? II : OpInst); + } + } + } + + for (Instruction &I : instructions(F)) { + auto *AI = dyn_cast<AllocaInst>(&I); + if (!AI) + continue; + // The PromiseAlloca will be specially handled since it needs to be in a + // fixed position in the frame. + if (AI == Shape.SwitchLowering.PromiseAlloca) { + continue; + } + bool ShouldLiveOnFrame = false; + auto Iter = LifetimeMap.find(AI); + if (Iter != LifetimeMap.end()) { + // Check against lifetime.start if the instruction has the info. + for (User *U : I.users()) { + for (auto *S : *Iter->second) + if ((ShouldLiveOnFrame = Checker.isDefinitionAcrossSuspend(*S, U))) + break; + if (ShouldLiveOnFrame) + break; + } + if (!ShouldLiveOnFrame) + continue; + } + // At this point, either ShouldLiveOnFrame is true or we didn't have + // lifetime information. We will need to rely on more precise pointer + // tracking. + DominatorTree DT(F); + AllocaUseVisitor Visitor{F.getParent()->getDataLayout(), DT, + *Shape.CoroBegin, Checker}; + Visitor.visitPtr(*AI); + if (!Visitor.getShouldLiveOnFrame()) + continue; + Allocas.emplace_back(AI, Visitor.getAliasesCopy(), + Visitor.getMayWriteBeforeCoroBegin()); + } +} + +void coro::salvageDebugInfo( + SmallDenseMap<llvm::Value *, llvm::AllocaInst *, 4> &DbgPtrAllocaCache, + DbgDeclareInst *DDI, bool LoadFromFramePtr) { + Function *F = DDI->getFunction(); + IRBuilder<> Builder(F->getContext()); + auto InsertPt = F->getEntryBlock().getFirstInsertionPt(); + while (isa<IntrinsicInst>(InsertPt)) + ++InsertPt; + Builder.SetInsertPoint(&F->getEntryBlock(), InsertPt); + DIExpression *Expr = DDI->getExpression(); + // Follow the pointer arithmetic all the way to the incoming + // function argument and convert into a DIExpression. + Value *Storage = DDI->getAddress(); + while (Storage) { + if (auto *LdInst = dyn_cast<LoadInst>(Storage)) { + Storage = LdInst->getOperand(0); + } else if (auto *StInst = dyn_cast<StoreInst>(Storage)) { + Storage = StInst->getOperand(0); + } else if (auto *GEPInst = dyn_cast<GetElementPtrInst>(Storage)) { + Expr = llvm::salvageDebugInfoImpl(*GEPInst, Expr, + /*WithStackValue=*/false); + Storage = GEPInst->getOperand(0); + } else if (auto *BCInst = dyn_cast<llvm::BitCastInst>(Storage)) + Storage = BCInst->getOperand(0); + else + break; + } + // Store a pointer to the coroutine frame object in an alloca so it + // is available throughout the function when producing unoptimized + // code. Extending the lifetime this way is correct because the + // variable has been declared by a dbg.declare intrinsic. + if (auto Arg = dyn_cast_or_null<llvm::Argument>(Storage)) { + auto &Cached = DbgPtrAllocaCache[Storage]; + if (!Cached) { + Cached = Builder.CreateAlloca(Storage->getType(), 0, nullptr, + Arg->getName() + ".debug"); + Builder.CreateStore(Storage, Cached); + } + Storage = Cached; + } + // The FramePtr object adds one extra layer of indirection that + // needs to be unwrapped. 
+ if (LoadFromFramePtr) + Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore); + auto &VMContext = DDI->getFunction()->getContext(); + DDI->setOperand( + 0, MetadataAsValue::get(VMContext, ValueAsMetadata::get(Storage))); + DDI->setOperand(2, MetadataAsValue::get(VMContext, Expr)); + if (auto *InsertPt = dyn_cast_or_null<Instruction>(Storage)) + DDI->moveAfter(InsertPt); +} + void coro::buildCoroutineFrame(Function &F, Shape &Shape) { eliminateSwiftError(F, Shape); @@ -1635,9 +2247,26 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) { } // Put CoroEnds into their own blocks. - for (CoroEndInst *CE : Shape.CoroEnds) + for (AnyCoroEndInst *CE : Shape.CoroEnds) { splitAround(CE, "CoroEnd"); + // Emit the musttail call function in a new block before the CoroEnd. + // We do this here so that the right suspend crossing info is computed for + // the uses of the musttail call function call. (Arguments to the coro.end + // instructions would be ignored) + if (auto *AsyncEnd = dyn_cast<CoroAsyncEndInst>(CE)) { + auto *MustTailCallFn = AsyncEnd->getMustTailCallFunction(); + if (!MustTailCallFn) + continue; + IRBuilder<> Builder(AsyncEnd); + SmallVector<Value *, 8> Args(AsyncEnd->args()); + auto Arguments = ArrayRef<Value *>(Args).drop_front(3); + auto *Call = createMustTailCall(AsyncEnd->getDebugLoc(), MustTailCallFn, + Arguments, Builder); + splitAround(Call, "MustTailCall.Before.CoroEnd"); + } + } + // Transforms multi-edge PHI Nodes, so that any value feeding into a PHI will // never has its definition separated from the PHI by the suspend point. rewritePHIs(F); @@ -1646,51 +2275,40 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) { SuspendCrossingInfo Checker(F, Shape); IRBuilder<> Builder(F.getContext()); - SpillInfo Spills; + FrameDataInfo FrameData; SmallVector<CoroAllocaAllocInst*, 4> LocalAllocas; SmallVector<Instruction*, 4> DeadInstructions; - for (int Repeat = 0; Repeat < 4; ++Repeat) { - // See if there are materializable instructions across suspend points. - for (Instruction &I : instructions(F)) - if (materializable(I)) - for (User *U : I.users()) - if (Checker.isDefinitionAcrossSuspend(I, U)) - Spills.emplace_back(&I, U); - - if (Spills.empty()) - break; + { + SpillInfo Spills; + for (int Repeat = 0; Repeat < 4; ++Repeat) { + // See if there are materializable instructions across suspend points. + for (Instruction &I : instructions(F)) + if (materializable(I)) + for (User *U : I.users()) + if (Checker.isDefinitionAcrossSuspend(I, U)) + Spills[&I].push_back(cast<Instruction>(U)); + + if (Spills.empty()) + break; - // Rewrite materializable instructions to be materialized at the use point. - LLVM_DEBUG(dump("Materializations", Spills)); - rewriteMaterializableInstructions(Builder, Spills); - Spills.clear(); + // Rewrite materializable instructions to be materialized at the use + // point. + LLVM_DEBUG(dumpSpills("Materializations", Spills)); + rewriteMaterializableInstructions(Builder, Spills); + Spills.clear(); + } } sinkLifetimeStartMarkers(F, Shape, Checker); - // Collect lifetime.start info for each alloca. 
-  using LifetimeStart = SmallPtrSet<Instruction *, 2>;
-  llvm::DenseMap<Instruction *, std::unique_ptr<LifetimeStart>> LifetimeMap;
-  for (Instruction &I : instructions(F)) {
-    auto *II = dyn_cast<IntrinsicInst>(&I);
-    if (!II || II->getIntrinsicID() != Intrinsic::lifetime_start)
-      continue;
-
-    if (auto *OpInst = dyn_cast<BitCastInst>(I.getOperand(1)))
-      if (auto *AI = dyn_cast<AllocaInst>(OpInst->getOperand(0))) {
-
-        if (LifetimeMap.find(AI) == LifetimeMap.end())
-          LifetimeMap[AI] = std::make_unique<LifetimeStart>();
-
-        LifetimeMap[AI]->insert(OpInst);
-      }
-  }
+  collectFrameAllocas(F, Shape, Checker, FrameData.Allocas);
+  LLVM_DEBUG(dumpAllocas(FrameData.Allocas));
 
   // Collect the spills for arguments and other not-materializable values.
   for (Argument &A : F.args())
     for (User *U : A.users())
       if (Checker.isDefinitionAcrossSuspend(A, U))
-        Spills.emplace_back(&A, U);
+        FrameData.Spills[&A].push_back(cast<Instruction>(U));
 
   for (Instruction &I : instructions(F)) {
     // Values returned from coroutine structure intrinsics should not be part
@@ -1721,43 +2339,33 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
 
       for (User *U : Alloc->users()) {
        if (Checker.isDefinitionAcrossSuspend(*Alloc, U))
-          Spills.emplace_back(Alloc, U);
+          FrameData.Spills[Alloc].push_back(cast<Instruction>(U));
      }
      continue;
    }
 
     // Ignore alloca.get; we process this as part of coro.alloca.alloc.
-    if (isa<CoroAllocaGetInst>(I)) {
+    if (isa<CoroAllocaGetInst>(I))
       continue;
-    }
 
-    auto Iter = LifetimeMap.find(&I);
-    for (User *U : I.users()) {
-      bool NeedSpill = false;
-
-      // Check against lifetime.start if the instruction has the info.
-      if (Iter != LifetimeMap.end())
-        for (auto *S : *Iter->second) {
-          if ((NeedSpill = Checker.isDefinitionAcrossSuspend(*S, U)))
-            break;
-        }
-      else
-        NeedSpill = Checker.isDefinitionAcrossSuspend(I, U);
+    if (isa<AllocaInst>(I))
+      continue;
 
-      if (NeedSpill) {
+    for (User *U : I.users())
+      if (Checker.isDefinitionAcrossSuspend(I, U)) {
        // We cannot spill a token.
        if (I.getType()->isTokenTy())
          report_fatal_error(
              "token definition is separated from the use by a suspend point");
-        Spills.emplace_back(&I, U);
+        FrameData.Spills[&I].push_back(cast<Instruction>(U));
      }
-    }
  }
 
-  LLVM_DEBUG(dump("Spills", Spills));
-  if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce)
-    sinkSpillUsesAfterCoroBegin(F, Spills, Shape.CoroBegin);
-  Shape.FrameTy = buildFrameType(F, Shape, Spills);
-  Shape.FramePtr = insertSpills(Spills, Shape);
+  LLVM_DEBUG(dumpSpills("Spills", FrameData.Spills));
+  if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
+      Shape.ABI == coro::ABI::Async)
+    sinkSpillUsesAfterCoroBegin(F, FrameData, Shape.CoroBegin);
+  Shape.FrameTy = buildFrameType(F, Shape, FrameData);
+  Shape.FramePtr = insertSpills(FrameData, Shape);
   lowerLocalAllocas(LocalAllocas, DeadInstructions);
 
   for (auto I : DeadInstructions)