aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Coroutines/CoroFrame.cpp')
-rw-r--r--llvm/lib/Transforms/Coroutines/CoroFrame.cpp771
1 files changed, 590 insertions, 181 deletions
diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index e53e7605b254..beae5fdac8ab 100644
--- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -12,8 +12,6 @@
// contain those values. All uses of those values are replaced with appropriate
// GEP + load from the coroutine frame. At the point of the definition we spill
// the value into the coroutine frame.
-//
-// TODO: pack values tightly using liveness info.
//===----------------------------------------------------------------------===//
#include "CoroInternal.h"
@@ -32,6 +30,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/OptimizedStructLayout.h"
#include "llvm/Support/circular_raw_ostream.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
@@ -163,6 +162,16 @@ struct SuspendCrossingInfo {
return isDefinitionAcrossSuspend(DefBB, U);
}
+
+ bool isDefinitionAcrossSuspend(Value &V, User *U) const {
+ if (auto *Arg = dyn_cast<Argument>(&V))
+ return isDefinitionAcrossSuspend(*Arg, U);
+ if (auto *Inst = dyn_cast<Instruction>(&V))
+ return isDefinitionAcrossSuspend(*Inst, U);
+
+ llvm_unreachable(
+ "Coroutine could only collect Argument and Instruction now.");
+ }
};
} // end anonymous namespace
@@ -336,6 +345,28 @@ struct FrameDataInfo {
FieldIndexMap[V] = Index;
}
+ uint64_t getAlign(Value *V) const {
+ auto Iter = FieldAlignMap.find(V);
+ assert(Iter != FieldAlignMap.end());
+ return Iter->second;
+ }
+
+ void setAlign(Value *V, uint64_t Align) {
+ assert(FieldAlignMap.count(V) == 0);
+ FieldAlignMap.insert({V, Align});
+ }
+
+ uint64_t getOffset(Value *V) const {
+ auto Iter = FieldOffsetMap.find(V);
+ assert(Iter != FieldOffsetMap.end());
+ return Iter->second;
+ }
+
+ void setOffset(Value *V, uint64_t Offset) {
+ assert(FieldOffsetMap.count(V) == 0);
+ FieldOffsetMap.insert({V, Offset});
+ }
+
// Remap the index of every field in the frame, using the final layout index.
void updateLayoutIndex(FrameTypeBuilder &B);
@@ -347,6 +378,12 @@ private:
// with their original insertion field index. After the frame is built, their
// indexes will be updated into the final layout index.
DenseMap<Value *, uint32_t> FieldIndexMap;
+ // Map from values to their alignment on the frame. They would be set after
+ // the frame is built.
+ DenseMap<Value *, uint64_t> FieldAlignMap;
+ // Map from values to their offset on the frame. They would be set after
+ // the frame is built.
+ DenseMap<Value *, uint64_t> FieldOffsetMap;
};
} // namespace
@@ -392,12 +429,15 @@ private:
Align StructAlign;
bool IsFinished = false;
+ Optional<Align> MaxFrameAlignment;
+
SmallVector<Field, 8> Fields;
DenseMap<Value*, unsigned> FieldIndexByKey;
public:
- FrameTypeBuilder(LLVMContext &Context, DataLayout const &DL)
- : DL(DL), Context(Context) {}
+ FrameTypeBuilder(LLVMContext &Context, DataLayout const &DL,
+ Optional<Align> MaxFrameAlignment)
+ : DL(DL), Context(Context), MaxFrameAlignment(MaxFrameAlignment) {}
/// Add a field to this structure for the storage of an `alloca`
/// instruction.
@@ -448,17 +488,32 @@ public:
/// Add a field to this structure.
LLVM_NODISCARD FieldIDType addField(Type *Ty, MaybeAlign FieldAlignment,
- bool IsHeader = false) {
+ bool IsHeader = false,
+ bool IsSpillOfValue = false) {
assert(!IsFinished && "adding fields to a finished builder");
assert(Ty && "must provide a type for a field");
// The field size is always the alloc size of the type.
uint64_t FieldSize = DL.getTypeAllocSize(Ty);
+ // For an alloca with size=0, we don't need to add a field and they
+ // can just point to any index in the frame. Use index 0.
+ if (FieldSize == 0) {
+ return 0;
+ }
+
// The field alignment might not be the type alignment, but we need
// to remember the type alignment anyway to build the type.
- Align TyAlignment = DL.getABITypeAlign(Ty);
- if (!FieldAlignment) FieldAlignment = TyAlignment;
+ // If we are spilling values we don't need to worry about ABI alignment
+ // concerns.
+ auto ABIAlign = DL.getABITypeAlign(Ty);
+ Align TyAlignment =
+ (IsSpillOfValue && MaxFrameAlignment)
+ ? (*MaxFrameAlignment < ABIAlign ? *MaxFrameAlignment : ABIAlign)
+ : ABIAlign;
+ if (!FieldAlignment) {
+ FieldAlignment = TyAlignment;
+ }
// Lay out header fields immediately.
uint64_t Offset;
@@ -492,12 +547,20 @@ public:
assert(IsFinished && "not yet finished!");
return Fields[Id].LayoutFieldIndex;
}
+
+ Field getLayoutField(FieldIDType Id) const {
+ assert(IsFinished && "not yet finished!");
+ return Fields[Id];
+ }
};
} // namespace
void FrameDataInfo::updateLayoutIndex(FrameTypeBuilder &B) {
auto Updater = [&](Value *I) {
- setFieldIndex(I, B.getLayoutFieldIndex(getFieldIndex(I)));
+ auto Field = B.getLayoutField(getFieldIndex(I));
+ setFieldIndex(I, Field.LayoutFieldIndex);
+ setAlign(I, Field.Alignment.value());
+ setOffset(I, Field.Offset);
};
LayoutIndexUpdateStarted = true;
for (auto &S : Spills)
@@ -510,7 +573,6 @@ void FrameDataInfo::updateLayoutIndex(FrameTypeBuilder &B) {
void FrameTypeBuilder::addFieldForAllocas(const Function &F,
FrameDataInfo &FrameData,
coro::Shape &Shape) {
- DenseMap<AllocaInst *, unsigned int> AllocaIndex;
using AllocaSetType = SmallVector<AllocaInst *, 4>;
SmallVector<AllocaSetType, 4> NonOverlapedAllocas;
@@ -532,7 +594,6 @@ void FrameTypeBuilder::addFieldForAllocas(const Function &F,
if (!Shape.ReuseFrameSlot && !EnableReuseStorageInFrame) {
for (const auto &A : FrameData.Allocas) {
AllocaInst *Alloca = A.Alloca;
- AllocaIndex[Alloca] = NonOverlapedAllocas.size();
NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca));
}
return;
@@ -613,13 +674,11 @@ void FrameTypeBuilder::addFieldForAllocas(const Function &F,
bool CouldMerge = NoInference && Alignable;
if (!CouldMerge)
continue;
- AllocaIndex[Alloca] = AllocaIndex[*AllocaSet.begin()];
AllocaSet.push_back(Alloca);
Merged = true;
break;
}
if (!Merged) {
- AllocaIndex[Alloca] = NonOverlapedAllocas.size();
NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca));
}
}
@@ -716,6 +775,314 @@ void FrameTypeBuilder::finish(StructType *Ty) {
IsFinished = true;
}
+static void cacheDIVar(FrameDataInfo &FrameData,
+ DenseMap<Value *, DILocalVariable *> &DIVarCache) {
+ for (auto *V : FrameData.getAllDefs()) {
+ if (DIVarCache.find(V) != DIVarCache.end())
+ continue;
+
+ auto DDIs = FindDbgDeclareUses(V);
+ auto *I = llvm::find_if(DDIs, [](DbgDeclareInst *DDI) {
+ return DDI->getExpression()->getNumElements() == 0;
+ });
+ if (I != DDIs.end())
+ DIVarCache.insert({V, (*I)->getVariable()});
+ }
+}
+
+/// Create name for Type. It uses MDString to store new created string to
+/// avoid memory leak.
+static StringRef solveTypeName(Type *Ty) {
+ if (Ty->isIntegerTy()) {
+ // The longest name in common may be '__int_128', which has 9 bits.
+ SmallString<16> Buffer;
+ raw_svector_ostream OS(Buffer);
+ OS << "__int_" << cast<IntegerType>(Ty)->getBitWidth();
+ auto *MDName = MDString::get(Ty->getContext(), OS.str());
+ return MDName->getString();
+ }
+
+ if (Ty->isFloatingPointTy()) {
+ if (Ty->isFloatTy())
+ return "__float_";
+ if (Ty->isDoubleTy())
+ return "__double_";
+ return "__floating_type_";
+ }
+
+ if (Ty->isPointerTy()) {
+ auto *PtrTy = cast<PointerType>(Ty);
+ Type *PointeeTy = PtrTy->getElementType();
+ auto Name = solveTypeName(PointeeTy);
+ if (Name == "UnknownType")
+ return "PointerType";
+ SmallString<16> Buffer;
+ Twine(Name + "_Ptr").toStringRef(Buffer);
+ auto *MDName = MDString::get(Ty->getContext(), Buffer.str());
+ return MDName->getString();
+ }
+
+ if (Ty->isStructTy()) {
+ if (!cast<StructType>(Ty)->hasName())
+ return "__LiteralStructType_";
+
+ auto Name = Ty->getStructName();
+
+ SmallString<16> Buffer(Name);
+ for_each(Buffer, [](auto &Iter) {
+ if (Iter == '.' || Iter == ':')
+ Iter = '_';
+ });
+ auto *MDName = MDString::get(Ty->getContext(), Buffer.str());
+ return MDName->getString();
+ }
+
+ return "UnknownType";
+}
+
+static DIType *solveDIType(DIBuilder &Builder, Type *Ty, DataLayout &Layout,
+ DIScope *Scope, unsigned LineNum,
+ DenseMap<Type *, DIType *> &DITypeCache) {
+ if (DIType *DT = DITypeCache.lookup(Ty))
+ return DT;
+
+ StringRef Name = solveTypeName(Ty);
+
+ DIType *RetType = nullptr;
+
+ if (Ty->isIntegerTy()) {
+ auto BitWidth = cast<IntegerType>(Ty)->getBitWidth();
+ RetType = Builder.createBasicType(Name, BitWidth, dwarf::DW_ATE_signed,
+ llvm::DINode::FlagArtificial);
+ } else if (Ty->isFloatingPointTy()) {
+ RetType = Builder.createBasicType(Name, Layout.getTypeSizeInBits(Ty),
+ dwarf::DW_ATE_float,
+ llvm::DINode::FlagArtificial);
+ } else if (Ty->isPointerTy()) {
+ // Construct BasicType instead of PointerType to avoid infinite
+ // search problem.
+ // For example, we would be in trouble if we traverse recursively:
+ //
+ // struct Node {
+ // Node* ptr;
+ // };
+ RetType = Builder.createBasicType(Name, Layout.getTypeSizeInBits(Ty),
+ dwarf::DW_ATE_address,
+ llvm::DINode::FlagArtificial);
+ } else if (Ty->isStructTy()) {
+ auto *DIStruct = Builder.createStructType(
+ Scope, Name, Scope->getFile(), LineNum, Layout.getTypeSizeInBits(Ty),
+ Layout.getPrefTypeAlignment(Ty), llvm::DINode::FlagArtificial, nullptr,
+ llvm::DINodeArray());
+
+ auto *StructTy = cast<StructType>(Ty);
+ SmallVector<Metadata *, 16> Elements;
+ for (unsigned I = 0; I < StructTy->getNumElements(); I++) {
+ DIType *DITy = solveDIType(Builder, StructTy->getElementType(I), Layout,
+ Scope, LineNum, DITypeCache);
+ assert(DITy);
+ Elements.push_back(Builder.createMemberType(
+ Scope, DITy->getName(), Scope->getFile(), LineNum,
+ DITy->getSizeInBits(), DITy->getAlignInBits(),
+ Layout.getStructLayout(StructTy)->getElementOffsetInBits(I),
+ llvm::DINode::FlagArtificial, DITy));
+ }
+
+ Builder.replaceArrays(DIStruct, Builder.getOrCreateArray(Elements));
+
+ RetType = DIStruct;
+ } else {
+ LLVM_DEBUG(dbgs() << "Unresolved Type: " << *Ty << "\n";);
+ SmallString<32> Buffer;
+ raw_svector_ostream OS(Buffer);
+ OS << Name.str() << "_" << Layout.getTypeSizeInBits(Ty);
+ RetType = Builder.createBasicType(OS.str(), Layout.getTypeSizeInBits(Ty),
+ dwarf::DW_ATE_address,
+ llvm::DINode::FlagArtificial);
+ }
+
+ DITypeCache.insert({Ty, RetType});
+ return RetType;
+}
+
+/// Build artificial debug info for C++ coroutine frames to allow users to
+/// inspect the contents of the frame directly
+///
+/// Create Debug information for coroutine frame with debug name "__coro_frame".
+/// The debug information for the fields of coroutine frame is constructed from
+/// the following way:
+/// 1. For all the value in the Frame, we search the use of dbg.declare to find
+/// the corresponding debug variables for the value. If we can find the
+/// debug variable, we can get full and accurate debug information.
+/// 2. If we can't get debug information in step 1 and 2, we could only try to
+/// build the DIType by Type. We did this in solveDIType. We only handle
+/// integer, float, double, integer type and struct type for now.
+static void buildFrameDebugInfo(Function &F, coro::Shape &Shape,
+ FrameDataInfo &FrameData) {
+ DISubprogram *DIS = F.getSubprogram();
+ // If there is no DISubprogram for F, it implies the Function are not compiled
+ // with debug info. So we also don't need to generate debug info for the frame
+ // neither.
+ if (!DIS || !DIS->getUnit() ||
+ !dwarf::isCPlusPlus(
+ (dwarf::SourceLanguage)DIS->getUnit()->getSourceLanguage()))
+ return;
+
+ assert(Shape.ABI == coro::ABI::Switch &&
+ "We could only build debug infomation for C++ coroutine now.\n");
+
+ DIBuilder DBuilder(*F.getParent(), /*AllowUnresolved*/ false);
+
+ AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();
+ assert(PromiseAlloca &&
+ "Coroutine with switch ABI should own Promise alloca");
+
+ TinyPtrVector<DbgDeclareInst *> DIs = FindDbgDeclareUses(PromiseAlloca);
+ if (DIs.empty())
+ return;
+
+ DbgDeclareInst *PromiseDDI = DIs.front();
+ DILocalVariable *PromiseDIVariable = PromiseDDI->getVariable();
+ DILocalScope *PromiseDIScope = PromiseDIVariable->getScope();
+ DIFile *DFile = PromiseDIScope->getFile();
+ DILocation *DILoc = PromiseDDI->getDebugLoc().get();
+ unsigned LineNum = PromiseDIVariable->getLine();
+
+ DICompositeType *FrameDITy = DBuilder.createStructType(
+ DIS, "__coro_frame_ty", DFile, LineNum, Shape.FrameSize * 8,
+ Shape.FrameAlign.value() * 8, llvm::DINode::FlagArtificial, nullptr,
+ llvm::DINodeArray());
+ StructType *FrameTy = Shape.FrameTy;
+ SmallVector<Metadata *, 16> Elements;
+ DataLayout Layout = F.getParent()->getDataLayout();
+
+ DenseMap<Value *, DILocalVariable *> DIVarCache;
+ cacheDIVar(FrameData, DIVarCache);
+
+ unsigned ResumeIndex = coro::Shape::SwitchFieldIndex::Resume;
+ unsigned DestroyIndex = coro::Shape::SwitchFieldIndex::Destroy;
+ unsigned IndexIndex = Shape.SwitchLowering.IndexField;
+
+ DenseMap<unsigned, StringRef> NameCache;
+ NameCache.insert({ResumeIndex, "__resume_fn"});
+ NameCache.insert({DestroyIndex, "__destroy_fn"});
+ NameCache.insert({IndexIndex, "__coro_index"});
+
+ Type *ResumeFnTy = FrameTy->getElementType(ResumeIndex),
+ *DestroyFnTy = FrameTy->getElementType(DestroyIndex),
+ *IndexTy = FrameTy->getElementType(IndexIndex);
+
+ DenseMap<unsigned, DIType *> TyCache;
+ TyCache.insert({ResumeIndex,
+ DBuilder.createBasicType("__resume_fn",
+ Layout.getTypeSizeInBits(ResumeFnTy),
+ dwarf::DW_ATE_address)});
+ TyCache.insert(
+ {DestroyIndex, DBuilder.createBasicType(
+ "__destroy_fn", Layout.getTypeSizeInBits(DestroyFnTy),
+ dwarf::DW_ATE_address)});
+
+ /// FIXME: If we fill the field `SizeInBits` with the actual size of
+ /// __coro_index in bits, then __coro_index wouldn't show in the debugger.
+ TyCache.insert({IndexIndex, DBuilder.createBasicType(
+ "__coro_index",
+ (Layout.getTypeSizeInBits(IndexTy) < 8)
+ ? 8
+ : Layout.getTypeSizeInBits(IndexTy),
+ dwarf::DW_ATE_unsigned_char)});
+
+ for (auto *V : FrameData.getAllDefs()) {
+ if (DIVarCache.find(V) == DIVarCache.end())
+ continue;
+
+ auto Index = FrameData.getFieldIndex(V);
+
+ NameCache.insert({Index, DIVarCache[V]->getName()});
+ TyCache.insert({Index, DIVarCache[V]->getType()});
+ }
+
+ // Cache from index to (Align, Offset Pair)
+ DenseMap<unsigned, std::pair<unsigned, unsigned>> OffsetCache;
+ // The Align and Offset of Resume function and Destroy function are fixed.
+ OffsetCache.insert({ResumeIndex, {8, 0}});
+ OffsetCache.insert({DestroyIndex, {8, 8}});
+ OffsetCache.insert(
+ {IndexIndex,
+ {Shape.SwitchLowering.IndexAlign, Shape.SwitchLowering.IndexOffset}});
+
+ for (auto *V : FrameData.getAllDefs()) {
+ auto Index = FrameData.getFieldIndex(V);
+
+ OffsetCache.insert(
+ {Index, {FrameData.getAlign(V), FrameData.getOffset(V)}});
+ }
+
+ DenseMap<Type *, DIType *> DITypeCache;
+ // This counter is used to avoid same type names. e.g., there would be
+ // many i32 and i64 types in one coroutine. And we would use i32_0 and
+ // i32_1 to avoid the same type. Since it makes no sense the name of the
+ // fields confilicts with each other.
+ unsigned UnknownTypeNum = 0;
+ for (unsigned Index = 0; Index < FrameTy->getNumElements(); Index++) {
+ if (OffsetCache.find(Index) == OffsetCache.end())
+ continue;
+
+ std::string Name;
+ uint64_t SizeInBits;
+ uint32_t AlignInBits;
+ uint64_t OffsetInBits;
+ DIType *DITy = nullptr;
+
+ Type *Ty = FrameTy->getElementType(Index);
+ assert(Ty->isSized() && "We can't handle type which is not sized.\n");
+ SizeInBits = Layout.getTypeSizeInBits(Ty).getFixedSize();
+ AlignInBits = OffsetCache[Index].first * 8;
+ OffsetInBits = OffsetCache[Index].second * 8;
+
+ if (NameCache.find(Index) != NameCache.end()) {
+ Name = NameCache[Index].str();
+ DITy = TyCache[Index];
+ } else {
+ DITy = solveDIType(DBuilder, Ty, Layout, FrameDITy, LineNum, DITypeCache);
+ assert(DITy && "SolveDIType shouldn't return nullptr.\n");
+ Name = DITy->getName().str();
+ Name += "_" + std::to_string(UnknownTypeNum);
+ UnknownTypeNum++;
+ }
+
+ Elements.push_back(DBuilder.createMemberType(
+ FrameDITy, Name, DFile, LineNum, SizeInBits, AlignInBits, OffsetInBits,
+ llvm::DINode::FlagArtificial, DITy));
+ }
+
+ DBuilder.replaceArrays(FrameDITy, DBuilder.getOrCreateArray(Elements));
+
+ auto *FrameDIVar = DBuilder.createAutoVariable(PromiseDIScope, "__coro_frame",
+ DFile, LineNum, FrameDITy,
+ true, DINode::FlagArtificial);
+ assert(FrameDIVar->isValidLocationForIntrinsic(PromiseDDI->getDebugLoc()));
+
+ // Subprogram would have ContainedNodes field which records the debug
+ // variables it contained. So we need to add __coro_frame to the
+ // ContainedNodes of it.
+ //
+ // If we don't add __coro_frame to the RetainedNodes, user may get
+ // `no symbol __coro_frame in context` rather than `__coro_frame`
+ // is optimized out, which is more precise.
+ if (auto *SubProgram = dyn_cast<DISubprogram>(PromiseDIScope)) {
+ auto RetainedNodes = SubProgram->getRetainedNodes();
+ SmallVector<Metadata *, 32> RetainedNodesVec(RetainedNodes.begin(),
+ RetainedNodes.end());
+ RetainedNodesVec.push_back(FrameDIVar);
+ SubProgram->replaceOperandWith(
+ 7, (MDTuple::get(F.getContext(), RetainedNodesVec)));
+ }
+
+ DBuilder.insertDeclare(Shape.FramePtr, FrameDIVar,
+ DBuilder.createExpression(), DILoc,
+ Shape.FramePtr->getNextNode());
+}
+
// Build a struct that will keep state for an active coroutine.
// struct f.frame {
// ResumeFnTy ResumeFnAddr;
@@ -734,7 +1101,11 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
return StructType::create(C, Name);
}();
- FrameTypeBuilder B(C, DL);
+ // We will use this value to cap the alignment of spilled values.
+ Optional<Align> MaxFrameAlignment;
+ if (Shape.ABI == coro::ABI::Async)
+ MaxFrameAlignment = Shape.AsyncLowering.getContextAlignment();
+ FrameTypeBuilder B(C, DL, MaxFrameAlignment);
AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();
Optional<FieldIDType> SwitchIndexFieldId;
@@ -781,7 +1152,14 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
PromiseAlloca, DenseMap<Instruction *, llvm::Optional<APInt>>{}, false);
// Create an entry for every spilled value.
for (auto &S : FrameData.Spills) {
- FieldIDType Id = B.addField(S.first->getType(), None);
+ Type *FieldType = S.first->getType();
+ // For byval arguments, we need to store the pointed value in the frame,
+ // instead of the pointer itself.
+ if (const Argument *A = dyn_cast<Argument>(S.first))
+ if (A->hasByValAttr())
+ FieldType = A->getParamByValType();
+ FieldIDType Id =
+ B.addField(FieldType, None, false /*header*/, true /*IsSpillOfValue*/);
FrameData.setFieldIndex(S.first, Id);
}
@@ -791,15 +1169,18 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
Shape.FrameSize = B.getStructSize();
switch (Shape.ABI) {
- case coro::ABI::Switch:
+ case coro::ABI::Switch: {
// In the switch ABI, remember the switch-index field.
- Shape.SwitchLowering.IndexField =
- B.getLayoutFieldIndex(*SwitchIndexFieldId);
+ auto IndexField = B.getLayoutField(*SwitchIndexFieldId);
+ Shape.SwitchLowering.IndexField = IndexField.LayoutFieldIndex;
+ Shape.SwitchLowering.IndexAlign = IndexField.Alignment.value();
+ Shape.SwitchLowering.IndexOffset = IndexField.Offset;
// Also round the frame size up to a multiple of its alignment, as is
// generally expected in C/C++.
Shape.FrameSize = alignTo(Shape.FrameSize, Shape.FrameAlign);
break;
+ }
// In the retcon ABI, remember whether the frame is inline in the storage.
case coro::ABI::Retcon:
@@ -863,7 +1244,7 @@ struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
: PtrUseVisitor(DL), DT(DT), CoroBegin(CB), Checker(Checker) {}
void visit(Instruction &I) {
- UserBBs.insert(I.getParent());
+ Users.insert(&I);
Base::visit(I);
// If the pointer is escaped prior to CoroBegin, we have to assume it would
// be written into before CoroBegin as well.
@@ -966,6 +1347,12 @@ struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
handleAlias(GEPI);
}
+ void visitIntrinsicInst(IntrinsicInst &II) {
+ if (II.getIntrinsicID() != Intrinsic::lifetime_start)
+ return Base::visitIntrinsicInst(II);
+ LifetimeStarts.insert(&II);
+ }
+
void visitCallBase(CallBase &CB) {
for (unsigned Op = 0, OpCount = CB.getNumArgOperands(); Op < OpCount; ++Op)
if (U->get() == CB.getArgOperand(Op) && !CB.doesNotCapture(Op))
@@ -999,18 +1386,40 @@ private:
// after CoroBegin. Each entry contains the instruction and the offset in the
// original Alloca. They need to be recreated after CoroBegin off the frame.
DenseMap<Instruction *, llvm::Optional<APInt>> AliasOffetMap{};
- SmallPtrSet<BasicBlock *, 2> UserBBs{};
+ SmallPtrSet<Instruction *, 4> Users{};
+ SmallPtrSet<IntrinsicInst *, 2> LifetimeStarts{};
bool MayWriteBeforeCoroBegin{false};
mutable llvm::Optional<bool> ShouldLiveOnFrame{};
bool computeShouldLiveOnFrame() const {
+ // If lifetime information is available, we check it first since it's
+ // more precise. We look at every pair of lifetime.start intrinsic and
+ // every basic block that uses the pointer to see if they cross suspension
+ // points. The uses cover both direct uses as well as indirect uses.
+ if (!LifetimeStarts.empty()) {
+ for (auto *I : Users)
+ for (auto *S : LifetimeStarts)
+ if (Checker.isDefinitionAcrossSuspend(*S, I))
+ return true;
+ return false;
+ }
+ // FIXME: Ideally the isEscaped check should come at the beginning.
+ // However there are a few loose ends that need to be fixed first before
+ // we can do that. We need to make sure we are not over-conservative, so
+ // that the data accessed in-between await_suspend and symmetric transfer
+ // is always put on the stack, and also data accessed after coro.end is
+ // always put on the stack (esp the return object). To fix that, we need
+ // to:
+ // 1) Potentially treat sret as nocapture in calls
+ // 2) Special handle the return object and put it on the stack
+ // 3) Utilize lifetime.end intrinsic
if (PI.isEscaped())
return true;
- for (auto *BB1 : UserBBs)
- for (auto *BB2 : UserBBs)
- if (Checker.hasPathCrossingSuspendPoint(BB1, BB2))
+ for (auto *U1 : Users)
+ for (auto *U2 : Users)
+ if (Checker.isDefinitionAcrossSuspend(*U1, U2))
return true;
return false;
@@ -1072,6 +1481,15 @@ static Instruction *splitBeforeCatchSwitch(CatchSwitchInst *CatchSwitch) {
return CleanupRet;
}
+static void createFramePtr(coro::Shape &Shape) {
+ auto *CB = Shape.CoroBegin;
+ IRBuilder<> Builder(CB->getNextNode());
+ StructType *FrameTy = Shape.FrameTy;
+ PointerType *FramePtrTy = FrameTy->getPointerTo();
+ Shape.FramePtr =
+ cast<Instruction>(Builder.CreateBitCast(CB, FramePtrTy, "FramePtr"));
+}
+
// Replace all alloca and SSA values that are accessed across suspend points
// with GetElementPointer from coroutine frame + loads and stores. Create an
// AllocaSpillBB that will become the new entry block for the resume parts of
@@ -1098,11 +1516,9 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
coro::Shape &Shape) {
auto *CB = Shape.CoroBegin;
LLVMContext &C = CB->getContext();
- IRBuilder<> Builder(CB->getNextNode());
+ IRBuilder<> Builder(C);
StructType *FrameTy = Shape.FrameTy;
- PointerType *FramePtrTy = FrameTy->getPointerTo();
- auto *FramePtr =
- cast<Instruction>(Builder.CreateBitCast(CB, FramePtrTy, "FramePtr"));
+ Instruction *FramePtr = Shape.FramePtr;
DominatorTree DT(*CB->getFunction());
SmallDenseMap<llvm::Value *, llvm::AllocaInst *, 4> DbgPtrAllocaCache;
@@ -1146,9 +1562,11 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
for (auto const &E : FrameData.Spills) {
Value *Def = E.first;
+ auto SpillAlignment = Align(FrameData.getAlign(Def));
// Create a store instruction storing the value into the
// coroutine frame.
Instruction *InsertPt = nullptr;
+ bool NeedToCopyArgPtrValue = false;
if (auto *Arg = dyn_cast<Argument>(Def)) {
// For arguments, we will place the store instruction right after
// the coroutine frame pointer instruction, i.e. bitcast of
@@ -1159,6 +1577,9 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
// from the coroutine function.
Arg->getParent()->removeParamAttr(Arg->getArgNo(), Attribute::NoCapture);
+ if (Arg->hasByValAttr())
+ NeedToCopyArgPtrValue = true;
+
} else if (auto *CSI = dyn_cast<AnyCoroSuspendInst>(Def)) {
// Don't spill immediately after a suspend; splitting assumes
// that the suspend will be followed by a branch.
@@ -1193,7 +1614,15 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
Builder.SetInsertPoint(InsertPt);
auto *G = Builder.CreateConstInBoundsGEP2_32(
FrameTy, FramePtr, 0, Index, Def->getName() + Twine(".spill.addr"));
- Builder.CreateStore(Def, G);
+ if (NeedToCopyArgPtrValue) {
+ // For byval arguments, we need to store the pointed value in the frame,
+ // instead of the pointer itself.
+ auto *Value =
+ Builder.CreateLoad(Def->getType()->getPointerElementType(), Def);
+ Builder.CreateAlignedStore(Value, G, SpillAlignment);
+ } else {
+ Builder.CreateAlignedStore(Def, G, SpillAlignment);
+ }
BasicBlock *CurrentBlock = nullptr;
Value *CurrentReload = nullptr;
@@ -1207,9 +1636,12 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
auto *GEP = GetFramePointer(E.first);
GEP->setName(E.first->getName() + Twine(".reload.addr"));
- CurrentReload = Builder.CreateLoad(
- FrameTy->getElementType(FrameData.getFieldIndex(E.first)), GEP,
- E.first->getName() + Twine(".reload"));
+ if (NeedToCopyArgPtrValue)
+ CurrentReload = GEP;
+ else
+ CurrentReload = Builder.CreateAlignedLoad(
+ FrameTy->getElementType(FrameData.getFieldIndex(E.first)), GEP,
+ SpillAlignment, E.first->getName() + Twine(".reload"));
TinyPtrVector<DbgDeclareInst *> DIs = FindDbgDeclareUses(Def);
for (DbgDeclareInst *DDI : DIs) {
@@ -1223,7 +1655,7 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
&*Builder.GetInsertPoint());
// This dbg.declare is for the main function entry point. It
// will be deleted in all coro-split functions.
- coro::salvageDebugInfo(DbgPtrAllocaCache, DDI);
+ coro::salvageDebugInfo(DbgPtrAllocaCache, DDI, Shape.ReuseFrameSlot);
}
}
@@ -1271,8 +1703,8 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
}
// If we found any alloca, replace all of their remaining uses with GEP
- // instructions. Because new dbg.declare have been created for these alloca,
- // we also delete the original dbg.declare and replace other uses with undef.
+ // instructions. To remain debugbility, we replace the uses of allocas for
+ // dbg.declares and dbg.values with the reload from the frame.
// Note: We cannot replace the alloca with GEP instructions indiscriminately,
// as some of the uses may not be dominated by CoroBegin.
Builder.SetInsertPoint(&Shape.AllocaSpillBlock->front());
@@ -1290,17 +1722,10 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
auto *G = GetFramePointer(Alloca);
G->setName(Alloca->getName() + Twine(".reload.addr"));
- SmallPtrSet<BasicBlock *, 4> SeenDbgBBs;
- TinyPtrVector<DbgDeclareInst *> DIs = FindDbgDeclareUses(Alloca);
- if (!DIs.empty())
- DIBuilder(*Alloca->getModule(),
- /*AllowUnresolved*/ false)
- .insertDeclare(G, DIs.front()->getVariable(),
- DIs.front()->getExpression(),
- DIs.front()->getDebugLoc(), DIs.front());
- for (auto *DI : FindDbgDeclareUses(Alloca))
- DI->eraseFromParent();
- replaceDbgUsesWithUndef(Alloca);
+ SmallVector<DbgVariableIntrinsic *, 4> DIs;
+ findDbgUsers(DIs, Alloca);
+ for (auto *DVI : DIs)
+ DVI->replaceUsesOfWith(Alloca, G);
for (Instruction *I : UsersToUpdate)
I->replaceUsesOfWith(Alloca, G);
@@ -1326,7 +1751,7 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
auto *FramePtrRaw =
Builder.CreateBitCast(FramePtr, Type::getInt8PtrTy(C));
auto *AliasPtr = Builder.CreateGEP(
- FramePtrRaw,
+ Type::getInt8Ty(C), FramePtrRaw,
ConstantInt::get(Type::getInt64Ty(C), Alias.second.getValue()));
auto *AliasPtrTyped =
Builder.CreateBitCast(AliasPtr, Alias.first->getType());
@@ -1337,77 +1762,6 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
return FramePtr;
}
-// Sets the unwind edge of an instruction to a particular successor.
-static void setUnwindEdgeTo(Instruction *TI, BasicBlock *Succ) {
- if (auto *II = dyn_cast<InvokeInst>(TI))
- II->setUnwindDest(Succ);
- else if (auto *CS = dyn_cast<CatchSwitchInst>(TI))
- CS->setUnwindDest(Succ);
- else if (auto *CR = dyn_cast<CleanupReturnInst>(TI))
- CR->setUnwindDest(Succ);
- else
- llvm_unreachable("unexpected terminator instruction");
-}
-
-// Replaces all uses of OldPred with the NewPred block in all PHINodes in a
-// block.
-static void updatePhiNodes(BasicBlock *DestBB, BasicBlock *OldPred,
- BasicBlock *NewPred, PHINode *Until = nullptr) {
- unsigned BBIdx = 0;
- for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) {
- PHINode *PN = cast<PHINode>(I);
-
- // We manually update the LandingPadReplacement PHINode and it is the last
- // PHI Node. So, if we find it, we are done.
- if (Until == PN)
- break;
-
- // Reuse the previous value of BBIdx if it lines up. In cases where we
- // have multiple phi nodes with *lots* of predecessors, this is a speed
- // win because we don't have to scan the PHI looking for TIBB. This
- // happens because the BB list of PHI nodes are usually in the same
- // order.
- if (PN->getIncomingBlock(BBIdx) != OldPred)
- BBIdx = PN->getBasicBlockIndex(OldPred);
-
- assert(BBIdx != (unsigned)-1 && "Invalid PHI Index!");
- PN->setIncomingBlock(BBIdx, NewPred);
- }
-}
-
-// Uses SplitEdge unless the successor block is an EHPad, in which case do EH
-// specific handling.
-static BasicBlock *ehAwareSplitEdge(BasicBlock *BB, BasicBlock *Succ,
- LandingPadInst *OriginalPad,
- PHINode *LandingPadReplacement) {
- auto *PadInst = Succ->getFirstNonPHI();
- if (!LandingPadReplacement && !PadInst->isEHPad())
- return SplitEdge(BB, Succ);
-
- auto *NewBB = BasicBlock::Create(BB->getContext(), "", BB->getParent(), Succ);
- setUnwindEdgeTo(BB->getTerminator(), NewBB);
- updatePhiNodes(Succ, BB, NewBB, LandingPadReplacement);
-
- if (LandingPadReplacement) {
- auto *NewLP = OriginalPad->clone();
- auto *Terminator = BranchInst::Create(Succ, NewBB);
- NewLP->insertBefore(Terminator);
- LandingPadReplacement->addIncoming(NewLP, NewBB);
- return NewBB;
- }
- Value *ParentPad = nullptr;
- if (auto *FuncletPad = dyn_cast<FuncletPadInst>(PadInst))
- ParentPad = FuncletPad->getParentPad();
- else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(PadInst))
- ParentPad = CatchSwitch->getParentPad();
- else
- llvm_unreachable("handling for other EHPads not implemented yet");
-
- auto *NewCleanupPad = CleanupPadInst::Create(ParentPad, {}, "", NewBB);
- CleanupReturnInst::Create(NewCleanupPad, Succ, NewBB);
- return NewBB;
-}
-
// Moves the values in the PHIs in SuccBB that correspong to PredBB into a new
// PHI in InsertedBB.
static void movePHIValuesToInsertedBlock(BasicBlock *SuccBB,
@@ -1503,6 +1857,24 @@ static void rewritePHIsForCleanupPad(BasicBlock *CleanupPadBB,
}
}
+static void cleanupSinglePredPHIs(Function &F) {
+ SmallVector<PHINode *, 32> Worklist;
+ for (auto &BB : F) {
+ for (auto &Phi : BB.phis()) {
+ if (Phi.getNumIncomingValues() == 1) {
+ Worklist.push_back(&Phi);
+ } else
+ break;
+ }
+ }
+ while (!Worklist.empty()) {
+ auto *Phi = Worklist.back();
+ Worklist.pop_back();
+ auto *OriginalValue = Phi->getIncomingValue(0);
+ Phi->replaceAllUsesWith(OriginalValue);
+ }
+}
+
static void rewritePHIs(BasicBlock &BB) {
// For every incoming edge we will create a block holding all
// incoming values in a single PHI nodes.
@@ -1610,11 +1982,16 @@ static void rewriteMaterializableInstructions(IRBuilder<> &IRB,
for (Instruction *U : E.second) {
// If we have not seen this block, materialize the value.
if (CurrentBlock != U->getParent()) {
- CurrentBlock = U->getParent();
+
+ bool IsInCoroSuspendBlock = isa<AnyCoroSuspendInst>(U);
+ CurrentBlock = IsInCoroSuspendBlock
+ ? U->getParent()->getSinglePredecessor()
+ : U->getParent();
CurrentMaterialization = cast<Instruction>(Def)->clone();
CurrentMaterialization->setName(Def->getName());
CurrentMaterialization->insertBefore(
- &*CurrentBlock->getFirstInsertionPt());
+ IsInCoroSuspendBlock ? CurrentBlock->getTerminator()
+ : &*CurrentBlock->getFirstInsertionPt());
}
if (auto *PN = dyn_cast<PHINode>(U)) {
assert(PN->getNumIncomingValues() == 1 &&
@@ -2101,24 +2478,6 @@ static void sinkLifetimeStartMarkers(Function &F, coro::Shape &Shape,
static void collectFrameAllocas(Function &F, coro::Shape &Shape,
const SuspendCrossingInfo &Checker,
SmallVectorImpl<AllocaInfo> &Allocas) {
- // Collect lifetime.start info for each alloca.
- using LifetimeStart = SmallPtrSet<Instruction *, 2>;
- llvm::DenseMap<AllocaInst *, std::unique_ptr<LifetimeStart>> LifetimeMap;
- for (Instruction &I : instructions(F)) {
- auto *II = dyn_cast<IntrinsicInst>(&I);
- if (!II || II->getIntrinsicID() != Intrinsic::lifetime_start)
- continue;
-
- if (auto *OpInst = dyn_cast<Instruction>(II->getOperand(1))) {
- if (auto *AI = dyn_cast<AllocaInst>(OpInst->stripPointerCasts())) {
-
- if (LifetimeMap.find(AI) == LifetimeMap.end())
- LifetimeMap[AI] = std::make_unique<LifetimeStart>();
- LifetimeMap[AI]->insert(isa<AllocaInst>(OpInst) ? II : OpInst);
- }
- }
- }
-
for (Instruction &I : instructions(F)) {
auto *AI = dyn_cast<AllocaInst>(&I);
if (!AI)
@@ -2128,23 +2487,6 @@ static void collectFrameAllocas(Function &F, coro::Shape &Shape,
if (AI == Shape.SwitchLowering.PromiseAlloca) {
continue;
}
- bool ShouldLiveOnFrame = false;
- auto Iter = LifetimeMap.find(AI);
- if (Iter != LifetimeMap.end()) {
- // Check against lifetime.start if the instruction has the info.
- for (User *U : I.users()) {
- for (auto *S : *Iter->second)
- if ((ShouldLiveOnFrame = Checker.isDefinitionAcrossSuspend(*S, U)))
- break;
- if (ShouldLiveOnFrame)
- break;
- }
- if (!ShouldLiveOnFrame)
- continue;
- }
- // At this point, either ShouldLiveOnFrame is true or we didn't have
- // lifetime information. We will need to rely on more precise pointer
- // tracking.
DominatorTree DT(F);
AllocaUseVisitor Visitor{F.getParent()->getDataLayout(), DT,
*Shape.CoroBegin, Checker};
@@ -2158,58 +2500,94 @@ static void collectFrameAllocas(Function &F, coro::Shape &Shape,
void coro::salvageDebugInfo(
SmallDenseMap<llvm::Value *, llvm::AllocaInst *, 4> &DbgPtrAllocaCache,
- DbgDeclareInst *DDI, bool LoadFromFramePtr) {
- Function *F = DDI->getFunction();
+ DbgVariableIntrinsic *DVI, bool ReuseFrameSlot) {
+ Function *F = DVI->getFunction();
IRBuilder<> Builder(F->getContext());
auto InsertPt = F->getEntryBlock().getFirstInsertionPt();
while (isa<IntrinsicInst>(InsertPt))
++InsertPt;
Builder.SetInsertPoint(&F->getEntryBlock(), InsertPt);
- DIExpression *Expr = DDI->getExpression();
+ DIExpression *Expr = DVI->getExpression();
// Follow the pointer arithmetic all the way to the incoming
// function argument and convert into a DIExpression.
- Value *Storage = DDI->getAddress();
+ bool OutermostLoad = true;
+ Value *Storage = DVI->getVariableLocationOp(0);
+ Value *OriginalStorage = Storage;
while (Storage) {
if (auto *LdInst = dyn_cast<LoadInst>(Storage)) {
Storage = LdInst->getOperand(0);
+ // FIXME: This is a heuristic that works around the fact that
+ // LLVM IR debug intrinsics cannot yet distinguish between
+ // memory and value locations: Because a dbg.declare(alloca) is
+ // implicitly a memory location no DW_OP_deref operation for the
+ // last direct load from an alloca is necessary. This condition
+ // effectively drops the *last* DW_OP_deref in the expression.
+ if (!OutermostLoad)
+ Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
+ OutermostLoad = false;
} else if (auto *StInst = dyn_cast<StoreInst>(Storage)) {
Storage = StInst->getOperand(0);
} else if (auto *GEPInst = dyn_cast<GetElementPtrInst>(Storage)) {
- Expr = llvm::salvageDebugInfoImpl(*GEPInst, Expr,
- /*WithStackValue=*/false);
+ SmallVector<Value *> AdditionalValues;
+ DIExpression *SalvagedExpr = llvm::salvageDebugInfoImpl(
+ *GEPInst, Expr,
+ /*WithStackValue=*/false, 0, AdditionalValues);
+ // Debug declares cannot currently handle additional location
+ // operands.
+ if (!SalvagedExpr || !AdditionalValues.empty())
+ break;
+ Expr = SalvagedExpr;
Storage = GEPInst->getOperand(0);
} else if (auto *BCInst = dyn_cast<llvm::BitCastInst>(Storage))
Storage = BCInst->getOperand(0);
else
break;
}
+ if (!Storage)
+ return;
+
// Store a pointer to the coroutine frame object in an alloca so it
// is available throughout the function when producing unoptimized
// code. Extending the lifetime this way is correct because the
// variable has been declared by a dbg.declare intrinsic.
- if (auto Arg = dyn_cast_or_null<llvm::Argument>(Storage)) {
- auto &Cached = DbgPtrAllocaCache[Storage];
- if (!Cached) {
- Cached = Builder.CreateAlloca(Storage->getType(), 0, nullptr,
- Arg->getName() + ".debug");
- Builder.CreateStore(Storage, Cached);
+ //
+ // Avoid to create the alloca would be eliminated by optimization
+ // passes and the corresponding dbg.declares would be invalid.
+ if (!ReuseFrameSlot && !EnableReuseStorageInFrame)
+ if (auto *Arg = dyn_cast<llvm::Argument>(Storage)) {
+ auto &Cached = DbgPtrAllocaCache[Storage];
+ if (!Cached) {
+ Cached = Builder.CreateAlloca(Storage->getType(), 0, nullptr,
+ Arg->getName() + ".debug");
+ Builder.CreateStore(Storage, Cached);
+ }
+ Storage = Cached;
+ // FIXME: LLVM lacks nuanced semantics to differentiate between
+ // memory and direct locations at the IR level. The backend will
+ // turn a dbg.declare(alloca, ..., DIExpression()) into a memory
+ // location. Thus, if there are deref and offset operations in the
+ // expression, we need to add a DW_OP_deref at the *start* of the
+ // expression to first load the contents of the alloca before
+ // adjusting it with the expression.
+ if (Expr && Expr->isComplex())
+ Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
}
- Storage = Cached;
- }
- // The FramePtr object adds one extra layer of indirection that
- // needs to be unwrapped.
- if (LoadFromFramePtr)
- Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
- auto &VMContext = DDI->getFunction()->getContext();
- DDI->setOperand(
- 0, MetadataAsValue::get(VMContext, ValueAsMetadata::get(Storage)));
- DDI->setOperand(2, MetadataAsValue::get(VMContext, Expr));
- if (auto *InsertPt = dyn_cast_or_null<Instruction>(Storage))
- DDI->moveAfter(InsertPt);
+
+ DVI->replaceVariableLocationOp(OriginalStorage, Storage);
+ DVI->setExpression(Expr);
+ /// It makes no sense to move the dbg.value intrinsic.
+ if (!isa<DbgValueInst>(DVI)) {
+ if (auto *InsertPt = dyn_cast<Instruction>(Storage))
+ DVI->moveAfter(InsertPt);
+ else if (isa<Argument>(Storage))
+ DVI->moveAfter(F->getEntryBlock().getFirstNonPHI());
+ }
}
void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
- eliminateSwiftError(F, Shape);
+ // Don't eliminate swifterror in async functions that won't be split.
+ if (Shape.ABI != coro::ABI::Async || !Shape.CoroSuspends.empty())
+ eliminateSwiftError(F, Shape);
if (Shape.ABI == coro::ABI::Switch &&
Shape.SwitchLowering.PromiseAlloca) {
@@ -2246,6 +2624,10 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
}
}
+ // Later code makes structural assumptions about single predecessors phis e.g
+ // that they are not live accross a suspend point.
+ cleanupSinglePredPHIs(F);
+
// Transforms multi-edge PHI Nodes, so that any value feeding into a PHI will
// never has its definition separated from the PHI by the suspend point.
rewritePHIs(F);
@@ -2263,11 +2645,19 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
for (int Repeat = 0; Repeat < 4; ++Repeat) {
// See if there are materializable instructions across suspend points.
for (Instruction &I : instructions(F))
- if (materializable(I))
+ if (materializable(I)) {
for (User *U : I.users())
if (Checker.isDefinitionAcrossSuspend(I, U))
Spills[&I].push_back(cast<Instruction>(U));
+ // Manually add dbg.value metadata uses of I.
+ SmallVector<DbgValueInst *, 16> DVIs;
+ findDbgValues(DVIs, &I);
+ for (auto *DVI : DVIs)
+ if (Checker.isDefinitionAcrossSuspend(I, DVI))
+ Spills[&I].push_back(DVI);
+ }
+
if (Spills.empty())
break;
@@ -2280,7 +2670,8 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
}
sinkLifetimeStartMarkers(F, Shape, Checker);
- collectFrameAllocas(F, Shape, Checker, FrameData.Allocas);
+ if (Shape.ABI != coro::ABI::Async || !Shape.CoroSuspends.empty())
+ collectFrameAllocas(F, Shape, Checker, FrameData.Allocas);
LLVM_DEBUG(dumpAllocas(FrameData.Allocas));
// Collect the spills for arguments and other not-materializable values.
@@ -2339,12 +2730,30 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
FrameData.Spills[&I].push_back(cast<Instruction>(U));
}
}
+
+ // We don't want the layout of coroutine frame to be affected
+ // by debug information. So we only choose to salvage DbgValueInst for
+ // whose value is already in the frame.
+ // We would handle the dbg.values for allocas specially
+ for (auto &Iter : FrameData.Spills) {
+ auto *V = Iter.first;
+ SmallVector<DbgValueInst *, 16> DVIs;
+ findDbgValues(DVIs, V);
+ llvm::for_each(DVIs, [&](DbgValueInst *DVI) {
+ if (Checker.isDefinitionAcrossSuspend(*V, DVI))
+ FrameData.Spills[V].push_back(DVI);
+ });
+ }
+
LLVM_DEBUG(dumpSpills("Spills", FrameData.Spills));
if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
Shape.ABI == coro::ABI::Async)
sinkSpillUsesAfterCoroBegin(F, FrameData, Shape.CoroBegin);
Shape.FrameTy = buildFrameType(F, Shape, FrameData);
- Shape.FramePtr = insertSpills(FrameData, Shape);
+ createFramePtr(Shape);
+ // For now, this works for C++ programs only.
+ buildFrameDebugInfo(F, Shape, FrameData);
+ insertSpills(FrameData, Shape);
lowerLocalAllocas(LocalAllocas, DeadInstructions);
for (auto I : DeadInstructions)