author | Dimitry Andric <dim@FreeBSD.org> | 2021-07-29 20:15:26 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2021-07-29 20:15:26 +0000 |
commit | 344a3780b2e33f6ca763666c380202b18aab72a3 (patch) | |
tree | f0b203ee6eb71d7fdd792373e3c81eb18d6934dd /llvm/lib/CodeGen | |
parent | b60736ec1405bb0a8dd40989f67ef4c93da068ab (diff) | |
download | src-344a3780b2e33f6ca763666c380202b18aab72a3.tar.gz src-344a3780b2e33f6ca763666c380202b18aab72a3.zip |
Vendor import of llvm-project main 88e66fa60ae5, the last commit before the upstream release/13.x branch was created.
Tags: vendor/llvm-project/llvmorg-13-init-16847-g88e66fa60ae5, vendor/llvm-project/llvmorg-12.0.1-rc2-0-ge7dac564cd0e, vendor/llvm-project/llvmorg-12.0.1-0-gfed41342a82f
Diffstat (limited to 'llvm/lib/CodeGen')
216 files changed, 17679 insertions, 7054 deletions
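Before the listing: many of the hunks below are mechanical modernizations, replacing explicit iterator loops with range-based `for` loops and manual index bookkeeping with helpers such as `llvm::enumerate` and `ListSeparator`. As a rough illustration of the pattern only (plain C++17 with a hand-rolled counter standing in for `llvm::enumerate`; this is not code from the import itself):

```cpp
#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  std::vector<int> Elems{10, 20, 30};

  // Old style, as in the removed lines: explicit iterators plus
  // pointer arithmetic to recover the element index.
  for (auto EB = Elems.begin(), EI = EB, EE = Elems.end(); EI != EE; ++EI)
    std::cout << (EI - EB) << ": " << *EI << '\n';

  // New style, as in the added lines: a range-based loop. Where the
  // index is still needed, llvm::enumerate pairs it with the value;
  // a minimal stand-in here is a counter carried alongside the loop.
  std::size_t Index = 0;
  for (int Value : Elems)
    std::cout << Index++ << ": " << Value << '\n';
  return 0;
}
```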
diff --git a/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp index acf8553f7205..87a3cede601b 100644 --- a/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -153,9 +153,8 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { std::vector<unsigned> &DefIndices = State->GetDefIndices(); // Examine the live-in regs of all successors. - for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), - SE = BB->succ_end(); SI != SE; ++SI) - for (const auto &LI : (*SI)->liveins()) { + for (MachineBasicBlock *Succ : BB->successors()) + for (const auto &LI : Succ->liveins()) { for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI) { unsigned Reg = *AI; State->UnionGroups(Reg, 0); @@ -259,11 +258,10 @@ void AggressiveAntiDepBreaker::GetPassthruRegs( /// in SU that we want to consider for breaking. static void AntiDepEdges(const SUnit *SU, std::vector<const SDep *> &Edges) { SmallSet<unsigned, 4> RegSet; - for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); - P != PE; ++P) { - if ((P->getKind() == SDep::Anti) || (P->getKind() == SDep::Output)) { - if (RegSet.insert(P->getReg()).second) - Edges.push_back(&*P); + for (const SDep &Pred : SU->Preds) { + if ((Pred.getKind() == SDep::Anti) || (Pred.getKind() == SDep::Output)) { + if (RegSet.insert(Pred.getReg()).second) + Edges.push_back(&Pred); } } } @@ -275,17 +273,16 @@ static const SUnit *CriticalPathStep(const SUnit *SU) { unsigned NextDepth = 0; // Find the predecessor edge with the greatest depth. if (SU) { - for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); - P != PE; ++P) { - const SUnit *PredSU = P->getSUnit(); - unsigned PredLatency = P->getLatency(); + for (const SDep &Pred : SU->Preds) { + const SUnit *PredSU = Pred.getSUnit(); + unsigned PredLatency = Pred.getLatency(); unsigned PredTotalLatency = PredSU->getDepth() + PredLatency; // In the case of a latency tie, prefer an anti-dependency edge over // other types of edges. if (NextDepth < PredTotalLatency || - (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) { + (NextDepth == PredTotalLatency && Pred.getKind() == SDep::Anti)) { NextDepth = PredTotalLatency; - Next = &*P; + Next = &Pred; } } } @@ -886,25 +883,24 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( // Also, if there are dependencies on other SUnits with the // same register as the anti-dependency, don't attempt to // break it. - for (SUnit::const_pred_iterator P = PathSU->Preds.begin(), - PE = PathSU->Preds.end(); P != PE; ++P) { - if (P->getSUnit() == NextSU ? - (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) : - (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) { + for (const SDep &Pred : PathSU->Preds) { + if (Pred.getSUnit() == NextSU ? 
(Pred.getKind() != SDep::Anti || + Pred.getReg() != AntiDepReg) + : (Pred.getKind() == SDep::Data && + Pred.getReg() == AntiDepReg)) { AntiDepReg = 0; break; } } - for (SUnit::const_pred_iterator P = PathSU->Preds.begin(), - PE = PathSU->Preds.end(); P != PE; ++P) { - if ((P->getSUnit() == NextSU) && (P->getKind() != SDep::Anti) && - (P->getKind() != SDep::Output)) { + for (const SDep &Pred : PathSU->Preds) { + if ((Pred.getSUnit() == NextSU) && (Pred.getKind() != SDep::Anti) && + (Pred.getKind() != SDep::Output)) { LLVM_DEBUG(dbgs() << " (real dependency)\n"); AntiDepReg = 0; break; - } else if ((P->getSUnit() != NextSU) && - (P->getKind() == SDep::Data) && - (P->getReg() == AntiDepReg)) { + } else if ((Pred.getSUnit() != NextSU) && + (Pred.getKind() == SDep::Data) && + (Pred.getReg() == AntiDepReg)) { LLVM_DEBUG(dbgs() << " (other dependency)\n"); AntiDepReg = 0; break; @@ -956,10 +952,9 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( << printReg(AntiDepReg, TRI) << ":"); // Handle each group register... - for (std::map<unsigned, unsigned>::iterator - S = RenameMap.begin(), E = RenameMap.end(); S != E; ++S) { - unsigned CurrReg = S->first; - unsigned NewReg = S->second; + for (const auto &P : RenameMap) { + unsigned CurrReg = P.first; + unsigned NewReg = P.second; LLVM_DEBUG(dbgs() << " " << printReg(CurrReg, TRI) << "->" << printReg(NewReg, TRI) << "(" diff --git a/llvm/lib/CodeGen/Analysis.cpp b/llvm/lib/CodeGen/Analysis.cpp index ebeff1fec30b..e5d576d879b5 100644 --- a/llvm/lib/CodeGen/Analysis.cpp +++ b/llvm/lib/CodeGen/Analysis.cpp @@ -43,13 +43,11 @@ unsigned llvm::ComputeLinearIndex(Type *Ty, // Given a struct type, recursively traverse the elements. if (StructType *STy = dyn_cast<StructType>(Ty)) { - for (StructType::element_iterator EB = STy->element_begin(), - EI = EB, - EE = STy->element_end(); - EI != EE; ++EI) { - if (Indices && *Indices == unsigned(EI - EB)) - return ComputeLinearIndex(*EI, Indices+1, IndicesEnd, CurIndex); - CurIndex = ComputeLinearIndex(*EI, nullptr, nullptr, CurIndex); + for (auto I : llvm::enumerate(STy->elements())) { + Type *ET = I.value(); + if (Indices && *Indices == I.index()) + return ComputeLinearIndex(ET, Indices + 1, IndicesEnd, CurIndex); + CurIndex = ComputeLinearIndex(ET, nullptr, nullptr, CurIndex); } assert(!Indices && "Unexpected out of bound"); return CurIndex; @@ -513,9 +511,10 @@ bool llvm::isInTailCallPosition(const CallBase &Call, const TargetMachine &TM) { // not profitable. Also, if the callee is a special function (e.g. // longjmp on x86), it can end up causing miscompilation that has not // been fully understood. - if (!Ret && - ((!TM.Options.GuaranteedTailCallOpt && - Call.getCallingConv() != CallingConv::Tail) || !isa<UnreachableInst>(Term))) + if (!Ret && ((!TM.Options.GuaranteedTailCallOpt && + Call.getCallingConv() != CallingConv::Tail && + Call.getCallingConv() != CallingConv::SwiftTail) || + !isa<UnreachableInst>(Term))) return false; // If I will have a chain, make sure no other instruction that will have a @@ -562,14 +561,12 @@ bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I, // Following attributes are completely benign as far as calling convention // goes, they shouldn't affect whether the call is a tail call. 
- CallerAttrs.removeAttribute(Attribute::NoAlias); - CalleeAttrs.removeAttribute(Attribute::NoAlias); - CallerAttrs.removeAttribute(Attribute::NonNull); - CalleeAttrs.removeAttribute(Attribute::NonNull); - CallerAttrs.removeAttribute(Attribute::Dereferenceable); - CalleeAttrs.removeAttribute(Attribute::Dereferenceable); - CallerAttrs.removeAttribute(Attribute::DereferenceableOrNull); - CalleeAttrs.removeAttribute(Attribute::DereferenceableOrNull); + for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable, + Attribute::DereferenceableOrNull, Attribute::NoAlias, + Attribute::NonNull}) { + CallerAttrs.removeAttribute(Attr); + CalleeAttrs.removeAttribute(Attr); + } if (CallerAttrs.contains(Attribute::ZExt)) { if (!CalleeAttrs.contains(Attribute::ZExt)) diff --git a/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp b/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp index 95d878e65be4..964cef75d164 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp @@ -61,6 +61,9 @@ void AIXException::emitExceptionInfoTable(const MCSymbol *LSDA, } void AIXException::endFunction(const MachineFunction *MF) { + // There is no easy way to access register information in the `AIXException` + // class. When ShouldEmitEHBlock is false and VRs are saved, a dummy EH info + // table is emitted in PPCAIXAsmPrinter::emitFunctionBodyEnd. if (!TargetLoweringObjectFileXCOFF::ShouldEmitEHBlock(MF)) return; @@ -69,8 +72,8 @@ void AIXException::endFunction(const MachineFunction *MF) { const Function &F = MF->getFunction(); assert(F.hasPersonalityFn() && "Landingpads are presented, but no personality routine is found."); - const Function *Per = - dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts()); + const GlobalValue *Per = + dyn_cast<GlobalValue>(F.getPersonalityFn()->stripPointerCasts()); const MCSymbol *PerSym = Asm->TM.getSymbol(Per); emitExceptionInfoTable(LSDALabel, PerSym); diff --git a/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp index b634b24377fe..db4215e92d44 100644 --- a/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -39,13 +39,13 @@ void ARMException::beginFunction(const MachineFunction *MF) { if (Asm->MAI->getExceptionHandlingType() == ExceptionHandling::ARM) getTargetStreamer().emitFnStart(); // See if we need call frame info.
- AsmPrinter::CFIMoveType MoveType = Asm->needsCFIMoves(); - assert(MoveType != AsmPrinter::CFI_M_EH && + AsmPrinter::CFISection CFISecType = Asm->getFunctionCFISectionType(*MF); + assert(CFISecType != AsmPrinter::CFISection::EH && "non-EH CFI not yet supported in prologue with EHABI lowering"); - if (MoveType == AsmPrinter::CFI_M_Debug) { + if (CFISecType == AsmPrinter::CFISection::Debug) { if (!hasEmittedCFISections) { - if (Asm->needsOnlyDebugCFIMoves()) + if (Asm->getModuleCFISectionType() == AsmPrinter::CFISection::Debug) Asm->OutStreamer->emitCFISections(false, true); hasEmittedCFISections = true; } diff --git a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp index 4e45a0ffc60f..65c45f73e965 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp @@ -205,7 +205,7 @@ class Dwarf5AccelTableWriter : public AccelTableWriter { : CompUnitCount(CompUnitCount), BucketCount(BucketCount), NameCount(NameCount) {} - void emit(const Dwarf5AccelTableWriter &Ctx) const; + void emit(Dwarf5AccelTableWriter &Ctx); }; struct AttributeEncoding { dwarf::Index Index; @@ -216,8 +216,7 @@ class Dwarf5AccelTableWriter : public AccelTableWriter { DenseMap<uint32_t, SmallVector<AttributeEncoding, 2>> Abbreviations; ArrayRef<MCSymbol *> CompUnits; llvm::function_ref<unsigned(const DataT &)> getCUIndexForEntry; - MCSymbol *ContributionStart = Asm->createTempSymbol("names_start"); - MCSymbol *ContributionEnd = Asm->createTempSymbol("names_end"); + MCSymbol *ContributionEnd = nullptr; MCSymbol *AbbrevStart = Asm->createTempSymbol("names_abbrev_start"); MCSymbol *AbbrevEnd = Asm->createTempSymbol("names_abbrev_end"); MCSymbol *EntryPool = Asm->createTempSymbol("names_entries"); @@ -240,7 +239,7 @@ public: ArrayRef<MCSymbol *> CompUnits, llvm::function_ref<unsigned(const DataT &)> GetCUIndexForEntry); - void emit() const; + void emit(); }; } // namespace @@ -327,9 +326,9 @@ void AppleAccelTableWriter::emitBuckets() const { void AppleAccelTableWriter::emitData() const { const auto &Buckets = Contents.getBuckets(); - for (size_t i = 0, e = Buckets.size(); i < e; ++i) { + for (const AccelTableBase::HashList &Bucket : Buckets) { uint64_t PrevHash = std::numeric_limits<uint64_t>::max(); - for (auto &Hash : Buckets[i]) { + for (auto &Hash : Bucket) { // Terminate the previous entry if there is no hash collision with the // current one. if (PrevHash != std::numeric_limits<uint64_t>::max() && @@ -346,7 +345,7 @@ void AppleAccelTableWriter::emitData() const { PrevHash = Hash->HashValue; } // Emit the final end marker for the bucket. 
- if (!Buckets[i].empty()) + if (!Bucket.empty()) Asm->emitInt32(0); } } @@ -361,14 +360,12 @@ void AppleAccelTableWriter::emit() const { } template <typename DataT> -void Dwarf5AccelTableWriter<DataT>::Header::emit( - const Dwarf5AccelTableWriter &Ctx) const { +void Dwarf5AccelTableWriter<DataT>::Header::emit(Dwarf5AccelTableWriter &Ctx) { assert(CompUnitCount > 0 && "Index must have at least one CU."); AsmPrinter *Asm = Ctx.Asm; - Asm->emitDwarfUnitLength(Ctx.ContributionEnd, Ctx.ContributionStart, - "Header: unit length"); - Asm->OutStreamer->emitLabel(Ctx.ContributionStart); + Ctx.ContributionEnd = + Asm->emitDwarfUnitLength("names", "Header: unit length"); Asm->OutStreamer->AddComment("Header: version"); Asm->emitInt16(Version); Asm->OutStreamer->AddComment("Header: padding"); @@ -526,7 +523,7 @@ Dwarf5AccelTableWriter<DataT>::Dwarf5AccelTableWriter( Abbreviations.try_emplace(Tag, UniformAttributes); } -template <typename DataT> void Dwarf5AccelTableWriter<DataT>::emit() const { +template <typename DataT> void Dwarf5AccelTableWriter<DataT>::emit() { Header.emit(*this); emitCUList(); emitBuckets(); diff --git a/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp b/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp index 3df8e35accc4..21da9d50efba 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp @@ -25,12 +25,9 @@ unsigned AddressPool::getIndex(const MCSymbol *Sym, bool TLS) { MCSymbol *AddressPool::emitHeader(AsmPrinter &Asm, MCSection *Section) { static const uint8_t AddrSize = Asm.getDataLayout().getPointerSize(); - StringRef Prefix = "debug_addr_"; - MCSymbol *BeginLabel = Asm.createTempSymbol(Prefix + "start"); - MCSymbol *EndLabel = Asm.createTempSymbol(Prefix + "end"); - Asm.emitDwarfUnitLength(EndLabel, BeginLabel, "Length of contribution"); - Asm.OutStreamer->emitLabel(BeginLabel); + MCSymbol *EndLabel = + Asm.emitDwarfUnitLength("debug_addr", "Length of contribution"); Asm.OutStreamer->AddComment("DWARF version number"); Asm.emitInt16(Asm.getDwarfVersion()); Asm.OutStreamer->AddComment("Address size"); diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 85754bf29d0c..e528d33b5f8c 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -38,7 +38,6 @@ #include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GCMetadataPrinter.h" -#include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineDominators.h" @@ -60,6 +59,7 @@ #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/Config/config.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Comdat.h" #include "llvm/IR/Constant.h" @@ -68,6 +68,7 @@ #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GCStrategy.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalIFunc.h" #include "llvm/IR/GlobalIndirectSymbol.h" @@ -110,6 +111,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/Format.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Path.h" @@ -296,8 +298,24 @@ bool AsmPrinter::doInitialization(Module &M) { // don't, this at least helps the user find 
where a global came from. if (MAI->hasSingleParameterDotFile()) { // .file "foo.c" - OutStreamer->emitFileDirective( - llvm::sys::path::filename(M.getSourceFileName())); + + SmallString<128> FileName; + if (MAI->hasBasenameOnlyForFileDirective()) + FileName = llvm::sys::path::filename(M.getSourceFileName()); + else + FileName = M.getSourceFileName(); + if (MAI->hasFourStringsDotFile()) { +#ifdef PACKAGE_VENDOR + const char VerStr[] = + PACKAGE_VENDOR " " PACKAGE_NAME " version " PACKAGE_VERSION; +#else + const char VerStr[] = PACKAGE_NAME " version " PACKAGE_VERSION; +#endif + // TODO: Add timestamp and description. + OutStreamer->emitFileDirective(FileName, VerStr, "", ""); + } else { + OutStreamer->emitFileDirective(FileName); + } } GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>(); @@ -341,37 +359,39 @@ bool AsmPrinter::doInitialization(Module &M) { } if (M.getNamedMetadata(PseudoProbeDescMetadataName)) { - PP = new PseudoProbeHandler(this, &M); + PP = new PseudoProbeHandler(this); Handlers.emplace_back(std::unique_ptr<PseudoProbeHandler>(PP), PPTimerName, PPTimerDescription, PPGroupName, PPGroupDescription); } switch (MAI->getExceptionHandlingType()) { + case ExceptionHandling::None: + // We may want to emit CFI for debug. + LLVM_FALLTHROUGH; case ExceptionHandling::SjLj: case ExceptionHandling::DwarfCFI: case ExceptionHandling::ARM: - isCFIMoveForDebugging = true; - if (MAI->getExceptionHandlingType() != ExceptionHandling::DwarfCFI) - break; - for (auto &F: M.getFunctionList()) { - // If the module contains any function with unwind data, - // .eh_frame has to be emitted. - // Ignore functions that won't get emitted. - if (!F.isDeclarationForLinker() && F.needsUnwindTableEntry()) { - isCFIMoveForDebugging = false; + for (auto &F : M.getFunctionList()) { + if (getFunctionCFISectionType(F) != CFISection::None) + ModuleCFISection = getFunctionCFISectionType(F); + // If any function needsUnwindTableEntry(), it needs .eh_frame and hence + // the module needs .eh_frame. If we have found that case, we are done. + if (ModuleCFISection == CFISection::EH) break; - } } + assert(MAI->getExceptionHandlingType() == ExceptionHandling::DwarfCFI || + ModuleCFISection != CFISection::EH); break; default: - isCFIMoveForDebugging = false; break; } EHStreamer *ES = nullptr; switch (MAI->getExceptionHandlingType()) { case ExceptionHandling::None: - break; + if (!needsCFIForDebug()) + break; + LLVM_FALLTHROUGH; case ExceptionHandling::SjLj: case ExceptionHandling::DwarfCFI: ES = new DwarfCFIException(this); @@ -709,7 +729,12 @@ void AsmPrinter::emitFunctionHeader() { emitConstantPool(); // Print the 'header' of function. - MF->setSection(getObjFileLowering().SectionForGlobal(&F, TM)); + // If basic block sections are desired, explicitly request a unique section + // for this function's entry block. + if (MF->front().isBeginSection()) + MF->setSection(getObjFileLowering().getUniqueSectionForFunction(F, TM)); + else + MF->setSection(getObjFileLowering().SectionForGlobal(&F, TM)); OutStreamer->SwitchSection(MF->getSection()); if (!MAI->hasVisibilityOnlyWithLinkage()) @@ -786,6 +811,16 @@ void AsmPrinter::emitFunctionHeader() { // their wild and crazy things as required. emitFunctionEntryLabel(); + // If the function had address-taken blocks that got deleted, then we have + // references to the dangling symbols. Emit them at the start of the function + // so that we don't get references to undefined symbols. 
+ std::vector<MCSymbol*> DeadBlockSyms; + MMI->takeDeletedSymbolsForFunction(&F, DeadBlockSyms); + for (unsigned i = 0, e = DeadBlockSyms.size(); i != e; ++i) { + OutStreamer->AddComment("Address taken block that was later removed"); + OutStreamer->emitLabel(DeadBlockSyms[i]); + } + if (CurrentFnBegin) { if (MAI->useAssignmentForEHBegin()) { MCSymbol *CurPos = OutContext.createTempSymbol(); @@ -819,9 +854,6 @@ void AsmPrinter::emitFunctionEntryLabel() { if (CurrentFnSym->isVariable()) report_fatal_error("'" + Twine(CurrentFnSym->getName()) + "' is a protected alias"); - if (CurrentFnSym->isDefined()) - report_fatal_error("'" + Twine(CurrentFnSym->getName()) + - "' label emitted multiple times to assembly file"); OutStreamer->emitLabel(CurrentFnSym); @@ -900,7 +932,7 @@ static void emitKill(const MachineInstr *MI, AsmPrinter &AP) { /// means the target will need to handle MI in EmitInstruction. static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { // This code handles only the 4-operand target-independent form. - if (MI->getNumOperands() != 4) + if (MI->isNonListDebugValue() && MI->getNumOperands() != 4) return false; SmallString<128> Str; @@ -916,19 +948,12 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { OS << V->getName(); OS << " <- "; - // The second operand is only an offset if it's an immediate. - bool MemLoc = MI->isIndirectDebugValue(); - auto Offset = StackOffset::getFixed(MemLoc ? MI->getOperand(1).getImm() : 0); const DIExpression *Expr = MI->getDebugExpression(); if (Expr->getNumElements()) { OS << '['; - bool NeedSep = false; + ListSeparator LS; for (auto Op : Expr->expr_ops()) { - if (NeedSep) - OS << ", "; - else - NeedSep = true; - OS << dwarf::OperationEncodingString(Op.getOp()); + OS << LS << dwarf::OperationEncodingString(Op.getOp()); for (unsigned I = 0; I < Op.getNumArgs(); ++I) OS << ' ' << Op.getArg(I); } @@ -936,56 +961,71 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { } // Register or immediate value. Register 0 means undef. - if (MI->getDebugOperand(0).isFPImm()) { - APFloat APF = APFloat(MI->getDebugOperand(0).getFPImm()->getValueAPF()); - if (MI->getDebugOperand(0).getFPImm()->getType()->isFloatTy()) { - OS << (double)APF.convertToFloat(); - } else if (MI->getDebugOperand(0).getFPImm()->getType()->isDoubleTy()) { - OS << APF.convertToDouble(); - } else { - // There is no good way to print long double. Convert a copy to - // double. Ah well, it's only a comment. - bool ignored; - APF.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, - &ignored); - OS << "(long double) " << APF.convertToDouble(); + for (const MachineOperand &Op : MI->debug_operands()) { + if (&Op != MI->debug_operands().begin()) + OS << ", "; + switch (Op.getType()) { + case MachineOperand::MO_FPImmediate: { + APFloat APF = APFloat(Op.getFPImm()->getValueAPF()); + Type *ImmTy = Op.getFPImm()->getType(); + if (ImmTy->isBFloatTy() || ImmTy->isHalfTy() || ImmTy->isFloatTy() || + ImmTy->isDoubleTy()) { + OS << APF.convertToDouble(); + } else { + // There is no good way to print long double. Convert a copy to + // double. Ah well, it's only a comment. 
+ bool ignored; + APF.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, + &ignored); + OS << "(long double) " << APF.convertToDouble(); + } + break; } - } else if (MI->getDebugOperand(0).isImm()) { - OS << MI->getDebugOperand(0).getImm(); - } else if (MI->getDebugOperand(0).isCImm()) { - MI->getDebugOperand(0).getCImm()->getValue().print(OS, false /*isSigned*/); - } else if (MI->getDebugOperand(0).isTargetIndex()) { - auto Op = MI->getDebugOperand(0); - OS << "!target-index(" << Op.getIndex() << "," << Op.getOffset() << ")"; - // NOTE: Want this comment at start of line, don't emit with AddComment. - AP.OutStreamer->emitRawComment(OS.str()); - return true; - } else { - Register Reg; - if (MI->getDebugOperand(0).isReg()) { - Reg = MI->getDebugOperand(0).getReg(); - } else { - assert(MI->getDebugOperand(0).isFI() && "Unknown operand type"); - const TargetFrameLowering *TFI = AP.MF->getSubtarget().getFrameLowering(); - Offset += TFI->getFrameIndexReference( - *AP.MF, MI->getDebugOperand(0).getIndex(), Reg); - MemLoc = true; + case MachineOperand::MO_Immediate: { + OS << Op.getImm(); + break; } - if (Reg == 0) { - // Suppress offset, it is not meaningful here. - OS << "undef"; + case MachineOperand::MO_CImmediate: { + Op.getCImm()->getValue().print(OS, false /*isSigned*/); + break; + } + case MachineOperand::MO_TargetIndex: { + OS << "!target-index(" << Op.getIndex() << "," << Op.getOffset() << ")"; // NOTE: Want this comment at start of line, don't emit with AddComment. AP.OutStreamer->emitRawComment(OS.str()); - return true; + break; + } + case MachineOperand::MO_Register: + case MachineOperand::MO_FrameIndex: { + Register Reg; + Optional<StackOffset> Offset; + if (Op.isReg()) { + Reg = Op.getReg(); + } else { + const TargetFrameLowering *TFI = + AP.MF->getSubtarget().getFrameLowering(); + Offset = TFI->getFrameIndexReference(*AP.MF, Op.getIndex(), Reg); + } + if (!Reg) { + // Suppress offset, it is not meaningful here. + OS << "undef"; + break; + } + // The second operand is only an offset if it's an immediate. + if (MI->isIndirectDebugValue()) + Offset = StackOffset::getFixed(MI->getDebugOffset().getImm()); + if (Offset) + OS << '['; + OS << printReg(Reg, AP.MF->getSubtarget().getRegisterInfo()); + if (Offset) + OS << '+' << Offset->getFixed() << ']'; + break; + } + default: + llvm_unreachable("Unknown operand type"); } - if (MemLoc) - OS << '['; - OS << printReg(Reg, AP.MF->getSubtarget().getRegisterInfo()); } - if (MemLoc) - OS << '+' << Offset.getFixed() << ']'; - // NOTE: Want this comment at start of line, don't emit with AddComment. AP.OutStreamer->emitRawComment(OS.str()); return true; @@ -1016,28 +1056,44 @@ static bool emitDebugLabelComment(const MachineInstr *MI, AsmPrinter &AP) { return true; } -AsmPrinter::CFIMoveType AsmPrinter::needsCFIMoves() const { +AsmPrinter::CFISection +AsmPrinter::getFunctionCFISectionType(const Function &F) const { + // Ignore functions that won't get emitted. 
+ if (F.isDeclarationForLinker()) + return CFISection::None; + if (MAI->getExceptionHandlingType() == ExceptionHandling::DwarfCFI && - MF->getFunction().needsUnwindTableEntry()) - return CFI_M_EH; + F.needsUnwindTableEntry()) + return CFISection::EH; + + if (MMI->hasDebugInfo() || TM.Options.ForceDwarfFrameSection) + return CFISection::Debug; - if (MMI->hasDebugInfo() || MF->getTarget().Options.ForceDwarfFrameSection) - return CFI_M_Debug; + return CFISection::None; +} - return CFI_M_None; +AsmPrinter::CFISection +AsmPrinter::getFunctionCFISectionType(const MachineFunction &MF) const { + return getFunctionCFISectionType(MF.getFunction()); } bool AsmPrinter::needsSEHMoves() { return MAI->usesWindowsCFI() && MF->getFunction().needsUnwindTableEntry(); } +bool AsmPrinter::needsCFIForDebug() const { + return MAI->getExceptionHandlingType() == ExceptionHandling::None && + MAI->doesUseCFIForDebug() && ModuleCFISection == CFISection::Debug; +} + void AsmPrinter::emitCFIInstruction(const MachineInstr &MI) { ExceptionHandling ExceptionHandlingType = MAI->getExceptionHandlingType(); - if (ExceptionHandlingType != ExceptionHandling::DwarfCFI && + if (!needsCFIForDebug() && + ExceptionHandlingType != ExceptionHandling::DwarfCFI && ExceptionHandlingType != ExceptionHandling::ARM) return; - if (needsCFIMoves() == CFI_M_None) + if (getFunctionCFISectionType(*MF) == CFISection::None) return; // If there is no "real" instruction following this CFI instruction, skip @@ -1068,17 +1124,19 @@ void AsmPrinter::emitFrameAlloc(const MachineInstr &MI) { /// Returns the BB metadata to be emitted in the .llvm_bb_addr_map section for a /// given basic block. This can be used to capture more precise profile -/// information. We use the last 3 bits (LSBs) to ecnode the following +/// information. We use the last 4 bits (LSBs) to encode the following /// information: /// * (1): set if return block (ret or tail call). /// * (2): set if ends with a tail call. /// * (3): set if exception handling (EH) landing pad. +/// * (4): set if the block can fall through to its next. /// The remaining bits are zero. static unsigned getBBAddrMapMetadata(const MachineBasicBlock &MBB) { const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo(); return ((unsigned)MBB.isReturnBlock()) | ((!MBB.empty() && TII->isTailCall(MBB.back())) << 1) | - (MBB.isEHPad() << 2); + (MBB.isEHPad() << 2) | + (const_cast<MachineBasicBlock &>(MBB).canFallThrough() << 3); } void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) { @@ -1141,6 +1199,37 @@ void AsmPrinter::emitStackSizeSection(const MachineFunction &MF) { OutStreamer->PopSection(); } +void AsmPrinter::emitStackUsage(const MachineFunction &MF) { + const std::string &OutputFilename = MF.getTarget().Options.StackUsageOutput; + + // OutputFilename empty implies -fstack-usage is not passed. 
+ if (OutputFilename.empty()) + return; + + const MachineFrameInfo &FrameInfo = MF.getFrameInfo(); + uint64_t StackSize = FrameInfo.getStackSize(); + + if (StackUsageStream == nullptr) { + std::error_code EC; + StackUsageStream = + std::make_unique<raw_fd_ostream>(OutputFilename, EC, sys::fs::OF_Text); + if (EC) { + errs() << "Could not open file: " << EC.message(); + return; + } + } + + *StackUsageStream << MF.getFunction().getParent()->getName(); + if (const DISubprogram *DSP = MF.getFunction().getSubprogram()) + *StackUsageStream << ':' << DSP->getLine(); + + *StackUsageStream << ':' << MF.getName() << '\t' << StackSize << '\t'; + if (FrameInfo.hasVarSizedObjects()) + *StackUsageStream << "dynamic\n"; + else + *StackUsageStream << "static\n"; +} + static bool needFuncLabelsForEHOrDebugInfo(const MachineFunction &MF) { MachineModuleInfo &MMI = MF.getMMI(); if (!MF.getLandingPads().empty() || MF.hasEHFunclets() || MMI.hasDebugInfo()) @@ -1227,6 +1316,7 @@ void AsmPrinter::emitFunctionBody() { emitInlineAsm(&MI); break; case TargetOpcode::DBG_VALUE: + case TargetOpcode::DBG_VALUE_LIST: if (isVerbose()) { if (!emitDebugValueComment(&MI, *this)) emitInstruction(&MI); @@ -1237,6 +1327,10 @@ void AsmPrinter::emitFunctionBody() { // location, and a nearby DBG_VALUE created. We can safely ignore // the instruction reference. break; + case TargetOpcode::DBG_PHI: + // This instruction is only used to label a program point, it's purely + // meta information. + break; case TargetOpcode::DBG_LABEL: if (isVerbose()) { if (!emitDebugLabelComment(&MI, *this)) @@ -1252,6 +1346,10 @@ void AsmPrinter::emitFunctionBody() { case TargetOpcode::PSEUDO_PROBE: emitPseudoProbe(MI); break; + case TargetOpcode::ARITH_FENCE: + if (isVerbose()) + OutStreamer->emitRawComment("ARITH_FENCE"); + break; default: emitInstruction(&MI); if (CanDoExtraAnalysis) { @@ -1277,11 +1375,9 @@ void AsmPrinter::emitFunctionBody() { // We must emit temporary symbol for the end of this basic block, if either // we have BBLabels enabled or if this basic blocks marks the end of a - // section (except the section containing the entry basic block as the end - // symbol for that section is CurrentFnEnd). + // section. if (MF->hasBBLabels() || - (MAI->hasDotTypeDotSizeDirective() && MBB.isEndSection() && - !MBB.sameSection(&MF->front()))) + (MAI->hasDotTypeDotSizeDirective() && MBB.isEndSection())) OutStreamer->emitLabel(MBB.getEndSymbol()); if (MBB.isEndSection()) { @@ -1352,8 +1448,7 @@ void AsmPrinter::emitFunctionBody() { const Triple &TT = TM.getTargetTriple(); if (!HasAnyRealCode && (MAI->hasSubsectionsViaSymbols() || (TT.isOSWindows() && TT.isOSBinFormatCOFF()))) { - MCInst Noop; - MF->getSubtarget().getInstrInfo()->getNoop(Noop); + MCInst Noop = MF->getSubtarget().getInstrInfo()->getNop(); // Targets can opt-out of emitting the noop here by leaving the opcode // unspecified. @@ -1418,13 +1513,16 @@ void AsmPrinter::emitFunctionBody() { } // Emit section containing BB address offsets and their metadata, when - // BB labels are requested for this function. - if (MF->hasBBLabels()) + // BB labels are requested for this function. Skip empty functions. + if (MF->hasBBLabels() && HasAnyRealCode) emitBBAddrMapSection(*MF); // Emit section containing stack size metadata. emitStackSizeSection(*MF); + // Emit .su file containing function stack size information. 
+ emitStackUsage(*MF); + emitPatchableFunctionEntries(); if (isVerbose()) @@ -1600,7 +1698,7 @@ void AsmPrinter::emitRemarksSection(remarks::RemarkStreamer &RS) { std::string Buf; raw_string_ostream OS(Buf); std::unique_ptr<remarks::MetaSerializer> MetaSerializer = - Filename ? RemarkSerializer.metaSerializer(OS, StringRef(*Filename)) + Filename ? RemarkSerializer.metaSerializer(OS, Filename->str()) : RemarkSerializer.metaSerializer(OS); MetaSerializer->emit(); @@ -1814,11 +1912,12 @@ bool AsmPrinter::doFinalization(Module &M) { if (TM.Options.EmitAddrsig) { // Emit address-significance attributes for all globals. OutStreamer->emitAddrsig(); - for (const GlobalValue &GV : M.global_values()) - if (!GV.use_empty() && !GV.isThreadLocal() && - !GV.hasDLLImportStorageClass() && !GV.getName().startswith("llvm.") && - !GV.hasAtLeastLocalUnnamedAddr()) + for (const GlobalValue &GV : M.global_values()) { + if (!GV.use_empty() && !GV.isTransitiveUsedByMetadataOnly() && + !GV.isThreadLocal() && !GV.hasDLLImportStorageClass() && + !GV.getName().startswith("llvm.") && !GV.hasAtLeastLocalUnnamedAddr()) OutStreamer->emitAddrsigSym(getSymbol(&GV)); + } } // Emit symbol partition specifications (ELF only). @@ -1831,7 +1930,7 @@ bool AsmPrinter::doFinalization(Module &M) { OutStreamer->SwitchSection( OutContext.getELFSection(".llvm_sympart", ELF::SHT_LLVM_SYMPART, 0, 0, - "", ++UniqueID, nullptr)); + "", false, ++UniqueID, nullptr)); OutStreamer->emitBytes(GV.getPartition()); OutStreamer->emitZeros(1); OutStreamer->emitValue( @@ -2225,6 +2324,11 @@ void AsmPrinter::emitXXStructorList(const DataLayout &DL, const Constant *List, if (Structors.empty()) return; + // Emit the structors in reverse order if we are using the .ctor/.dtor + // initialization scheme. + if (!TM.Options.UseInitArray) + std::reverse(Structors.begin(), Structors.end()); + const Align Align = DL.getPointerPrefAlignment(); for (Structor &S : Structors) { const TargetLoweringObjectFile &Obj = getObjFileLowering(); @@ -2992,8 +3096,7 @@ void AsmPrinter::printOffset(int64_t Offset, raw_ostream &OS) const { } void AsmPrinter::emitNops(unsigned N) { - MCInst Nop; - MF->getSubtarget().getInstrInfo()->getNoop(Nop); + MCInst Nop = MF->getSubtarget().getInstrInfo()->getNop(); for (; N; --N) EmitToStreamer(*OutStreamer, Nop); } @@ -3201,6 +3304,11 @@ void AsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) { } } + if (MBB.isEHCatchretTarget() && + MAI->getExceptionHandlingType() == ExceptionHandling::WinEH) { + OutStreamer->emitLabel(MBB.getEHCatchretSymbol()); + } + // With BB sections, each basic block must handle CFI information on its own // if it begins a section (Entry block is handled separately by // AsmPrinterHandler::beginFunction). 
@@ -3378,13 +3486,13 @@ void AsmPrinter::emitXRayTable() { GroupName = F.getComdat()->getName(); } InstMap = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, - Flags, 0, GroupName, + Flags, 0, GroupName, F.hasComdat(), MCSection::NonUniqueID, LinkedToSym); if (!TM.Options.XRayOmitFunctionIndex) FnSledIndex = OutContext.getELFSection( "xray_fn_idx", ELF::SHT_PROGBITS, Flags | ELF::SHF_WRITE, 0, - GroupName, MCSection::NonUniqueID, LinkedToSym); + GroupName, F.hasComdat(), MCSection::NonUniqueID, LinkedToSym); } else if (MF->getSubtarget().getTargetTriple().isOSBinFormatMachO()) { InstMap = OutContext.getMachOSection("__DATA", "xray_instr_map", 0, SectionKind::getReadOnlyWithRel()); @@ -3468,9 +3576,9 @@ void AsmPrinter::emitPatchableFunctionEntries() { const MCSymbolELF *LinkedToSym = nullptr; StringRef GroupName; - // GNU as < 2.35 did not support section flag 'o'. Use SHF_LINK_ORDER only - // if we are using the integrated assembler. - if (MAI->useIntegratedAssembler()) { + // GNU as < 2.35 did not support section flag 'o'. GNU ld < 2.36 did not + // support mixed SHF_LINK_ORDER and non-SHF_LINK_ORDER sections. + if (MAI->useIntegratedAssembler() || MAI->binutilsIsAtLeast(2, 36)) { Flags |= ELF::SHF_LINK_ORDER; if (F.hasComdat()) { Flags |= ELF::SHF_GROUP; @@ -3480,7 +3588,7 @@ void AsmPrinter::emitPatchableFunctionEntries() { } OutStreamer->SwitchSection(OutContext.getELFSection( "__patchable_function_entries", ELF::SHT_PROGBITS, Flags, 0, GroupName, - MCSection::NonUniqueID, LinkedToSym)); + F.hasComdat(), MCSection::NonUniqueID, LinkedToSym)); emitAlignment(Align(PointerSize)); OutStreamer->emitSymbolValue(CurrentPatchableFunctionEntrySym, PointerSize); } diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index c6e43445e7d0..fc127f4cf9da 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -198,26 +198,14 @@ void AsmPrinter::emitDwarfLengthOrOffset(uint64_t Value) const { OutStreamer->emitIntValue(Value, getDwarfOffsetByteSize()); } -void AsmPrinter::maybeEmitDwarf64Mark() const { - if (!isDwarf64()) - return; - OutStreamer->AddComment("DWARF64 Mark"); - OutStreamer->emitInt32(dwarf::DW_LENGTH_DWARF64); -} - void AsmPrinter::emitDwarfUnitLength(uint64_t Length, const Twine &Comment) const { - assert(isDwarf64() || Length <= dwarf::DW_LENGTH_lo_reserved); - maybeEmitDwarf64Mark(); - OutStreamer->AddComment(Comment); - OutStreamer->emitIntValue(Length, getDwarfOffsetByteSize()); + OutStreamer->emitDwarfUnitLength(Length, Comment); } -void AsmPrinter::emitDwarfUnitLength(const MCSymbol *Hi, const MCSymbol *Lo, - const Twine &Comment) const { - maybeEmitDwarf64Mark(); - OutStreamer->AddComment(Comment); - OutStreamer->emitAbsoluteSymbolDiff(Hi, Lo, getDwarfOffsetByteSize()); +MCSymbol *AsmPrinter::emitDwarfUnitLength(const Twine &Prefix, + const Twine &Comment) const { + return OutStreamer->emitDwarfUnitLength(Prefix, Comment); } void AsmPrinter::emitCallSiteOffset(const MCSymbol *Hi, const MCSymbol *Lo, @@ -257,6 +245,10 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const { case MCCFIInstruction::OpDefCfaRegister: OutStreamer->emitCFIDefCfaRegister(Inst.getRegister()); break; + case MCCFIInstruction::OpLLVMDefAspaceCfa: + OutStreamer->emitCFILLVMDefAspaceCfa(Inst.getRegister(), Inst.getOffset(), + Inst.getAddressSpace()); + break; case MCCFIInstruction::OpOffset: OutStreamer->emitCFIOffset(Inst.getRegister(), Inst.getOffset()); 
break; diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 4a67b0bc2c4d..4a93181f5439 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" @@ -39,54 +40,12 @@ using namespace llvm; #define DEBUG_TYPE "asm-printer" -/// srcMgrDiagHandler - This callback is invoked when the SourceMgr for an -/// inline asm has an error in it. diagInfo is a pointer to the SrcMgrDiagInfo -/// struct above. -static void srcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) { - AsmPrinter::SrcMgrDiagInfo *DiagInfo = - static_cast<AsmPrinter::SrcMgrDiagInfo *>(diagInfo); - assert(DiagInfo && "Diagnostic context not passed down?"); - - // Look up a LocInfo for the buffer this diagnostic is coming from. - unsigned BufNum = DiagInfo->SrcMgr.FindBufferContainingLoc(Diag.getLoc()); - const MDNode *LocInfo = nullptr; - if (BufNum > 0 && BufNum <= DiagInfo->LocInfos.size()) - LocInfo = DiagInfo->LocInfos[BufNum-1]; - - // If the inline asm had metadata associated with it, pull out a location - // cookie corresponding to which line the error occurred on. - unsigned LocCookie = 0; - if (LocInfo) { - unsigned ErrorLine = Diag.getLineNo()-1; - if (ErrorLine >= LocInfo->getNumOperands()) - ErrorLine = 0; - - if (LocInfo->getNumOperands() != 0) - if (const ConstantInt *CI = - mdconst::dyn_extract<ConstantInt>(LocInfo->getOperand(ErrorLine))) - LocCookie = CI->getZExtValue(); - } - - DiagInfo->DiagHandler(Diag, DiagInfo->DiagContext, LocCookie); -} - unsigned AsmPrinter::addInlineAsmDiagBuffer(StringRef AsmStr, const MDNode *LocMDNode) const { - if (!DiagInfo) { - DiagInfo = std::make_unique<SrcMgrDiagInfo>(); - - MCContext &Context = MMI->getContext(); - Context.setInlineSourceManager(&DiagInfo->SrcMgr); - - LLVMContext &LLVMCtx = MMI->getModule()->getContext(); - if (LLVMCtx.getInlineAsmDiagnosticHandler()) { - DiagInfo->DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler(); - DiagInfo->DiagContext = LLVMCtx.getInlineAsmDiagnosticContext(); - DiagInfo->SrcMgr.setDiagHandler(srcMgrDiagHandler, DiagInfo.get()); - } - } - - SourceMgr &SrcMgr = DiagInfo->SrcMgr; + MCContext &Context = MMI->getContext(); + Context.initInlineSourceManager(); + SourceMgr &SrcMgr = *Context.getInlineSourceManager(); + std::vector<const MDNode *> &LocInfos = Context.getLocInfos(); std::unique_ptr<MemoryBuffer> Buffer; // The inline asm source manager will outlive AsmStr, so make a copy of the @@ -98,8 +57,8 @@ unsigned AsmPrinter::addInlineAsmDiagBuffer(StringRef AsmStr, // Store LocMDNode in DiagInfo, using BufNum as an identifier. if (LocMDNode) { - DiagInfo->LocInfos.resize(BufNum); - DiagInfo->LocInfos[BufNum - 1] = LocMDNode; + LocInfos.resize(BufNum); + LocInfos[BufNum - 1] = LocMDNode; } return BufNum; @@ -119,13 +78,14 @@ void AsmPrinter::emitInlineAsm(StringRef Str, const MCSubtargetInfo &STI, Str = Str.substr(0, Str.size()-1); // If the output streamer does not have mature MC support or the integrated - // assembler has been disabled, just emit the blob textually. + // assembler has been disabled or not required, just emit the blob textually. // Otherwise parse the asm and emit it via MC support. 
// This is useful in case the asm parser doesn't handle something but the // system assembler does. const MCAsmInfo *MCAI = TM.getMCAsmInfo(); assert(MCAI && "No MCAsmInfo"); if (!MCAI->useIntegratedAssembler() && + !MCAI->parseInlineAsmUsingAsmParser() && !OutStreamer->isIntegratedAssemblerRequired()) { emitInlineAsmStart(); OutStreamer->emitRawText(Str); @@ -134,10 +94,11 @@ void AsmPrinter::emitInlineAsm(StringRef Str, const MCSubtargetInfo &STI, } unsigned BufNum = addInlineAsmDiagBuffer(Str, LocMDNode); - DiagInfo->SrcMgr.setIncludeDirs(MCOptions.IASSearchPaths); + SourceMgr &SrcMgr = *MMI->getContext().getInlineSourceManager(); + SrcMgr.setIncludeDirs(MCOptions.IASSearchPaths); - std::unique_ptr<MCAsmParser> Parser(createMCAsmParser( - DiagInfo->SrcMgr, OutContext, *OutStreamer, *MAI, BufNum)); + std::unique_ptr<MCAsmParser> Parser( + createMCAsmParser(SrcMgr, OutContext, *OutStreamer, *MAI, BufNum)); // Do not use assembler-level information for parsing inline assembly. OutStreamer->setUseAssemblerInfoForParsing(false); @@ -162,17 +123,14 @@ void AsmPrinter::emitInlineAsm(StringRef Str, const MCSubtargetInfo &STI, emitInlineAsmStart(); // Don't implicitly switch to the text section before the asm. - int Res = Parser->Run(/*NoInitialTextSection*/ true, - /*NoFinalize*/ true); + (void)Parser->Run(/*NoInitialTextSection*/ true, + /*NoFinalize*/ true); emitInlineAsmEnd(STI, &TAP->getSTI()); - - if (Res && !DiagInfo->DiagHandler) - report_fatal_error("Error parsing inline asm\n"); } static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI, MachineModuleInfo *MMI, AsmPrinter *AP, - unsigned LocCookie, raw_ostream &OS) { + uint64_t LocCookie, raw_ostream &OS) { // Switch to the inline assembly variant. OS << "\t.intel_syntax\n\t"; @@ -313,14 +271,16 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI, } static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI, - MachineModuleInfo *MMI, int AsmPrinterVariant, - AsmPrinter *AP, unsigned LocCookie, + MachineModuleInfo *MMI, const MCAsmInfo *MAI, + AsmPrinter *AP, uint64_t LocCookie, raw_ostream &OS) { int CurVariant = -1; // The number of the {.|.|.} region we are in. const char *LastEmitted = AsmStr; // One past the last character emitted. unsigned NumOperands = MI->getNumOperands(); + int AsmPrinterVariant = MAI->getAssemblerDialect(); - OS << '\t'; + if (MAI->getEmitGNUAsmStartIndentationMarker()) + OS << '\t'; while (*LastEmitted) { switch (*LastEmitted) { @@ -523,7 +483,7 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const { // Get the !srcloc metadata node if we have it, and decode the loc cookie from // it. - unsigned LocCookie = 0; + uint64_t LocCookie = 0; const MDNode *LocMD = nullptr; for (unsigned i = MI->getNumOperands(); i != 0; --i) { if (MI->getOperand(i-1).isMetadata() && @@ -542,11 +502,9 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const { SmallString<256> StringData; raw_svector_ostream OS(StringData); - // The variant of the current asmprinter. 
- int AsmPrinterVariant = MAI->getAssemblerDialect(); AsmPrinter *AP = const_cast<AsmPrinter*>(this); if (MI->getInlineAsmDialect() == InlineAsm::AD_ATT) - EmitGCCInlineAsmStr(AsmStr, MI, MMI, AsmPrinterVariant, AP, LocCookie, OS); + EmitGCCInlineAsmStr(AsmStr, MI, MMI, MAI, AP, LocCookie, OS); else EmitMSInlineAsmStr(AsmStr, MI, MMI, AP, LocCookie, OS); @@ -571,23 +529,20 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const { } if (!RestrRegs.empty()) { - unsigned BufNum = addInlineAsmDiagBuffer(OS.str(), LocMD); - auto &SrcMgr = DiagInfo->SrcMgr; - SMLoc Loc = SMLoc::getFromPointer( - SrcMgr.getMemoryBuffer(BufNum)->getBuffer().begin()); - std::string Msg = "inline asm clobber list contains reserved registers: "; - for (auto I = RestrRegs.begin(), E = RestrRegs.end(); I != E; ++I) { - if(I != RestrRegs.begin()) - Msg += ", "; - Msg += TRI->getName(*I); + ListSeparator LS; + for (const Register &RR : RestrRegs) { + Msg += LS; + Msg += TRI->getName(RR); } const char *Note = "Reserved registers on the clobber list may not be " "preserved across the asm statement, and clobbering them may " "lead to undefined behaviour."; - SrcMgr.PrintMessage(Loc, SourceMgr::DK_Warning, Msg); - SrcMgr.PrintMessage(Loc, SourceMgr::DK_Note, Note); + MMI->getModule()->getContext().diagnose(DiagnosticInfoInlineAsm( + LocCookie, Msg.c_str(), DiagnosticSeverity::DS_Warning)); + MMI->getModule()->getContext().diagnose( + DiagnosticInfoInlineAsm(LocCookie, Note, DiagnosticSeverity::DS_Note)); } emitInlineAsm(OS.str(), getSubtargetInfo(), TM.Options.MCOptions, LocMD, @@ -633,7 +588,7 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS, void AsmPrinter::PrintSymbolOperand(const MachineOperand &MO, raw_ostream &OS) { assert(MO.isGlobal() && "caller should check MO.isGlobal"); - getSymbol(MO.getGlobal())->print(OS, MAI); + getSymbolPreferLocal(*MO.getGlobal())->print(OS, MAI); printOffset(MO.getOffset(), OS); } diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index b15e750aaf85..bbb0504550c3 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -273,9 +273,9 @@ static StringRef getPrettyScopeName(const DIScope *Scope) { return "<unnamed-tag>"; case dwarf::DW_TAG_namespace: return "`anonymous namespace'"; + default: + return StringRef(); } - - return StringRef(); } const DISubprogram *CodeViewDebug::collectParentScopeNames( @@ -358,6 +358,25 @@ TypeIndex CodeViewDebug::getScopeIndex(const DIScope *Scope) { return recordTypeIndexForDINode(Scope, TI); } +static StringRef removeTemplateArgs(StringRef Name) { + // Remove template args from the display name. Assume that the template args + // are the last thing in the name. + if (Name.empty() || Name.back() != '>') + return Name; + + int OpenBrackets = 0; + for (int i = Name.size() - 1; i >= 0; --i) { + if (Name[i] == '>') + ++OpenBrackets; + else if (Name[i] == '<') { + --OpenBrackets; + if (OpenBrackets == 0) + return Name.substr(0, i); + } + } + return Name; +} + TypeIndex CodeViewDebug::getFuncIdForSubprogram(const DISubprogram *SP) { assert(SP); @@ -367,8 +386,9 @@ TypeIndex CodeViewDebug::getFuncIdForSubprogram(const DISubprogram *SP) { return I->second; // The display name includes function template arguments. Drop them to match - // MSVC. - StringRef DisplayName = SP->getName().split('<').first; + // MSVC. 
We need to have the template arguments in the DISubprogram name + // because they are used in other symbol records, such as S_GPROC32_IDs. + StringRef DisplayName = removeTemplateArgs(SP->getName()); const DIScope *Scope = SP->getScope(); TypeIndex TI; @@ -784,6 +804,9 @@ void CodeViewDebug::emitCompilerInformation() { // The low byte of the flags indicates the source language. Flags = MapDWLangToCVLang(CU->getSourceLanguage()); // TODO: Figure out which other flags need to be set. + if (MMI->getModule()->getProfileSummary(/*IsCS*/ false) != nullptr) { + Flags |= static_cast<uint32_t>(CompileSym3Flags::PGO); + } OS.AddComment("Flags and language"); OS.emitInt32(Flags); @@ -794,8 +817,8 @@ void CodeViewDebug::emitCompilerInformation() { StringRef CompilerVersion = CU->getProducer(); Version FrontVer = parseVersion(CompilerVersion); OS.AddComment("Frontend version"); - for (int N = 0; N < 4; ++N) - OS.emitInt16(FrontVer.Part[N]); + for (int N : FrontVer.Part) + OS.emitInt16(N); // Some Microsoft tools, like Binscope, expect a backend version number of at // least 8.something, so we'll coerce the LLVM version into a form that @@ -807,8 +830,8 @@ void CodeViewDebug::emitCompilerInformation() { Major = std::min<int>(Major, std::numeric_limits<uint16_t>::max()); Version BackVer = {{ Major, 0, 0, 0 }}; OS.AddComment("Backend version"); - for (int N = 0; N < 4; ++N) - OS.emitInt16(BackVer.Part[N]); + for (int N : BackVer.Part) + OS.emitInt16(N); OS.AddComment("Null-terminated compiler version string"); emitNullTerminatedSymbolName(OS, CompilerVersion); @@ -1357,7 +1380,7 @@ void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) { CurFn->CSRSize = MFI.getCVBytesOfCalleeSavedRegisters(); CurFn->FrameSize = MFI.getStackSize(); CurFn->OffsetAdjustment = MFI.getOffsetAdjustment(); - CurFn->HasStackRealignment = TRI->needsStackRealignment(*MF); + CurFn->HasStackRealignment = TRI->hasStackRealignment(*MF); // For this function S_FRAMEPROC record, figure out which codeview register // will be the frame pointer. @@ -1408,6 +1431,10 @@ void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) { if (Asm->TM.getOptLevel() != CodeGenOpt::None && !GV.hasOptSize() && !GV.hasOptNone()) FPO |= FrameProcedureOptions::OptimizedForSpeed; + if (GV.hasProfileData()) { + FPO |= FrameProcedureOptions::ValidProfileCounts; + FPO |= FrameProcedureOptions::ProfileGuidedOptimization; + } // FIXME: Set GuardCfg when it is implemented. CurFn->FrameProcOpts = FPO; @@ -1460,6 +1487,9 @@ static bool shouldEmitUdt(const DIType *T) { case dwarf::DW_TAG_class_type: case dwarf::DW_TAG_union_type: return false; + default: + // do nothing. + ; } } } @@ -2005,10 +2035,13 @@ static MethodKind translateMethodKindFlags(const DISubprogram *SP, static TypeRecordKind getRecordKind(const DICompositeType *Ty) { switch (Ty->getTag()) { - case dwarf::DW_TAG_class_type: return TypeRecordKind::Class; - case dwarf::DW_TAG_structure_type: return TypeRecordKind::Struct; + case dwarf::DW_TAG_class_type: + return TypeRecordKind::Class; + case dwarf::DW_TAG_structure_type: + return TypeRecordKind::Struct; + default: + llvm_unreachable("unexpected tag"); } - llvm_unreachable("unexpected tag"); } /// Return ClassOptions that should be present on both the forward declaration @@ -2083,6 +2116,7 @@ TypeIndex CodeViewDebug::lowerTypeEnum(const DICompositeType *Ty) { // We assume that the frontend provides all members in source declaration // order, which is what MSVC does. 
if (auto *Enumerator = dyn_cast_or_null<DIEnumerator>(Element)) { + // FIXME: Is it correct to always emit these as unsigned here? EnumeratorRecord ER(MemberAccess::Public, APSInt(Enumerator->getValue(), true), Enumerator->getName()); @@ -3124,6 +3158,27 @@ void CodeViewDebug::emitGlobalVariableList(ArrayRef<CVGlobalVariable> Globals) { } } +void CodeViewDebug::emitConstantSymbolRecord(const DIType *DTy, APSInt &Value, + const std::string &QualifiedName) { + MCSymbol *SConstantEnd = beginSymbolRecord(SymbolKind::S_CONSTANT); + OS.AddComment("Type"); + OS.emitInt32(getTypeIndex(DTy).getIndex()); + + OS.AddComment("Value"); + + // Encoded integers shouldn't need more than 10 bytes. + uint8_t Data[10]; + BinaryStreamWriter Writer(Data, llvm::support::endianness::little); + CodeViewRecordIO IO(Writer); + cantFail(IO.mapEncodedInteger(Value)); + StringRef SRef((char *)Data, Writer.getOffset()); + OS.emitBinaryData(SRef); + + OS.AddComment("Name"); + emitNullTerminatedSymbolName(OS, QualifiedName); + endSymbolRecord(SConstantEnd); +} + void CodeViewDebug::emitStaticConstMemberList() { for (const DIDerivedType *DTy : StaticConstMembers) { const DIScope *Scope = DTy->getScope(); @@ -3139,24 +3194,8 @@ void CodeViewDebug::emitStaticConstMemberList() { else llvm_unreachable("cannot emit a constant without a value"); - std::string QualifiedName = getFullyQualifiedName(Scope, DTy->getName()); - - MCSymbol *SConstantEnd = beginSymbolRecord(SymbolKind::S_CONSTANT); - OS.AddComment("Type"); - OS.emitInt32(getTypeIndex(DTy->getBaseType()).getIndex()); - OS.AddComment("Value"); - - // Encoded integers shouldn't need more than 10 bytes. - uint8_t Data[10]; - BinaryStreamWriter Writer(Data, llvm::support::endianness::little); - CodeViewRecordIO IO(Writer); - cantFail(IO.mapEncodedInteger(Value)); - StringRef SRef((char *)Data, Writer.getOffset()); - OS.emitBinaryData(SRef); - - OS.AddComment("Name"); - emitNullTerminatedSymbolName(OS, QualifiedName); - endSymbolRecord(SConstantEnd); + emitConstantSymbolRecord(DTy->getBaseType(), Value, + getFullyQualifiedName(Scope, DTy->getName())); } } @@ -3220,22 +3259,6 @@ void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) { ? true : DebugHandlerBase::isUnsignedDIType(DIGV->getType()); APSInt Value(APInt(/*BitWidth=*/64, DIE->getElement(1)), isUnsigned); - - MCSymbol *SConstantEnd = beginSymbolRecord(SymbolKind::S_CONSTANT); - OS.AddComment("Type"); - OS.emitInt32(getTypeIndex(DIGV->getType()).getIndex()); - OS.AddComment("Value"); - - // Encoded integers shouldn't need more than 10 bytes. 
- uint8_t data[10]; - BinaryStreamWriter Writer(data, llvm::support::endianness::little); - CodeViewRecordIO IO(Writer); - cantFail(IO.mapEncodedInteger(Value)); - StringRef SRef((char *)data, Writer.getOffset()); - OS.emitBinaryData(SRef); - - OS.AddComment("Name"); - emitNullTerminatedSymbolName(OS, QualifiedName); - endSymbolRecord(SConstantEnd); + emitConstantSymbolRecord(DIGV->getType(), Value, QualifiedName); } } diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h index 9eee5492bc81..d133474ee5aa 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h @@ -315,6 +315,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { void collectDebugInfoForGlobals(); void emitDebugInfoForGlobals(); void emitGlobalVariableList(ArrayRef<CVGlobalVariable> Globals); + void emitConstantSymbolRecord(const DIType *DTy, APSInt &Value, + const std::string &QualifiedName); void emitDebugInfoForGlobal(const CVGlobalVariable &CVGV); void emitStaticConstMemberList(); diff --git a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp index 39b0b027c765..2834d9c3ebbf 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp @@ -785,6 +785,7 @@ void DIEBlock::emitValue(const AsmPrinter *Asm, dwarf::Form Form) const { case dwarf::DW_FORM_block1: Asm->emitInt8(Size); break; case dwarf::DW_FORM_block2: Asm->emitInt16(Size); break; case dwarf::DW_FORM_block4: Asm->emitInt32(Size); break; + case dwarf::DW_FORM_exprloc: case dwarf::DW_FORM_block: Asm->emitULEB128(Size); break; @@ -803,6 +804,7 @@ unsigned DIEBlock::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { case dwarf::DW_FORM_block1: return Size + sizeof(int8_t); case dwarf::DW_FORM_block2: return Size + sizeof(int16_t); case dwarf::DW_FORM_block4: return Size + sizeof(int32_t); + case dwarf::DW_FORM_exprloc: case dwarf::DW_FORM_block: return Size + getULEB128Size(Size); case dwarf::DW_FORM_data16: return 16; default: llvm_unreachable("Improper form for block"); @@ -853,3 +855,27 @@ void DIELocList::emitValue(const AsmPrinter *AP, dwarf::Form Form) const { LLVM_DUMP_METHOD void DIELocList::print(raw_ostream &O) const { O << "LocList: " << Index; } + +//===----------------------------------------------------------------------===// +// DIEAddrOffset Implementation +//===----------------------------------------------------------------------===// + +unsigned DIEAddrOffset::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { + return Addr.SizeOf(AP, dwarf::DW_FORM_addrx) + + Offset.SizeOf(AP, dwarf::DW_FORM_data4); +} + +/// EmitValue - Emit label value. 
+/// +void DIEAddrOffset::emitValue(const AsmPrinter *AP, dwarf::Form Form) const { + Addr.emitValue(AP, dwarf::DW_FORM_addrx); + Offset.emitValue(AP, dwarf::DW_FORM_data4); +} + +LLVM_DUMP_METHOD +void DIEAddrOffset::print(raw_ostream &O) const { + O << "AddrOffset: "; + Addr.print(O); + O << " + "; + Offset.print(O); +} diff --git a/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp index da9997efc01f..802f0e880514 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp @@ -319,6 +319,7 @@ void DIEHash::hashAttribute(const DIEValue &Value, dwarf::Tag Tag) { case DIEValue::isLabel: case DIEValue::isBaseTypeRef: case DIEValue::isDelta: + case DIEValue::isAddrOffset: llvm_unreachable("Add support for additional value types."); } } diff --git a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp index 1c9131edab83..bb24f1414ef1 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp @@ -37,22 +37,6 @@ namespace { using EntryIndex = DbgValueHistoryMap::EntryIndex; } -// If @MI is a DBG_VALUE with debug value described by a -// defined register, returns the number of this register. -// In the other case, returns 0. -static Register isDescribedByReg(const MachineInstr &MI) { - assert(MI.isDebugValue()); - assert(MI.getNumOperands() == 4); - // If the location of variable is an entry value (DW_OP_LLVM_entry_value) - // do not consider it as a register location. - if (MI.getDebugExpression()->isEntryValue()) - return 0; - // If location of variable is described using a register (directly or - // indirectly), this register is always a first operand. - return MI.getDebugOperand(0).isReg() ? MI.getDebugOperand(0).getReg() - : Register(); -} - void InstructionOrdering::initialize(const MachineFunction &MF) { // We give meta instructions the same ordinal as the preceding instruction // because this class is written for the task of comparing positions of @@ -273,6 +257,23 @@ void DbgValueHistoryMap::trimLocationRanges( } } +bool DbgValueHistoryMap::hasNonEmptyLocation(const Entries &Entries) const { + for (const auto &Entry : Entries) { + if (!Entry.isDbgValue()) + continue; + + const MachineInstr *MI = Entry.getInstr(); + assert(MI->isDebugValue()); + // A DBG_VALUE $noreg is an empty variable location + if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg() == 0) + continue; + + return true; + } + + return false; +} + void DbgLabelInstrMap::addInstr(InlinedEntity Label, const MachineInstr &MI) { assert(MI.isDebugLabel() && "not a DBG_LABEL"); LabelInstr[Label] = &MI; @@ -316,24 +317,44 @@ static void addRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo, } /// Create a clobbering entry and end all open debug value entries -/// for \p Var that are described by \p RegNo using that entry. +/// for \p Var that are described by \p RegNo using that entry. Inserts into \p +/// FellowRegisters the set of Registers that were also used to describe \p Var +/// alongside \p RegNo. static void clobberRegEntries(InlinedEntity Var, unsigned RegNo, const MachineInstr &ClobberingInstr, DbgValueEntriesMap &LiveEntries, - DbgValueHistoryMap &HistMap) { + DbgValueHistoryMap &HistMap, + SmallVectorImpl<Register> &FellowRegisters) { EntryIndex ClobberIndex = HistMap.startClobber(Var, ClobberingInstr); - // Close all entries whose values are described by the register. 
SmallVector<EntryIndex, 4> IndicesToErase; + // If a given register appears in a live DBG_VALUE_LIST for Var alongside the + // clobbered register, and never appears in a live DBG_VALUE* for Var without + // the clobbered register, then it is no longer linked to the variable. + SmallSet<Register, 4> MaybeRemovedRegisters; + SmallSet<Register, 4> KeepRegisters; for (auto Index : LiveEntries[Var]) { auto &Entry = HistMap.getEntry(Var, Index); assert(Entry.isDbgValue() && "Not a DBG_VALUE in LiveEntries"); - if (isDescribedByReg(*Entry.getInstr()) == RegNo) { + if (Entry.getInstr()->isDebugEntryValue()) + continue; + if (Entry.getInstr()->hasDebugOperandForReg(RegNo)) { IndicesToErase.push_back(Index); Entry.endEntry(ClobberIndex); + for (auto &MO : Entry.getInstr()->debug_operands()) + if (MO.isReg() && MO.getReg() && MO.getReg() != RegNo) + MaybeRemovedRegisters.insert(MO.getReg()); + } else { + for (auto &MO : Entry.getInstr()->debug_operands()) + if (MO.isReg() && MO.getReg()) + KeepRegisters.insert(MO.getReg()); } } + for (Register Reg : MaybeRemovedRegisters) + if (!KeepRegisters.contains(Reg)) + FellowRegisters.push_back(Reg); + // Drop all entries that have ended. for (auto Index : IndicesToErase) LiveEntries[Var].erase(Index); @@ -361,17 +382,24 @@ static void handleNewDebugValue(InlinedEntity Var, const MachineInstr &DV, IndicesToErase.push_back(Index); Entry.endEntry(NewIndex); } - if (Register Reg = isDescribedByReg(DV)) - TrackedRegs[Reg] |= !Overlaps; + if (!DV.isDebugEntryValue()) + for (const MachineOperand &Op : DV.debug_operands()) + if (Op.isReg() && Op.getReg()) + TrackedRegs[Op.getReg()] |= !Overlaps; } // If the new debug value is described by a register, add tracking of // that register if it is not already tracked. - if (Register NewReg = isDescribedByReg(DV)) { - if (!TrackedRegs.count(NewReg)) - addRegDescribedVar(RegVars, NewReg, Var); - LiveEntries[Var].insert(NewIndex); - TrackedRegs[NewReg] = true; + if (!DV.isDebugEntryValue()) { + for (const MachineOperand &Op : DV.debug_operands()) { + if (Op.isReg() && Op.getReg()) { + Register NewReg = Op.getReg(); + if (!TrackedRegs.count(NewReg)) + addRegDescribedVar(RegVars, NewReg, Var); + LiveEntries[Var].insert(NewIndex); + TrackedRegs[NewReg] = true; + } + } } // Drop tracking of registers that are no longer used. @@ -394,9 +422,16 @@ static void clobberRegisterUses(RegDescribedVarsMap &RegVars, DbgValueEntriesMap &LiveEntries, const MachineInstr &ClobberingInstr) { // Iterate over all variables described by this register and add this - // instruction to their history, clobbering it. - for (const auto &Var : I->second) - clobberRegEntries(Var, I->first, ClobberingInstr, LiveEntries, HistMap); + // instruction to their history, clobbering it. All registers that also + // describe the clobbered variables (i.e. in variadic debug values) will have + // those Variables removed from their DescribedVars. 
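The FellowRegisters bookkeeping above is easy to misread, so here is a minimal standalone model of it in plain C++ (std::set instead of LLVM's SmallSet, invented names, independent of this diff): a register is unlinked from a variable only if every live location that used it also used the clobbered register.

#include <cstdio>
#include <set>
#include <vector>

using Register = unsigned;
// Registers referenced by one live (possibly variadic) debug value.
using LiveLoc = std::vector<Register>;

// Registers that stop describing the variable once ClobberedReg dies: those
// that only ever appeared alongside the clobbered register.
std::vector<Register> fellowRegisters(const std::vector<LiveLoc> &Live,
                                      Register ClobberedReg) {
  std::set<Register> MaybeRemoved, Keep;
  for (const LiveLoc &Loc : Live) {
    bool Dies = false;
    for (Register R : Loc)
      Dies |= (R == ClobberedReg);
    for (Register R : Loc)
      if (R != ClobberedReg)
        (Dies ? MaybeRemoved : Keep).insert(R);
  }
  std::vector<Register> Out;
  for (Register R : MaybeRemoved)
    if (!Keep.count(R))
      Out.push_back(R);
  return Out;
}

int main() {
  // One DBG_VALUE_LIST over {r1, r2}: clobbering r1 also unlinks r2...
  for (Register R : fellowRegisters({{1, 2}}, 1))
    std::printf("dropped r%u\n", R); // dropped r2
  // ...unless another live location for the same variable still uses r2.
  for (Register R : fellowRegisters({{1, 2}, {2, 3}}, 1))
    std::printf("dropped r%u\n", R); // (nothing)
}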
+ for (const auto &Var : I->second) {
+ SmallVector<Register, 4> FellowRegisters;
+ clobberRegEntries(Var, I->first, ClobberingInstr, LiveEntries, HistMap,
+ FellowRegisters);
+ for (Register RegNo : FellowRegisters)
+ dropRegDescribedVar(RegVars, RegNo, Var);
+ }
RegVars.erase(I);
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
index 68a4bfba42a7..c81288c0e460 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
@@ -35,7 +35,8 @@ Optional<DbgVariableLocation>
DbgVariableLocation::extractFromMachineInstruction(
const MachineInstr &Instruction) {
DbgVariableLocation Location;
- if (!Instruction.isDebugValue())
+ // Variables calculated from multiple locations can't be represented here.
+ if (Instruction.getNumDebugOperands() != 1)
return None;
if (!Instruction.getDebugOperand(0).isReg())
return None;
@@ -46,6 +47,15 @@ DbgVariableLocation::extractFromMachineInstruction(
int64_t Offset = 0;
const DIExpression *DIExpr = Instruction.getDebugExpression();
auto Op = DIExpr->expr_op_begin();
+ // We can handle a DBG_VALUE_LIST iff it has exactly one location operand that
+ // appears exactly once at the start of the expression.
+ if (Instruction.isDebugValueList()) {
+ if (Instruction.getNumDebugOperands() == 1 &&
+ Op->getOp() == dwarf::DW_OP_LLVM_arg)
+ ++Op;
+ else
+ return None;
+ }
while (Op != DIExpr->expr_op_end()) {
switch (Op->getOp()) {
case dwarf::DW_OP_constu: {
@@ -164,6 +174,12 @@ uint64_t DebugHandlerBase::getBaseTypeSize(const DIType *Ty) {
}
bool DebugHandlerBase::isUnsignedDIType(const DIType *Ty) {
+ // SROA may generate dbg value intrinsics to assign an unsigned value to
+ // Fortran CHARACTER(1) type variables. Treat them as unsigned.
+ if (isa<DIStringType>(Ty)) {
+ assert((Ty->getSizeInBits()) == 8 && "Not a valid unsigned type!");
+ return true;
+ }
if (auto *CTy = dyn_cast<DICompositeType>(Ty)) {
// FIXME: Enums without a fixed underlying type have unknown signedness
// here, leading to incorrectly emitted constants.
@@ -261,7 +277,8 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
continue;
auto IsDescribedByReg = [](const MachineInstr *MI) {
- return MI->getDebugOperand(0).isReg() && MI->getDebugOperand(0).getReg();
+ return any_of(MI->debug_operands(),
+ [](auto &MO) { return MO.isReg() && MO.getReg(); });
};
// The first mention of a function argument gets the CurrentFnBegin label,
// doing that violates the ranges that are calculated in the history map.
// However, we currently do not emit debug values for constant arguments
// directly at the start of the function, so this code is still useful.
- // FIXME: If the first mention of an argument is in a unique section basic
- // block, we cannot always assign the CurrentFnBeginLabel as it lies in a
- // different section. Temporarily, we disable generating loc list
- // information or DW_AT_const_value when the block is in a different
- // section.
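Stepping back to the extractFromMachineInstruction change earlier in this hunk, a rough standalone sketch of the admissibility test it adds (stand-in types and opcode values, not LLVM's API): a DBG_VALUE_LIST collapses to the simple register+offset form only if it has one debug operand, referenced exactly once, up front, as DW_OP_LLVM_arg 0.

#include <cstddef>
#include <cstdint>
#include <vector>

struct ExprOp { uint64_t Opcode; uint64_t Arg0; };
constexpr uint64_t OpLLVMArg = 0x1000; // placeholder value, not DWARF's

bool collapsesToSimpleLocation(bool IsList, unsigned NumDebugOperands,
                               const std::vector<ExprOp> &Ops) {
  if (!IsList)
    return true; // classic DBG_VALUE: the single operand is implicit
  if (NumDebugOperands != 1 || Ops.empty() ||
      Ops.front().Opcode != OpLLVMArg || Ops.front().Arg0 != 0)
    return false;
  // Any later reference to an argument disqualifies it.
  for (std::size_t I = 1; I < Ops.size(); ++I)
    if (Ops[I].Opcode == OpLLVMArg)
      return false;
  return true;
}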
const DILocalVariable *DIVar = Entries.front().getInstr()->getDebugVariable(); if (DIVar->isParameter() && - getDISubprogram(DIVar->getScope())->describes(&MF->getFunction()) && - Entries.front().getInstr()->getParent()->sameSection(&MF->front())) { + getDISubprogram(DIVar->getScope())->describes(&MF->getFunction())) { if (!IsDescribedByReg(Entries.front().getInstr())) LabelsBeforeInsn[Entries.front().getInstr()] = Asm->getFunctionBegin(); if (Entries.front().getInstr()->getDebugExpression()->isFragment()) { @@ -368,22 +379,25 @@ void DebugHandlerBase::endInstruction() { DenseMap<const MachineInstr *, MCSymbol *>::iterator I = LabelsAfterInsn.find(CurMI); - CurMI = nullptr; - - // No label needed. - if (I == LabelsAfterInsn.end()) - return; - // Label already assigned. - if (I->second) + // No label needed or label already assigned. + if (I == LabelsAfterInsn.end() || I->second) { + CurMI = nullptr; return; + } - // We need a label after this instruction. - if (!PrevLabel) { + // We need a label after this instruction. With basic block sections, just + // use the end symbol of the section if this is the last instruction of the + // section. This reduces the need for an additional label and also helps + // merging ranges. + if (CurMI->getParent()->isEndSection() && CurMI->getNextNode() == nullptr) { + PrevLabel = CurMI->getParent()->getEndSymbol(); + } else if (!PrevLabel) { PrevLabel = MMI->getContext().createTempSymbol(); Asm->OutStreamer->emitLabel(PrevLabel); } I->second = PrevLabel; + CurMI = nullptr; } void DebugHandlerBase::endFunction(const MachineFunction *MF) { diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h index 36278f2e9e2d..62ebadaf3cbe 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h +++ b/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h @@ -34,10 +34,10 @@ struct TargetIndexLocation { } }; -/// A single location or constant. -class DbgValueLoc { - /// Any complex address location expression for this DbgValueLoc. - const DIExpression *Expression; +/// A single location or constant within a variable location description, with +/// either a single entry (with an optional DIExpression) used for a DBG_VALUE, +/// or a list of entries used for a DBG_VALUE_LIST. +class DbgValueLocEntry { /// Type of entry that this represents. 
enum EntryType { @@ -64,24 +64,16 @@ class DbgValueLoc { }; public: - DbgValueLoc(const DIExpression *Expr, int64_t i) - : Expression(Expr), EntryKind(E_Integer) { - Constant.Int = i; - } - DbgValueLoc(const DIExpression *Expr, const ConstantFP *CFP) - : Expression(Expr), EntryKind(E_ConstantFP) { + DbgValueLocEntry(int64_t i) : EntryKind(E_Integer) { Constant.Int = i; } + DbgValueLocEntry(const ConstantFP *CFP) : EntryKind(E_ConstantFP) { Constant.CFP = CFP; } - DbgValueLoc(const DIExpression *Expr, const ConstantInt *CIP) - : Expression(Expr), EntryKind(E_ConstantInt) { + DbgValueLocEntry(const ConstantInt *CIP) : EntryKind(E_ConstantInt) { Constant.CIP = CIP; } - DbgValueLoc(const DIExpression *Expr, MachineLocation Loc) - : Expression(Expr), EntryKind(E_Location), Loc(Loc) { - assert(cast<DIExpression>(Expr)->isValid()); - } - DbgValueLoc(const DIExpression *Expr, TargetIndexLocation Loc) - : Expression(Expr), EntryKind(E_TargetIndexLocation), TIL(Loc) {} + DbgValueLocEntry(MachineLocation Loc) : EntryKind(E_Location), Loc(Loc) {} + DbgValueLocEntry(TargetIndexLocation Loc) + : EntryKind(E_TargetIndexLocation), TIL(Loc) {} bool isLocation() const { return EntryKind == E_Location; } bool isTargetIndexLocation() const { @@ -95,11 +87,7 @@ public: const ConstantInt *getConstantInt() const { return Constant.CIP; } MachineLocation getLoc() const { return Loc; } TargetIndexLocation getTargetIndexLocation() const { return TIL; } - bool isFragment() const { return getExpression()->isFragment(); } - bool isEntryVal() const { return getExpression()->isEntryValue(); } - const DIExpression *getExpression() const { return Expression; } - friend bool operator==(const DbgValueLoc &, const DbgValueLoc &); - friend bool operator<(const DbgValueLoc &, const DbgValueLoc &); + friend bool operator==(const DbgValueLocEntry &, const DbgValueLocEntry &); #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void dump() const { if (isLocation()) { @@ -111,6 +99,67 @@ public: Constant.CIP->dump(); else if (isConstantFP()) Constant.CFP->dump(); + } +#endif +}; + +/// The location of a single variable, composed of an expression and 0 or more +/// DbgValueLocEntries. +class DbgValueLoc { + /// Any complex address location expression for this DbgValueLoc. + const DIExpression *Expression; + + SmallVector<DbgValueLocEntry, 2> ValueLocEntries; + + bool IsVariadic; + +public: + DbgValueLoc(const DIExpression *Expr, ArrayRef<DbgValueLocEntry> Locs) + : Expression(Expr), ValueLocEntries(Locs.begin(), Locs.end()), + IsVariadic(true) { +#ifndef NDEBUG + // Currently, DBG_VALUE_VAR expressions must use stack_value. + assert(Expr && Expr->isValid() && + is_contained(Locs, dwarf::DW_OP_stack_value)); +#endif + } + + DbgValueLoc(const DIExpression *Expr, ArrayRef<DbgValueLocEntry> Locs, + bool IsVariadic) + : Expression(Expr), ValueLocEntries(Locs.begin(), Locs.end()), + IsVariadic(IsVariadic) { +#ifndef NDEBUG + assert(cast<DIExpression>(Expr)->isValid() || + !any_of(Locs, [](auto LE) { return LE.isLocation(); })); + if (!IsVariadic) { + assert(ValueLocEntries.size() == 1); + } else { + // Currently, DBG_VALUE_VAR expressions must use stack_value. 
+ assert(Expr && Expr->isValid() && + is_contained(Expr->getElements(), dwarf::DW_OP_stack_value)); + } +#endif + } + + DbgValueLoc(const DIExpression *Expr, DbgValueLocEntry Loc) + : Expression(Expr), ValueLocEntries(1, Loc), IsVariadic(false) { + assert(((Expr && Expr->isValid()) || !Loc.isLocation()) && + "DBG_VALUE with a machine location must have a valid expression."); + } + + bool isFragment() const { return getExpression()->isFragment(); } + bool isEntryVal() const { return getExpression()->isEntryValue(); } + bool isVariadic() const { return IsVariadic; } + const DIExpression *getExpression() const { return Expression; } + const ArrayRef<DbgValueLocEntry> getLocEntries() const { + return ValueLocEntries; + } + friend bool operator==(const DbgValueLoc &, const DbgValueLoc &); + friend bool operator<(const DbgValueLoc &, const DbgValueLoc &); +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + LLVM_DUMP_METHOD void dump() const { + for (DbgValueLocEntry DV : ValueLocEntries) + DV.dump(); if (Expression) Expression->dump(); } @@ -180,30 +229,32 @@ public: DwarfCompileUnit &TheCU); }; -/// Compare two DbgValueLocs for equality. -inline bool operator==(const DbgValueLoc &A, - const DbgValueLoc &B) { +/// Compare two DbgValueLocEntries for equality. +inline bool operator==(const DbgValueLocEntry &A, const DbgValueLocEntry &B) { if (A.EntryKind != B.EntryKind) return false; - if (A.Expression != B.Expression) - return false; - switch (A.EntryKind) { - case DbgValueLoc::E_Location: + case DbgValueLocEntry::E_Location: return A.Loc == B.Loc; - case DbgValueLoc::E_TargetIndexLocation: + case DbgValueLocEntry::E_TargetIndexLocation: return A.TIL == B.TIL; - case DbgValueLoc::E_Integer: + case DbgValueLocEntry::E_Integer: return A.Constant.Int == B.Constant.Int; - case DbgValueLoc::E_ConstantFP: + case DbgValueLocEntry::E_ConstantFP: return A.Constant.CFP == B.Constant.CFP; - case DbgValueLoc::E_ConstantInt: + case DbgValueLocEntry::E_ConstantInt: return A.Constant.CIP == B.Constant.CIP; } llvm_unreachable("unhandled EntryKind"); } +/// Compare two DbgValueLocs for equality. +inline bool operator==(const DbgValueLoc &A, const DbgValueLoc &B) { + return A.ValueLocEntries == B.ValueLocEntries && + A.Expression == B.Expression && A.IsVariadic == B.IsVariadic; +} + /// Compare two fragments based on their offset. inline bool operator<(const DbgValueLoc &A, const DbgValueLoc &B) { diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index c20ac6040aef..e36b7e2ae885 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -53,8 +53,7 @@ void DwarfCFIExceptionBase::endFragment() { DwarfCFIException::DwarfCFIException(AsmPrinter *A) : DwarfCFIExceptionBase(A), shouldEmitPersonality(false), - forceEmitPersonality(false), shouldEmitLSDA(false), - shouldEmitMoves(false) {} + forceEmitPersonality(false), shouldEmitLSDA(false) {} DwarfCFIException::~DwarfCFIException() {} @@ -87,16 +86,15 @@ static MCSymbol *getExceptionSym(AsmPrinter *Asm, } void DwarfCFIException::beginFunction(const MachineFunction *MF) { - shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false; + shouldEmitPersonality = shouldEmitLSDA = false; const Function &F = MF->getFunction(); // If any landing pads survive, we need an EH table. bool hasLandingPads = !MF->getLandingPads().empty(); // See if we need frame move info. 
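The next hunks replace the old shouldEmitMoves/needsCFIMoves booleans with a tri-state AsmPrinter::CFISection; a toy standalone model of that decision (selection logic invented for illustration, not LLVM's):

#include <cstdio>

enum class CFISection { None, EH, Debug };

CFISection selectCFISection(bool NeedsEHUnwind, bool NeedsDebugUnwind) {
  if (NeedsEHUnwind)
    return CFISection::EH; // .cfi_sections .eh_frame (the implicit default)
  if (NeedsDebugUnwind)
    return CFISection::Debug; // .cfi_sections .debug_frame
  return CFISection::None;
}

int main() {
  std::printf("%d\n", (int)selectCFISection(false, true)); // 2 -> Debug
}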
- AsmPrinter::CFIMoveType MoveType = Asm->needsCFIMoves(); - - shouldEmitMoves = MoveType != AsmPrinter::CFI_M_None; + bool shouldEmitMoves = + Asm->getFunctionCFISectionType(*MF) != AsmPrinter::CFISection::None; const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); unsigned PerEncoding = TLOF.getPersonalityEncoding(); @@ -122,8 +120,13 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) { shouldEmitLSDA = shouldEmitPersonality && LSDAEncoding != dwarf::DW_EH_PE_omit; - shouldEmitCFI = MF->getMMI().getContext().getAsmInfo()->usesCFIForEH() && - (shouldEmitPersonality || shouldEmitMoves); + const MCAsmInfo &MAI = *MF->getMMI().getContext().getAsmInfo(); + if (MAI.getExceptionHandlingType() != ExceptionHandling::None) + shouldEmitCFI = + MAI.usesCFIForEH() && (shouldEmitPersonality || shouldEmitMoves); + else + shouldEmitCFI = Asm->needsCFIForDebug() && shouldEmitMoves; + beginFragment(&*MF->begin(), getExceptionSym); } @@ -133,10 +136,14 @@ void DwarfCFIException::beginFragment(const MachineBasicBlock *MBB, return; if (!hasEmittedCFISections) { - if (Asm->needsOnlyDebugCFIMoves()) - Asm->OutStreamer->emitCFISections(false, true); - else if (Asm->TM.Options.ForceDwarfFrameSection) - Asm->OutStreamer->emitCFISections(true, true); + AsmPrinter::CFISection CFISecType = Asm->getModuleCFISectionType(); + // If we don't say anything it implies `.cfi_sections .eh_frame`, so we + // chose not to be verbose in that case. And with `ForceDwarfFrameSection`, + // we should always emit .debug_frame. + if (CFISecType == AsmPrinter::CFISection::Debug || + Asm->TM.Options.ForceDwarfFrameSection) + Asm->OutStreamer->emitCFISections( + CFISecType == AsmPrinter::CFISection::EH, true); hasEmittedCFISections = true; } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index befc4bba19a2..faa14dca1c3f 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/DIE.h" #include "llvm/CodeGen/MachineFunction.h" @@ -73,11 +74,35 @@ void DwarfCompileUnit::addLabelAddress(DIE &Die, dwarf::Attribute Attribute, if (Label) DD->addArangeLabel(SymbolCU(this, Label)); - unsigned idx = DD->getAddressPool().getIndex(Label); - Die.addValue(DIEValueAllocator, Attribute, - DD->getDwarfVersion() >= 5 ? dwarf::DW_FORM_addrx - : dwarf::DW_FORM_GNU_addr_index, - DIEInteger(idx)); + bool UseAddrOffsetFormOrExpressions = + DD->useAddrOffsetForm() || DD->useAddrOffsetExpressions(); + + const MCSymbol *Base = nullptr; + if (Label->isInSection() && UseAddrOffsetFormOrExpressions) + Base = DD->getSectionLabel(&Label->getSection()); + + if (!Base || Base == Label) { + unsigned idx = DD->getAddressPool().getIndex(Label); + addAttribute(Die, Attribute, + DD->getDwarfVersion() >= 5 ? dwarf::DW_FORM_addrx + : dwarf::DW_FORM_GNU_addr_index, + DIEInteger(idx)); + return; + } + + // Could be extended to work with DWARFv4 Split DWARF if that's important for + // someone. In that case DW_FORM_data would be used. 
+ assert(DD->getDwarfVersion() >= 5 && + "Addr+offset expressions are only valuable when using debug_addr (to " + "reduce relocations) available in DWARFv5 or higher"); + if (DD->useAddrOffsetExpressions()) { + auto *Loc = new (DIEValueAllocator) DIEBlock(); + addPoolOpAddress(*Loc, Label); + addBlock(Die, Attribute, dwarf::DW_FORM_exprloc, Loc); + } else + addAttribute(Die, Attribute, dwarf::DW_FORM_LLVM_addrx_offset, + new (DIEValueAllocator) DIEAddrOffset( + DD->getAddressPool().getIndex(Base), Label, Base)); } void DwarfCompileUnit::addLocalLabelAddress(DIE &Die, @@ -87,11 +112,9 @@ void DwarfCompileUnit::addLocalLabelAddress(DIE &Die, DD->addArangeLabel(SymbolCU(this, Label)); if (Label) - Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_addr, - DIELabel(Label)); + addAttribute(Die, Attribute, dwarf::DW_FORM_addr, DIELabel(Label)); else - Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_addr, - DIEInteger(0)); + addAttribute(Die, Attribute, dwarf::DW_FORM_addr, DIEInteger(0)); } unsigned DwarfCompileUnit::getOrCreateSourceID(const DIFile *File) { @@ -184,11 +207,16 @@ void DwarfCompileUnit::addLocationAttribute( const DIExpression *Expr = GE.Expr; // For compatibility with DWARF 3 and earlier, - // DW_AT_location(DW_OP_constu, X, DW_OP_stack_value) becomes + // DW_AT_location(DW_OP_constu, X, DW_OP_stack_value) or + // DW_AT_location(DW_OP_consts, X, DW_OP_stack_value) becomes // DW_AT_const_value(X). if (GlobalExprs.size() == 1 && Expr && Expr->isConstant()) { addToAccelTable = true; - addConstantValue(*VariableDIE, /*Unsigned=*/true, Expr->getElement(1)); + addConstantValue( + *VariableDIE, + DIExpression::SignedOrUnsignedConstant::UnsignedConstant == + *Expr->isConstant(), + Expr->getElement(1)); break; } @@ -422,10 +450,7 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) { // FIXME: duplicated from Target/WebAssembly/WebAssembly.h // don't want to depend on target specific headers in this code? const unsigned TI_GLOBAL_RELOC = 3; - // FIXME: when writing dwo, we need to avoid relocations. Probably - // the "right" solution is to treat globals the way func and data symbols - // are (with entries in .debug_addr). - if (FrameBase.Location.WasmLoc.Kind == TI_GLOBAL_RELOC && !isDwoUnit()) { + if (FrameBase.Location.WasmLoc.Kind == TI_GLOBAL_RELOC) { // These need to be relocatable. assert(FrameBase.Location.WasmLoc.Index == 0); // Only SP so far. auto SPSym = cast<MCSymbolWasm>( @@ -443,8 +468,16 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) { DIELoc *Loc = new (DIEValueAllocator) DIELoc; addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_WASM_location); addSInt(*Loc, dwarf::DW_FORM_sdata, TI_GLOBAL_RELOC); - addLabel(*Loc, dwarf::DW_FORM_data4, SPSym); - DD->addArangeLabel(SymbolCU(this, SPSym)); + if (!isDwoUnit()) { + addLabel(*Loc, dwarf::DW_FORM_data4, SPSym); + DD->addArangeLabel(SymbolCU(this, SPSym)); + } else { + // FIXME: when writing dwo, we need to avoid relocations. Probably + // the "right" solution is to treat globals the way func and data + // symbols are (with entries in .debug_addr). + // For now, since we only ever use index 0, this should work as-is. + addUInt(*Loc, dwarf::DW_FORM_data4, FrameBase.Location.WasmLoc.Index); + } addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value); addBlock(*SPDie, dwarf::DW_AT_frame_base, Loc); } else { @@ -698,36 +731,92 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, // Check if variable has a single location description. 
if (auto *DVal = DV.getValueLoc()) { - if (DVal->isLocation()) - addVariableAddress(DV, *VariableDie, DVal->getLoc()); - else if (DVal->isInt()) { - auto *Expr = DV.getSingleExpression(); - if (Expr && Expr->getNumElements()) { + if (!DVal->isVariadic()) { + const DbgValueLocEntry *Entry = DVal->getLocEntries().begin(); + if (Entry->isLocation()) { + addVariableAddress(DV, *VariableDie, Entry->getLoc()); + } else if (Entry->isInt()) { + auto *Expr = DV.getSingleExpression(); + if (Expr && Expr->getNumElements()) { + DIELoc *Loc = new (DIEValueAllocator) DIELoc; + DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); + // If there is an expression, emit raw unsigned bytes. + DwarfExpr.addFragmentOffset(Expr); + DwarfExpr.addUnsignedConstant(Entry->getInt()); + DwarfExpr.addExpression(Expr); + addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize()); + if (DwarfExpr.TagOffset) + addUInt(*VariableDie, dwarf::DW_AT_LLVM_tag_offset, + dwarf::DW_FORM_data1, *DwarfExpr.TagOffset); + } else + addConstantValue(*VariableDie, Entry->getInt(), DV.getType()); + } else if (Entry->isConstantFP()) { + addConstantFPValue(*VariableDie, Entry->getConstantFP()); + } else if (Entry->isConstantInt()) { + addConstantValue(*VariableDie, Entry->getConstantInt(), DV.getType()); + } else if (Entry->isTargetIndexLocation()) { DIELoc *Loc = new (DIEValueAllocator) DIELoc; DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); - // If there is an expression, emit raw unsigned bytes. - DwarfExpr.addFragmentOffset(Expr); - DwarfExpr.addUnsignedConstant(DVal->getInt()); - DwarfExpr.addExpression(Expr); + const DIBasicType *BT = dyn_cast<DIBasicType>( + static_cast<const Metadata *>(DV.getVariable()->getType())); + DwarfDebug::emitDebugLocValue(*Asm, BT, *DVal, DwarfExpr); addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize()); - if (DwarfExpr.TagOffset) - addUInt(*VariableDie, dwarf::DW_AT_LLVM_tag_offset, - dwarf::DW_FORM_data1, *DwarfExpr.TagOffset); - - } else - addConstantValue(*VariableDie, DVal->getInt(), DV.getType()); - } else if (DVal->isConstantFP()) { - addConstantFPValue(*VariableDie, DVal->getConstantFP()); - } else if (DVal->isConstantInt()) { - addConstantValue(*VariableDie, DVal->getConstantInt(), DV.getType()); - } else if (DVal->isTargetIndexLocation()) { - DIELoc *Loc = new (DIEValueAllocator) DIELoc; - DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); - const DIBasicType *BT = dyn_cast<DIBasicType>( - static_cast<const Metadata *>(DV.getVariable()->getType())); - DwarfDebug::emitDebugLocValue(*Asm, BT, *DVal, DwarfExpr); - addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize()); + } + return VariableDie; } + // If any of the location entries are registers with the value 0, then the + // location is undefined. 
+ if (any_of(DVal->getLocEntries(), [](const DbgValueLocEntry &Entry) { + return Entry.isLocation() && !Entry.getLoc().getReg(); + })) + return VariableDie; + const DIExpression *Expr = DV.getSingleExpression(); + assert(Expr && "Variadic Debug Value must have an Expression."); + DIELoc *Loc = new (DIEValueAllocator) DIELoc; + DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); + DwarfExpr.addFragmentOffset(Expr); + DIExpressionCursor Cursor(Expr); + const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo(); + + auto AddEntry = [&](const DbgValueLocEntry &Entry, + DIExpressionCursor &Cursor) { + if (Entry.isLocation()) { + if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, + Entry.getLoc().getReg())) + return false; + } else if (Entry.isInt()) { + // If there is an expression, emit raw unsigned bytes. + DwarfExpr.addUnsignedConstant(Entry.getInt()); + } else if (Entry.isConstantFP()) { + APInt RawBytes = Entry.getConstantFP()->getValueAPF().bitcastToAPInt(); + DwarfExpr.addUnsignedConstant(RawBytes); + } else if (Entry.isConstantInt()) { + APInt RawBytes = Entry.getConstantInt()->getValue(); + DwarfExpr.addUnsignedConstant(RawBytes); + } else if (Entry.isTargetIndexLocation()) { + TargetIndexLocation Loc = Entry.getTargetIndexLocation(); + // TODO TargetIndexLocation is a target-independent. Currently only the + // WebAssembly-specific encoding is supported. + assert(Asm->TM.getTargetTriple().isWasm()); + DwarfExpr.addWasmLocation(Loc.Index, static_cast<uint64_t>(Loc.Offset)); + } else { + llvm_unreachable("Unsupported Entry type."); + } + return true; + }; + + DwarfExpr.addExpression( + std::move(Cursor), + [&](unsigned Idx, DIExpressionCursor &Cursor) -> bool { + return AddEntry(DVal->getLocEntries()[Idx], Cursor); + }); + + // Now attach the location information to the DIE. + addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize()); + if (DwarfExpr.TagOffset) + addUInt(*VariableDie, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1, + *DwarfExpr.TagOffset); + return VariableDie; } @@ -1381,7 +1470,7 @@ void DwarfCompileUnit::addLocationList(DIE &Die, dwarf::Attribute Attribute, dwarf::Form Form = (DD->getDwarfVersion() >= 5) ? dwarf::DW_FORM_loclistx : DD->getDwarfSectionOffsetForm(); - Die.addValue(DIEValueAllocator, Attribute, Form, DIELocList(Index)); + addAttribute(Die, Attribute, Form, DIELocList(Index)); } void DwarfCompileUnit::applyVariableAttributes(const DbgVariable &Var, @@ -1413,7 +1502,7 @@ void DwarfCompileUnit::applyLabelAttributes(const DbgLabel &Label, /// Add a Dwarf expression attribute data and value. 
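The DW_OP_LLVM_arg callback above is the core of variadic location emission: each arg index pulls in one entry of the DBG_VALUE_LIST's operand array. A minimal standalone stack-machine model of that data flow (invented enum; real emission lowers entries to DWARF register/constant ops rather than substituting values):

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <vector>

enum class Op { LLVMArg, ConstU, Plus, StackValue };
struct ExprOp { Op Kind; uint64_t Arg; };

uint64_t evaluate(const std::vector<ExprOp> &Expr,
                  const std::vector<uint64_t> &ArgValues) {
  std::vector<uint64_t> Stack;
  for (const ExprOp &E : Expr) {
    switch (E.Kind) {
    case Op::LLVMArg: // push the value of debug operand #Arg
      Stack.push_back(ArgValues.at(E.Arg));
      break;
    case Op::ConstU:
      Stack.push_back(E.Arg);
      break;
    case Op::Plus: {
      assert(Stack.size() >= 2);
      uint64_t B = Stack.back();
      Stack.pop_back();
      Stack.back() += B;
      break;
    }
    case Op::StackValue: // the computed value *is* the variable's value
      break;
    }
  }
  assert(!Stack.empty());
  return Stack.back();
}

int main() {
  // DBG_VALUE_LIST !var, !DIExpression(DW_OP_LLVM_arg 0, DW_OP_LLVM_arg 1,
  //                                    DW_OP_plus, DW_OP_stack_value), $r0, $r1
  std::vector<ExprOp> Expr = {{Op::LLVMArg, 0}, {Op::LLVMArg, 1},
                              {Op::Plus, 0},    {Op::StackValue, 0}};
  std::printf("%llu\n", (unsigned long long)evaluate(Expr, {40, 2})); // 42
}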
void DwarfCompileUnit::addExpr(DIELoc &Die, dwarf::Form Form, const MCExpr *Expr) { - Die.addValue(DIEValueAllocator, (dwarf::Attribute)0, Form, DIEExpr(Expr)); + addAttribute(Die, (dwarf::Attribute)0, Form, DIEExpr(Expr)); } void DwarfCompileUnit::applySubprogramAttributesToDefinition( @@ -1447,7 +1536,7 @@ void DwarfCompileUnit::addAddrTableBase() { } void DwarfCompileUnit::addBaseTypeRef(DIEValueList &Die, int64_t Idx) { - Die.addValue(DIEValueAllocator, (dwarf::Attribute)0, dwarf::DW_FORM_udata, + addAttribute(Die, (dwarf::Attribute)0, dwarf::DW_FORM_udata, new (DIEValueAllocator) DIEBaseTypeRef(this, Idx)); } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 462682743c6a..ee14423ca3d0 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -160,6 +160,13 @@ static cl::opt<DwarfDebug::MinimizeAddrInV5> MinimizeAddrInV5Option( clEnumValN(DwarfDebug::MinimizeAddrInV5::Ranges, "Ranges", "Use rnglists for contiguous ranges if that allows " "using a pre-existing base address"), + clEnumValN(DwarfDebug::MinimizeAddrInV5::Expressions, + "Expressions", + "Use exprloc addrx+offset expressions for any " + "address with a prior base address"), + clEnumValN(DwarfDebug::MinimizeAddrInV5::Form, "Form", + "Use addrx+offset extension form for any address " + "with a prior base address"), clEnumValN(DwarfDebug::MinimizeAddrInV5::Disabled, "Disabled", "Stuff")), cl::init(DwarfDebug::MinimizeAddrInV5::Default)); @@ -228,29 +235,27 @@ const DIType *DbgVariable::getType() const { /// Get .debug_loc entry for the instruction range starting at MI. static DbgValueLoc getDebugLocValue(const MachineInstr *MI) { const DIExpression *Expr = MI->getDebugExpression(); - assert(MI->getNumOperands() == 4); - if (MI->getDebugOperand(0).isReg()) { - const auto &RegOp = MI->getDebugOperand(0); - const auto &Op1 = MI->getDebugOffset(); - // If the second operand is an immediate, this is a - // register-indirect address. 
- assert((!Op1.isImm() || (Op1.getImm() == 0)) && "unexpected offset"); - MachineLocation MLoc(RegOp.getReg(), Op1.isImm()); - return DbgValueLoc(Expr, MLoc); - } - if (MI->getDebugOperand(0).isTargetIndex()) { - const auto &Op = MI->getDebugOperand(0); - return DbgValueLoc(Expr, - TargetIndexLocation(Op.getIndex(), Op.getOffset())); - } - if (MI->getDebugOperand(0).isImm()) - return DbgValueLoc(Expr, MI->getDebugOperand(0).getImm()); - if (MI->getDebugOperand(0).isFPImm()) - return DbgValueLoc(Expr, MI->getDebugOperand(0).getFPImm()); - if (MI->getDebugOperand(0).isCImm()) - return DbgValueLoc(Expr, MI->getDebugOperand(0).getCImm()); - - llvm_unreachable("Unexpected 4-operand DBG_VALUE instruction!"); + const bool IsVariadic = MI->isDebugValueList(); + assert(MI->getNumOperands() >= 3); + SmallVector<DbgValueLocEntry, 4> DbgValueLocEntries; + for (const MachineOperand &Op : MI->debug_operands()) { + if (Op.isReg()) { + MachineLocation MLoc(Op.getReg(), + MI->isNonListDebugValue() && MI->isDebugOffsetImm()); + DbgValueLocEntries.push_back(DbgValueLocEntry(MLoc)); + } else if (Op.isTargetIndex()) { + DbgValueLocEntries.push_back( + DbgValueLocEntry(TargetIndexLocation(Op.getIndex(), Op.getOffset()))); + } else if (Op.isImm()) + DbgValueLocEntries.push_back(DbgValueLocEntry(Op.getImm())); + else if (Op.isFPImm()) + DbgValueLocEntries.push_back(DbgValueLocEntry(Op.getFPImm())); + else if (Op.isCImm()) + DbgValueLocEntries.push_back(DbgValueLocEntry(Op.getCImm())); + else + llvm_unreachable("Unexpected debug operand in DBG_VALUE* instruction!"); + } + return DbgValueLoc(Expr, DbgValueLocEntries, IsVariadic); } void DbgVariable::initializeDbgValue(const MachineInstr *DbgValue) { @@ -357,11 +362,13 @@ DwarfDebug::DwarfDebug(AsmPrinter *A) DebuggerTuning = DebuggerKind::LLDB; else if (TT.isPS4CPU()) DebuggerTuning = DebuggerKind::SCE; + else if (TT.isOSAIX()) + DebuggerTuning = DebuggerKind::DBX; else DebuggerTuning = DebuggerKind::GDB; if (DwarfInlinedStrings == Default) - UseInlineStrings = TT.isNVPTX(); + UseInlineStrings = TT.isNVPTX() || tuneForDBX(); else UseInlineStrings = DwarfInlinedStrings == Enable; @@ -385,10 +392,21 @@ DwarfDebug::DwarfDebug(AsmPrinter *A) DwarfVersion = TT.isNVPTX() ? 2 : (DwarfVersion ? DwarfVersion : dwarf::DWARF_VERSION); - bool Dwarf64 = Asm->TM.Options.MCOptions.Dwarf64 && - DwarfVersion >= 3 && // DWARF64 was introduced in DWARFv3. - TT.isArch64Bit() && // DWARF64 requires 64-bit relocations. - TT.isOSBinFormatELF(); // Support only ELF for now. + bool Dwarf64 = DwarfVersion >= 3 && // DWARF64 was introduced in DWARFv3. + TT.isArch64Bit(); // DWARF64 requires 64-bit relocations. + + // Support DWARF64 + // 1: For ELF when requested. + // 2: For XCOFF64: the AIX assembler will fill in debug section lengths + // according to the DWARF64 format for 64-bit assembly, so we must use + // DWARF64 in the compiler too for 64-bit mode. 
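For the XCOFF64/DWARF64 coupling above, recall that the two formats diverge at the very first field of every unit. A sketch of the DWARF 5 "initial length" encoding (section 7.4; little-endian host assumed, not LLVM's emitter):

#include <cstdint>
#include <cstring>
#include <vector>

std::vector<uint8_t> initialLength(uint64_t Len, bool Dwarf64) {
  std::vector<uint8_t> Out;
  if (Dwarf64) {
    const uint32_t Escape = 0xffffffff; // DWARF64 marker
    Out.resize(12);
    std::memcpy(Out.data(), &Escape, 4);
    std::memcpy(Out.data() + 4, &Len, 8); // 64-bit unit length
  } else {
    const uint32_t Len32 = uint32_t(Len); // must be below 0xfffffff0
    Out.resize(4);
    std::memcpy(Out.data(), &Len32, 4);
  }
  return Out;
}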
+ Dwarf64 &= + ((Asm->TM.Options.MCOptions.Dwarf64 || MMI->getModule()->isDwarf64()) && + TT.isOSBinFormatELF()) || + TT.isOSBinFormatXCOFF(); + + if (!Dwarf64 && TT.isArch64Bit() && TT.isOSBinFormatXCOFF()) + report_fatal_error("XCOFF requires DWARF64 for 64-bit mode!"); UseRangesSection = !NoDwarfRangesSection && !TT.isNVPTX(); @@ -627,7 +645,7 @@ static void finishCallSiteParams(ValT Val, const DIExpression *Expr, assert((!CombinedExpr || CombinedExpr->isValid()) && "Combined debug expression is invalid"); - DbgValueLoc DbgLocVal(CombinedExpr, Val); + DbgValueLoc DbgLocVal(CombinedExpr, DbgValueLocEntry(Val)); DbgCallSiteParam CSParm(Param.ParamReg, DbgLocVal); Params.push_back(CSParm); ++NumCSParams; @@ -701,7 +719,7 @@ static void interpretValues(const MachineInstr *CurMI, for (const MachineOperand &MO : MI.operands()) { if (MO.isReg() && MO.isDef() && Register::isPhysicalRegister(MO.getReg())) { - for (auto FwdReg : ForwardedRegWorklist) + for (auto &FwdReg : ForwardedRegWorklist) if (TRI.regsOverlap(FwdReg.first, MO.getReg())) Defs.insert(FwdReg.first); } @@ -750,7 +768,7 @@ static void interpretValues(const MachineInstr *CurMI, // Now that we are done handling this instruction, add items from the // temporary worklist to the real one. - for (auto New : TmpWorklistItems) + for (auto &New : TmpWorklistItems) addToFwdRegWorklist(ForwardedRegWorklist, New.first, EmptyExpr, New.second); TmpWorklistItems.clear(); } @@ -785,7 +803,7 @@ static bool interpretNextInstr(const MachineInstr *CurMI, static void collectCallSiteParameters(const MachineInstr *CallMI, ParamSet &Params) { const MachineFunction *MF = CallMI->getMF(); - auto CalleesMap = MF->getCallSitesInfo(); + const auto &CalleesMap = MF->getCallSitesInfo(); auto CallFwdRegsInfo = CalleesMap.find(CallMI); // There is no information for the call instruction. @@ -803,7 +821,7 @@ static void collectCallSiteParameters(const MachineInstr *CallMI, DIExpression::get(MF->getFunction().getContext(), {}); // Add all the forwarding registers into the ForwardedRegWorklist. - for (auto ArgReg : CallFwdRegsInfo->second) { + for (const auto &ArgReg : CallFwdRegsInfo->second) { bool InsertedReg = ForwardedRegWorklist.insert({ArgReg.Reg, {{ArgReg.Reg, EmptyExpr}}}) .second; @@ -851,7 +869,7 @@ static void collectCallSiteParameters(const MachineInstr *CallMI, // Create an expression where the register's entry value is used. DIExpression *EntryExpr = DIExpression::get( MF->getFunction().getContext(), {dwarf::DW_OP_LLVM_entry_value, 1}); - for (auto RegEntry : ForwardedRegWorklist) { + for (auto &RegEntry : ForwardedRegWorklist) { MachineLocation MLoc(RegEntry.first); finishCallSiteParams(MLoc, EntryExpr, RegEntry.second, Params); } @@ -920,8 +938,10 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP, // If this is a direct call, find the callee's subprogram. // In the case of an indirect call find the register that holds // the callee. 
- const MachineOperand &CalleeOp = MI.getOperand(0);
- if (!CalleeOp.isGlobal() && !CalleeOp.isReg())
+ const MachineOperand &CalleeOp = TII->getCalleeOperand(MI);
+ if (!CalleeOp.isGlobal() &&
+ (!CalleeOp.isReg() ||
+ !Register::isPhysicalRegister(CalleeOp.getReg())))
continue;
unsigned CallReg = 0;
@@ -1216,6 +1236,7 @@ void DwarfDebug::beginModule(Module *M) {
if (!GVMapEntry.size() || (Expr && Expr->isConstant()))
GVMapEntry.push_back({nullptr, Expr});
}
+
DenseSet<DIGlobalVariable *> Processed;
for (auto *GVE : CUNode->getGlobalVariables()) {
DIGlobalVariable *GV = GVE->getVariable();
@@ -1533,6 +1554,7 @@ void DwarfDebug::collectVariableInfoFromMFTable(
RegVar->initializeMMI(VI.Expr, VI.Slot);
LLVM_DEBUG(dbgs() << "Created DbgVariable for " << VI.Var->getName()
<< "\n");
+
if (DbgVariable *DbgVar = MFVars.lookup(Var))
DbgVar->addMMIEntry(*RegVar);
else if (InfoHolder.addScopeVariable(Scope, RegVar.get())) {
@@ -1595,7 +1617,9 @@ static bool validThroughout(LexicalScopes &LScopes,
// throughout the function. This is a hack, presumably for DWARF v2 and not
// necessarily correct. It would be much better to use a dbg.declare instead
// if we know the constant is live throughout the scope.
- if (DbgValue->getDebugOperand(0).isImm() && MBB->pred_empty())
+ if (MBB->pred_empty() &&
+ all_of(DbgValue->debug_operands(),
+ [](const MachineOperand &Op) { return Op.isImm(); }))
return true;
// Test if the location terminates before the end of the scope.
@@ -1719,7 +1743,30 @@ bool DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
SmallVector<DbgValueLoc, 4> Values;
for (auto &R : OpenRanges)
Values.push_back(R.second);
- DebugLoc.emplace_back(StartLabel, EndLabel, Values);
+
+ // With basic block sections, it is possible that the StartLabel and the
+ // Instr are not in the same section. This happens when the StartLabel is
+ // the function begin label and the dbg value appears in a basic block
+ // that is not the entry. In this case, the range needs to be split to
+ // span each individual section in the range from StartLabel to EndLabel.
+ if (Asm->MF->hasBBSections() && StartLabel == Asm->getFunctionBegin() &&
+ !Instr->getParent()->sameSection(&Asm->MF->front())) {
+ const MCSymbol *BeginSectionLabel = StartLabel;
+
+ for (const MachineBasicBlock &MBB : *Asm->MF) {
+ if (MBB.isBeginSection() && &MBB != &Asm->MF->front())
+ BeginSectionLabel = MBB.getSymbol();
+
+ if (MBB.sameSection(Instr->getParent())) {
+ DebugLoc.emplace_back(BeginSectionLabel, EndLabel, Values);
+ break;
+ }
+ if (MBB.isEndSection())
+ DebugLoc.emplace_back(BeginSectionLabel, MBB.getEndSymbol(), Values);
+ }
+ } else {
+ DebugLoc.emplace_back(StartLabel, EndLabel, Values);
+ }
// Attempt to coalesce the ranges of two otherwise identical
// DebugLocEntries.
@@ -1736,8 +1783,46 @@ bool DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
DebugLoc.pop_back();
}
- return DebugLoc.size() == 1 && isSafeForSingleLocation &&
- validThroughout(LScopes, StartDebugMI, EndMI, getInstOrdering());
+ if (!isSafeForSingleLocation ||
+ !validThroughout(LScopes, StartDebugMI, EndMI, getInstOrdering()))
+ return false;
+
+ if (DebugLoc.size() == 1)
+ return true;
+
+ if (!Asm->MF->hasBBSections())
+ return false;
+
+ // Check here to see if loclist can be merged into a single range. If not,
+ // we must keep the split loclists per section. This does exactly what
+ // MergeRanges does without sections. We don't actually merge the ranges
+ // as the split ranges must be kept intact if this cannot be collapsed
+ // into a single range.
+ const MachineBasicBlock *RangeMBB = nullptr;
+ if (DebugLoc[0].getBeginSym() == Asm->getFunctionBegin())
+ RangeMBB = &Asm->MF->front();
+ else
+ RangeMBB = Entries.begin()->getInstr()->getParent();
+ auto *CurEntry = DebugLoc.begin();
+ auto *NextEntry = std::next(CurEntry);
+ while (NextEntry != DebugLoc.end()) {
+ // Get the last machine basic block of this section.
+ while (!RangeMBB->isEndSection())
+ RangeMBB = RangeMBB->getNextNode();
+ if (!RangeMBB->getNextNode())
+ return false;
+ // CurEntry should end the current section and NextEntry should start
+ // the next section and the Values must match for these two ranges to be
+ // merged.
+ if (CurEntry->getEndSym() != RangeMBB->getEndSymbol() ||
+ NextEntry->getBeginSym() != RangeMBB->getNextNode()->getSymbol() ||
+ CurEntry->getValues() != NextEntry->getValues())
+ return false;
+ RangeMBB = RangeMBB->getNextNode();
+ CurEntry = NextEntry;
+ NextEntry = std::next(CurEntry);
+ }
+ return true;
}
DbgEntity *DwarfDebug::createConcreteEntity(DwarfCompileUnit &TheCU,
@@ -1776,7 +1861,10 @@ void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU,
// Instruction ranges, specifying where IV is accessible.
const auto &HistoryMapEntries = I.second;
- if (HistoryMapEntries.empty())
+
+ // Try to find any non-empty variable location. Do not create a concrete
+ // entity if there are no locations.
+ if (!DbgValues.hasNonEmptyLocation(HistoryMapEntries))
continue;
LexicalScope *Scope = nullptr;
@@ -2363,12 +2451,8 @@ void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name,
TheU = Skeleton;
// Emit the header.
- MCSymbol *BeginLabel = Asm->createTempSymbol("pub" + Name + "_begin");
- MCSymbol *EndLabel = Asm->createTempSymbol("pub" + Name + "_end");
- Asm->emitDwarfUnitLength(EndLabel, BeginLabel,
- "Length of Public " + Name + " Info");
-
- Asm->OutStreamer->emitLabel(BeginLabel);
+ MCSymbol *EndLabel = Asm->emitDwarfUnitLength(
+ "pub" + Name, "Length of Public " + Name + " Info");
Asm->OutStreamer->AddComment("DWARF Version");
Asm->emitInt16(dwarf::DW_PUBNAMES_VERSION);
@@ -2469,51 +2553,93 @@ void DwarfDebug::emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
auto *DIExpr = Value.getExpression();
DIExpressionCursor ExprCursor(DIExpr);
DwarfExpr.addFragmentOffset(DIExpr);
- // Regular entry.
- if (Value.isInt()) {
- if (BT && (BT->getEncoding() == dwarf::DW_ATE_signed ||
- BT->getEncoding() == dwarf::DW_ATE_signed_char))
- DwarfExpr.addSignedConstant(Value.getInt());
- else
- DwarfExpr.addUnsignedConstant(Value.getInt());
- } else if (Value.isLocation()) {
- MachineLocation Location = Value.getLoc();
+
+ // If the DIExpr is an Entry Value, we want to follow the same code path
+ // regardless of whether the DBG_VALUE is variadic or not.
+ if (DIExpr && DIExpr->isEntryValue()) {
+ // Entry values can only be a single register with no additional DIExpr,
+ // so just add it directly.
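For the simple entry-value case above, the emitted DWARF block is tiny; a byte-level sketch (standard DWARF 5 opcode values; small register numbers only, not LLVM's emitter):

#include <cstdint>
#include <cstdio>
#include <vector>

std::vector<uint8_t> entryValueOfReg(uint8_t DwarfRegNo) {
  // Assumes DwarfRegNo < 32 so DW_OP_reg0+N fits one byte; bigger register
  // numbers would need DW_OP_regx plus a ULEB128 operand instead.
  return {0xa3 /*DW_OP_entry_value*/, 0x01 /*ULEB128 block length*/,
          uint8_t(0x50 /*DW_OP_reg0*/ + DwarfRegNo)};
}

int main() {
  for (uint8_t B : entryValueOfReg(5))
    std::printf("%02x ", B); // a3 01 55
  std::printf("\n");
}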
+ assert(Value.getLocEntries().size() == 1); + assert(Value.getLocEntries()[0].isLocation()); + MachineLocation Location = Value.getLocEntries()[0].getLoc(); DwarfExpr.setLocation(Location, DIExpr); - DIExpressionCursor Cursor(DIExpr); - if (DIExpr->isEntryValue()) - DwarfExpr.beginEntryValueExpression(Cursor); + DwarfExpr.beginEntryValueExpression(ExprCursor); const TargetRegisterInfo &TRI = *AP.MF->getSubtarget().getRegisterInfo(); - if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg())) - return; - return DwarfExpr.addExpression(std::move(Cursor)); - } else if (Value.isTargetIndexLocation()) { - TargetIndexLocation Loc = Value.getTargetIndexLocation(); - // TODO TargetIndexLocation is a target-independent. Currently only the WebAssembly-specific - // encoding is supported. - assert(AP.TM.getTargetTriple().isWasm()); - DwarfExpr.addWasmLocation(Loc.Index, static_cast<uint64_t>(Loc.Offset)); - DwarfExpr.addExpression(std::move(ExprCursor)); - return; - } else if (Value.isConstantFP()) { - if (AP.getDwarfVersion() >= 4 && !AP.getDwarfDebug()->tuneForSCE() && - !ExprCursor) { - DwarfExpr.addConstantFP(Value.getConstantFP()->getValueAPF(), AP); + if (!DwarfExpr.addMachineRegExpression(TRI, ExprCursor, Location.getReg())) return; + return DwarfExpr.addExpression(std::move(ExprCursor)); + } + + // Regular entry. + auto EmitValueLocEntry = [&DwarfExpr, &BT, + &AP](const DbgValueLocEntry &Entry, + DIExpressionCursor &Cursor) -> bool { + if (Entry.isInt()) { + if (BT && (BT->getEncoding() == dwarf::DW_ATE_signed || + BT->getEncoding() == dwarf::DW_ATE_signed_char)) + DwarfExpr.addSignedConstant(Entry.getInt()); + else + DwarfExpr.addUnsignedConstant(Entry.getInt()); + } else if (Entry.isLocation()) { + MachineLocation Location = Entry.getLoc(); + if (Location.isIndirect()) + DwarfExpr.setMemoryLocationKind(); + + const TargetRegisterInfo &TRI = *AP.MF->getSubtarget().getRegisterInfo(); + if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg())) + return false; + } else if (Entry.isTargetIndexLocation()) { + TargetIndexLocation Loc = Entry.getTargetIndexLocation(); + // TODO TargetIndexLocation is a target-independent. Currently only the + // WebAssembly-specific encoding is supported. 
+ assert(AP.TM.getTargetTriple().isWasm()); + DwarfExpr.addWasmLocation(Loc.Index, static_cast<uint64_t>(Loc.Offset)); + } else if (Entry.isConstantFP()) { + if (AP.getDwarfVersion() >= 4 && !AP.getDwarfDebug()->tuneForSCE() && + !Cursor) { + DwarfExpr.addConstantFP(Entry.getConstantFP()->getValueAPF(), AP); + } else if (Entry.getConstantFP() + ->getValueAPF() + .bitcastToAPInt() + .getBitWidth() <= 64 /*bits*/) { + DwarfExpr.addUnsignedConstant( + Entry.getConstantFP()->getValueAPF().bitcastToAPInt()); + } else { + LLVM_DEBUG( + dbgs() << "Skipped DwarfExpression creation for ConstantFP of size" + << Entry.getConstantFP() + ->getValueAPF() + .bitcastToAPInt() + .getBitWidth() + << " bits\n"); + return false; + } } - if (Value.getConstantFP()->getValueAPF().bitcastToAPInt().getBitWidth() <= - 64 /*bits*/) - DwarfExpr.addUnsignedConstant( - Value.getConstantFP()->getValueAPF().bitcastToAPInt()); - else - LLVM_DEBUG( - dbgs() - << "Skipped DwarfExpression creation for ConstantFP of size" - << Value.getConstantFP()->getValueAPF().bitcastToAPInt().getBitWidth() - << " bits\n"); + return true; + }; + + if (!Value.isVariadic()) { + if (!EmitValueLocEntry(Value.getLocEntries()[0], ExprCursor)) + return; + DwarfExpr.addExpression(std::move(ExprCursor)); + return; } - DwarfExpr.addExpression(std::move(ExprCursor)); + + // If any of the location entries are registers with the value 0, then the + // location is undefined. + if (any_of(Value.getLocEntries(), [](const DbgValueLocEntry &Entry) { + return Entry.isLocation() && !Entry.getLoc().getReg(); + })) + return; + + DwarfExpr.addExpression( + std::move(ExprCursor), + [EmitValueLocEntry, &Value](unsigned Idx, + DIExpressionCursor &Cursor) -> bool { + return EmitValueLocEntry(Value.getLocEntries()[Idx], Cursor); + }); } void DebugLocEntry::finalize(const AsmPrinter &AP, @@ -3397,7 +3523,10 @@ dwarf::Form DwarfDebug::getDwarfSectionOffsetForm() const { } const MCSymbol *DwarfDebug::getSectionLabel(const MCSection *S) { - return SectionLabels.find(S)->second; + auto I = SectionLabels.find(S); + if (I == SectionLabels.end()) + return nullptr; + return I->second; } void DwarfDebug::insertSectionLabel(const MCSymbol *S) { if (SectionLabels.insert(std::make_pair(&S->getSection(), S)).second) diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index df19ef458888..6356a65b50d3 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -383,6 +383,8 @@ public: Default, Disabled, Ranges, + Expressions, + Form, }; private: @@ -438,7 +440,11 @@ private: AccelTable<AppleAccelTableOffsetData> AccelNamespace; AccelTable<AppleAccelTableTypeData> AccelTypes; - // Identify a debugger for "tuning" the debug info. + /// Identify a debugger for "tuning" the debug info. + /// + /// The "tuning" should be used to set defaults for individual feature flags + /// in DwarfDebug; if a given feature has a more specific command-line option, + /// that option should take precedence over the tuning. DebuggerKind DebuggerTuning = DebuggerKind::Default; MCDwarfDwoLineTable *getDwoLineTable(const DwarfCompileUnit &); @@ -706,6 +712,18 @@ public: return MinimizeAddr == MinimizeAddrInV5::Ranges; } + // Returns whether novel exprloc addrx+offset encodings should be used to + // reduce debug_addr size. 
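A back-of-the-envelope model of what these two minimization modes buy (illustrative constants, not measurements): pooling one base per section trades per-label .debug_addr entries and relocations for a small per-DIE offset.

#include <cstdio>

struct Cost { unsigned DebugAddrBytes; unsigned Relocs; };

// N labels addressed directly: one pooled 8-byte slot and one relocation each
// on a 64-bit target.
Cost direct(unsigned NumLabels) { return {NumLabels * 8, NumLabels}; }

// Base+offset: one pooled base per section; each DIE instead carries an
// address-pool index plus a 4-byte delta from that base.
Cost basePlusOffset(unsigned /*NumLabels*/) { return {8, 1}; }

int main() {
  Cost D = direct(100), B = basePlusOffset(100);
  std::printf(".debug_addr %u -> %u bytes, relocations %u -> %u\n",
              D.DebugAddrBytes, B.DebugAddrBytes, D.Relocs, B.Relocs);
}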
+ bool useAddrOffsetExpressions() const { + return MinimizeAddr == MinimizeAddrInV5::Expressions; + } + + // Returns whether addrx+offset LLVM extension form should be used to reduce + // debug_addr size. + bool useAddrOffsetForm() const { + return MinimizeAddr == MinimizeAddrInV5::Form; + } + /// Returns whether to use sections as labels rather than temp symbols. bool useSectionsAsReferences() const { return UseSectionsAsReferences; @@ -820,6 +838,7 @@ public: bool tuneForGDB() const { return DebuggerTuning == DebuggerKind::GDB; } bool tuneForLLDB() const { return DebuggerTuning == DebuggerKind::LLDB; } bool tuneForSCE() const { return DebuggerTuning == DebuggerKind::SCE; } + bool tuneForDBX() const { return DebuggerTuning == DebuggerKind::DBX; } /// @} const MCSymbol *getSectionLabel(const MCSection *S); diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/llvm/lib/CodeGen/AsmPrinter/DwarfException.h index b19b4365383f..40898c9fc855 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfException.h @@ -44,9 +44,6 @@ class LLVM_LIBRARY_VISIBILITY DwarfCFIException : public DwarfCFIExceptionBase { /// Per-function flag to indicate if .cfi_lsda should be emitted. bool shouldEmitLSDA; - /// Per-function flag to indicate if frame moves info should be emitted. - bool shouldEmitMoves; - public: //===--------------------------------------------------------------------===// // Main entry points. diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index 59ad7646ce1c..6409c39e7849 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -285,22 +285,29 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI, // a call site parameter expression and if that expression is just a register // location, emit it with addBReg and offset 0, because we should emit a DWARF // expression representing a value, rather than a location. - if (!isMemoryLocation() && !HasComplexExpression && - (!isParameterValue() || isEntryValue())) { + if ((!isParameterValue() && !isMemoryLocation() && !HasComplexExpression) || + isEntryValue()) { for (auto &Reg : DwarfRegs) { if (Reg.DwarfRegNo >= 0) addReg(Reg.DwarfRegNo, Reg.Comment); addOpPiece(Reg.SubRegSize); } - if (isEntryValue()) + if (isEntryValue()) { finalizeEntryValue(); - if (isEntryValue() && !isIndirect() && !isParameterValue() && - DwarfVersion >= 4) - emitOp(dwarf::DW_OP_stack_value); + if (!isIndirect() && !isParameterValue() && !HasComplexExpression && + DwarfVersion >= 4) + emitOp(dwarf::DW_OP_stack_value); + } DwarfRegs.clear(); + // If we need to mask out a subregister, do it now, unless the next + // operation would emit an OpPiece anyway. + auto NextOp = ExprCursor.peek(); + if (SubRegisterSizeInBits && NextOp && + (NextOp->getOp() != dwarf::DW_OP_LLVM_fragment)) + maskSubRegister(); return true; } @@ -353,6 +360,14 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI, else addBReg(Reg.DwarfRegNo, SignedOffset); DwarfRegs.clear(); + + // If we need to mask out a subregister, do it now, unless the next + // operation would emit an OpPiece anyway. 
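The masking mentioned in the DwarfExpression hunks below ANDs the value down to the sub-register's width (a DW_OP_constu mask followed by DW_OP_and, as I read the surrounding code); the mask itself is just:

#include <cstdint>
#include <cstdio>

uint64_t subRegisterMask(unsigned SizeInBits) {
  // All-ones for full-width registers; otherwise the low SizeInBits set.
  return SizeInBits >= 64 ? ~UINT64_C(0)
                          : (UINT64_C(1) << SizeInBits) - 1;
}

int main() {
  // e.g. a 32-bit sub-register inside a 64-bit super-register:
  std::printf("%016llx\n",
              (unsigned long long)subRegisterMask(32)); // 00000000ffffffff
}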
+ auto NextOp = ExprCursor.peek(); + if (SubRegisterSizeInBits && NextOp && + (NextOp->getOp() != dwarf::DW_OP_LLVM_fragment)) + maskSubRegister(); + return true; } @@ -365,11 +380,7 @@ void DwarfExpression::setEntryValueFlags(const MachineLocation &Loc) { void DwarfExpression::setLocation(const MachineLocation &Loc, const DIExpression *DIExpr) { if (Loc.isIndirect()) - // Do not treat entry value descriptions of indirect parameters as memory - // locations. This allows DwarfExpression::addReg() to add DW_OP_regN to an - // entry value description. - if (!DIExpr->isEntryValue()) - setMemoryLocationKind(); + setMemoryLocationKind(); if (DIExpr->isEntryValue()) setEntryValueFlags(Loc); @@ -380,12 +391,12 @@ void DwarfExpression::beginEntryValueExpression( auto Op = ExprCursor.take(); (void)Op; assert(Op && Op->getOp() == dwarf::DW_OP_LLVM_entry_value); - assert(!isMemoryLocation() && - "We don't support entry values of memory locations yet"); assert(!IsEmittingEntryValue && "Already emitting entry value?"); assert(Op->getArg(0) == 1 && "Can currently only emit entry values covering a single operation"); + SavedLocationKind = LocationKind; + LocationKind = Register; IsEmittingEntryValue = true; enableTemporaryBuffer(); } @@ -403,6 +414,8 @@ void DwarfExpression::finalizeEntryValue() { // Emit the entry value's DWARF block operand. commitTemporaryBuffer(); + LocationFlags &= ~EntryValue; + LocationKind = SavedLocationKind; IsEmittingEntryValue = false; } @@ -415,6 +428,7 @@ void DwarfExpression::cancelEntryValue() { assert(getTemporaryBufferSize() == 0 && "Began emitting entry value block before cancelling entry value"); + LocationKind = SavedLocationKind; IsEmittingEntryValue = false; } @@ -451,16 +465,19 @@ static bool isMemoryLocation(DIExpressionCursor ExprCursor) { void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor, unsigned FragmentOffsetInBits) { + addExpression(std::move(ExprCursor), + [](unsigned Idx, DIExpressionCursor &Cursor) -> bool { + llvm_unreachable("unhandled opcode found in expression"); + }); +} + +void DwarfExpression::addExpression( + DIExpressionCursor &&ExprCursor, + llvm::function_ref<bool(unsigned, DIExpressionCursor &)> InsertArg) { // Entry values can currently only cover the initial register location, // and not any other parts of the following DWARF expression. assert(!IsEmittingEntryValue && "Can't emit entry value around expression"); - // If we need to mask out a subregister, do it now, unless the next - // operation would emit an OpPiece anyway. - auto N = ExprCursor.peek(); - if (SubRegisterSizeInBits && N && (N->getOp() != dwarf::DW_OP_LLVM_fragment)) - maskSubRegister(); - Optional<DIExpression::ExprOperand> PrevConvertOp = None; while (ExprCursor) { @@ -476,6 +493,12 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor, } switch (OpNum) { + case dwarf::DW_OP_LLVM_arg: + if (!InsertArg(Op->getArg(0), ExprCursor)) { + LocationKind = Unknown; + return; + } + break; case dwarf::DW_OP_LLVM_fragment: { unsigned SizeInBits = Op->getArg(1); unsigned FragmentOffset = Op->getArg(0); @@ -664,9 +687,14 @@ void DwarfExpression::emitLegacyZExt(unsigned FromBits) { } void DwarfExpression::addWasmLocation(unsigned Index, uint64_t Offset) { - assert(LocationKind == Implicit || LocationKind == Unknown); - LocationKind = Implicit; emitOp(dwarf::DW_OP_WASM_location); - emitUnsigned(Index); + emitUnsigned(Index == 4/*TI_LOCAL_INDIRECT*/ ? 
0/*TI_LOCAL*/ : Index); emitUnsigned(Offset); + if (Index == 4 /*TI_LOCAL_INDIRECT*/) { + assert(LocationKind == Unknown); + LocationKind = Memory; + } else { + assert(LocationKind == Implicit || LocationKind == Unknown); + LocationKind = Implicit; + } } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h index 8fca9f5a630b..513e9072309e 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h @@ -148,6 +148,7 @@ protected: enum { EntryValue = 1 << 0, Indirect = 1 << 1, CallSiteParamValue = 1 << 2 }; unsigned LocationKind : 3; + unsigned SavedLocationKind : 3; unsigned LocationFlags : 3; unsigned DwarfVersion : 4; @@ -284,8 +285,8 @@ protected: public: DwarfExpression(unsigned DwarfVersion, DwarfCompileUnit &CU) : CU(CU), SubRegisterSizeInBits(0), SubRegisterOffsetInBits(0), - LocationKind(Unknown), LocationFlags(Unknown), - DwarfVersion(DwarfVersion) {} + LocationKind(Unknown), SavedLocationKind(Unknown), + LocationFlags(Unknown), DwarfVersion(DwarfVersion) {} /// This needs to be called last to commit any pending changes. void finalize(); @@ -346,6 +347,9 @@ public: /// fragment inside the entire variable. void addExpression(DIExpressionCursor &&Expr, unsigned FragmentOffsetInBits = 0); + void + addExpression(DIExpressionCursor &&Expr, + llvm::function_ref<bool(unsigned, DIExpressionCursor &)> InsertArg); /// If applicable, emit an empty DW_OP_piece / DW_OP_bit_piece to advance to /// the fragment described by \c Expr. diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 118b5fcc3bf6..344d30fad347 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -100,10 +100,10 @@ DwarfTypeUnit::DwarfTypeUnit(DwarfCompileUnit &CU, AsmPrinter *A, } DwarfUnit::~DwarfUnit() { - for (unsigned j = 0, M = DIEBlocks.size(); j < M; ++j) - DIEBlocks[j]->~DIEBlock(); - for (unsigned j = 0, M = DIELocs.size(); j < M; ++j) - DIELocs[j]->~DIELoc(); + for (DIEBlock *B : DIEBlocks) + B->~DIEBlock(); + for (DIELoc *L : DIELocs) + L->~DIELoc(); } int64_t DwarfUnit::getDefaultLowerBound() const { @@ -219,11 +219,9 @@ void DwarfUnit::insertDIE(DIE *D) { void DwarfUnit::addFlag(DIE &Die, dwarf::Attribute Attribute) { if (DD->getDwarfVersion() >= 4) - Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_flag_present, - DIEInteger(1)); + addAttribute(Die, Attribute, dwarf::DW_FORM_flag_present, DIEInteger(1)); else - Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_flag, - DIEInteger(1)); + addAttribute(Die, Attribute, dwarf::DW_FORM_flag, DIEInteger(1)); } void DwarfUnit::addUInt(DIEValueList &Die, dwarf::Attribute Attribute, @@ -232,7 +230,7 @@ void DwarfUnit::addUInt(DIEValueList &Die, dwarf::Attribute Attribute, Form = DIEInteger::BestForm(false, Integer); assert(Form != dwarf::DW_FORM_implicit_const && "DW_FORM_implicit_const is used only for signed integers"); - Die.addValue(DIEValueAllocator, Attribute, *Form, DIEInteger(Integer)); + addAttribute(Die, Attribute, *Form, DIEInteger(Integer)); } void DwarfUnit::addUInt(DIEValueList &Block, dwarf::Form Form, @@ -244,7 +242,7 @@ void DwarfUnit::addSInt(DIEValueList &Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form, int64_t Integer) { if (!Form) Form = DIEInteger::BestForm(true, Integer); - Die.addValue(DIEValueAllocator, Attribute, *Form, DIEInteger(Integer)); + addAttribute(Die, Attribute, *Form, DIEInteger(Integer)); } void 
DwarfUnit::addSInt(DIELoc &Die, Optional<dwarf::Form> Form, @@ -258,7 +256,7 @@ void DwarfUnit::addString(DIE &Die, dwarf::Attribute Attribute, return; if (DD->useInlineStrings()) { - Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_string, + addAttribute(Die, Attribute, dwarf::DW_FORM_string, new (DIEValueAllocator) DIEInlineString(String, DIEValueAllocator)); return; @@ -282,15 +280,12 @@ void DwarfUnit::addString(DIE &Die, dwarf::Attribute Attribute, else if (Index > 0xff) IxForm = dwarf::DW_FORM_strx2; } - Die.addValue(DIEValueAllocator, Attribute, IxForm, - DIEString(StringPoolEntry)); + addAttribute(Die, Attribute, IxForm, DIEString(StringPoolEntry)); } -DIEValueList::value_iterator DwarfUnit::addLabel(DIEValueList &Die, - dwarf::Attribute Attribute, - dwarf::Form Form, - const MCSymbol *Label) { - return Die.addValue(DIEValueAllocator, Attribute, Form, DIELabel(Label)); +void DwarfUnit::addLabel(DIEValueList &Die, dwarf::Attribute Attribute, + dwarf::Form Form, const MCSymbol *Label) { + addAttribute(Die, Attribute, Form, DIELabel(Label)); } void DwarfUnit::addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label) { @@ -315,17 +310,39 @@ unsigned DwarfTypeUnit::getOrCreateSourceID(const DIFile *File) { Asm->OutContext.getDwarfVersion(), File->getSource()); } -void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) { +void DwarfUnit::addPoolOpAddress(DIEValueList &Die, const MCSymbol *Label) { + bool UseAddrOffsetFormOrExpressions = + DD->useAddrOffsetForm() || DD->useAddrOffsetExpressions(); + + const MCSymbol *Base = nullptr; + if (Label->isInSection() && UseAddrOffsetFormOrExpressions) + Base = DD->getSectionLabel(&Label->getSection()); + + uint32_t Index = DD->getAddressPool().getIndex(Base ? Base : Label); + if (DD->getDwarfVersion() >= 5) { addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_addrx); - addUInt(Die, dwarf::DW_FORM_addrx, DD->getAddressPool().getIndex(Sym)); + addUInt(Die, dwarf::DW_FORM_addrx, Index); + } else { + addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_addr_index); + addUInt(Die, dwarf::DW_FORM_GNU_addr_index, Index); + } + + if (Base && Base != Label) { + addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_const4u); + addLabelDelta(Die, (dwarf::Attribute)0, Label, Base); + addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); + } +} + +void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) { + if (DD->getDwarfVersion() >= 5) { + addPoolOpAddress(Die, Sym); return; } if (DD->useSplitDwarf()) { - addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_addr_index); - addUInt(Die, dwarf::DW_FORM_GNU_addr_index, - DD->getAddressPool().getIndex(Sym)); + addPoolOpAddress(Die, Sym); return; } @@ -333,9 +350,9 @@ void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) { addLabel(Die, dwarf::DW_FORM_addr, Sym); } -void DwarfUnit::addLabelDelta(DIE &Die, dwarf::Attribute Attribute, +void DwarfUnit::addLabelDelta(DIEValueList &Die, dwarf::Attribute Attribute, const MCSymbol *Hi, const MCSymbol *Lo) { - Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_data4, + addAttribute(Die, Attribute, dwarf::DW_FORM_data4, new (DIEValueAllocator) DIEDelta(Hi, Lo)); } @@ -350,8 +367,8 @@ void DwarfUnit::addDIETypeSignature(DIE &Die, uint64_t Signature) { // and think this is a full definition. 
addFlag(Die, dwarf::DW_AT_declaration); - Die.addValue(DIEValueAllocator, dwarf::DW_AT_signature, - dwarf::DW_FORM_ref_sig8, DIEInteger(Signature)); + addAttribute(Die, dwarf::DW_AT_signature, dwarf::DW_FORM_ref_sig8, + DIEInteger(Signature)); } void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute, @@ -363,13 +380,13 @@ void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute, CU = getUnitDie().getUnit(); if (!EntryCU) EntryCU = getUnitDie().getUnit(); - Die.addValue(DIEValueAllocator, Attribute, + addAttribute(Die, Attribute, EntryCU == CU ? dwarf::DW_FORM_ref4 : dwarf::DW_FORM_ref_addr, Entry); } -DIE &DwarfUnit::createAndAddDIE(unsigned Tag, DIE &Parent, const DINode *N) { - DIE &Die = Parent.addChild(DIE::get(DIEValueAllocator, (dwarf::Tag)Tag)); +DIE &DwarfUnit::createAndAddDIE(dwarf::Tag Tag, DIE &Parent, const DINode *N) { + DIE &Die = Parent.addChild(DIE::get(DIEValueAllocator, Tag)); if (N) insertDIE(N, &Die); return Die; @@ -378,15 +395,19 @@ DIE &DwarfUnit::createAndAddDIE(unsigned Tag, DIE &Parent, const DINode *N) { void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute, DIELoc *Loc) { Loc->ComputeSize(Asm); DIELocs.push_back(Loc); // Memoize so we can call the destructor later on. - Die.addValue(DIEValueAllocator, Attribute, - Loc->BestForm(DD->getDwarfVersion()), Loc); + addAttribute(Die, Attribute, Loc->BestForm(DD->getDwarfVersion()), Loc); } -void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute, +void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute, dwarf::Form Form, DIEBlock *Block) { Block->ComputeSize(Asm); DIEBlocks.push_back(Block); // Memoize so we can call the destructor later on. - Die.addValue(DIEValueAllocator, Attribute, Block->BestForm(), Block); + addAttribute(Die, Attribute, Form, Block); +} + +void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute, + DIEBlock *Block) { + addBlock(Die, Attribute, Block->BestForm(), Block); } void DwarfUnit::addSourceLine(DIE &Die, unsigned Line, const DIFile *File) { @@ -918,14 +939,17 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) { addFlag(Buffer, dwarf::DW_AT_APPLE_objc_complete_type); // Add the type's non-standard calling convention. - uint8_t CC = 0; - if (CTy->isTypePassByValue()) - CC = dwarf::DW_CC_pass_by_value; - else if (CTy->isTypePassByReference()) - CC = dwarf::DW_CC_pass_by_reference; - if (CC) - addUInt(Buffer, dwarf::DW_AT_calling_convention, dwarf::DW_FORM_data1, - CC); + // DW_CC_pass_by_value/DW_CC_pass_by_reference are introduced in DWARF 5. 
+ if (!Asm->TM.Options.DebugStrictDwarf || DD->getDwarfVersion() >= 5) { + uint8_t CC = 0; + if (CTy->isTypePassByValue()) + CC = dwarf::DW_CC_pass_by_value; + else if (CTy->isTypePassByReference()) + CC = dwarf::DW_CC_pass_by_reference; + if (CC) + addUInt(Buffer, dwarf::DW_AT_calling_convention, dwarf::DW_FORM_data1, + CC); + } break; } default: @@ -1106,32 +1130,34 @@ DIE *DwarfUnit::getOrCreateSubprogramDIE(const DISubprogram *SP, bool Minimal) { } bool DwarfUnit::applySubprogramDefinitionAttributes(const DISubprogram *SP, - DIE &SPDie) { + DIE &SPDie, bool Minimal) { DIE *DeclDie = nullptr; StringRef DeclLinkageName; if (auto *SPDecl = SP->getDeclaration()) { - DITypeRefArray DeclArgs, DefinitionArgs; - DeclArgs = SPDecl->getType()->getTypeArray(); - DefinitionArgs = SP->getType()->getTypeArray(); - - if (DeclArgs.size() && DefinitionArgs.size()) - if (DefinitionArgs[0] != NULL && DeclArgs[0] != DefinitionArgs[0]) - addType(SPDie, DefinitionArgs[0]); - - DeclDie = getDIE(SPDecl); - assert(DeclDie && "This DIE should've already been constructed when the " - "definition DIE was created in " - "getOrCreateSubprogramDIE"); - // Look at the Decl's linkage name only if we emitted it. - if (DD->useAllLinkageNames()) - DeclLinkageName = SPDecl->getLinkageName(); - unsigned DeclID = getOrCreateSourceID(SPDecl->getFile()); - unsigned DefID = getOrCreateSourceID(SP->getFile()); - if (DeclID != DefID) - addUInt(SPDie, dwarf::DW_AT_decl_file, None, DefID); - - if (SP->getLine() != SPDecl->getLine()) - addUInt(SPDie, dwarf::DW_AT_decl_line, None, SP->getLine()); + if (!Minimal) { + DITypeRefArray DeclArgs, DefinitionArgs; + DeclArgs = SPDecl->getType()->getTypeArray(); + DefinitionArgs = SP->getType()->getTypeArray(); + + if (DeclArgs.size() && DefinitionArgs.size()) + if (DefinitionArgs[0] != NULL && DeclArgs[0] != DefinitionArgs[0]) + addType(SPDie, DefinitionArgs[0]); + + DeclDie = getDIE(SPDecl); + assert(DeclDie && "This DIE should've already been constructed when the " + "definition DIE was created in " + "getOrCreateSubprogramDIE"); + // Look at the Decl's linkage name only if we emitted it. + if (DD->useAllLinkageNames()) + DeclLinkageName = SPDecl->getLinkageName(); + unsigned DeclID = getOrCreateSourceID(SPDecl->getFile()); + unsigned DefID = getOrCreateSourceID(SP->getFile()); + if (DeclID != DefID) + addUInt(SPDie, dwarf::DW_AT_decl_file, None, DefID); + + if (SP->getLine() != SPDecl->getLine()) + addUInt(SPDie, dwarf::DW_AT_decl_line, None, SP->getLine()); + } } // Add function template parameters. @@ -1163,7 +1189,7 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie, bool SkipSPSourceLocation = SkipSPAttributes && !CUNode->getDebugInfoForProfiling(); if (!SkipSPSourceLocation) - if (applySubprogramDefinitionAttributes(SP, SPDie)) + if (applySubprogramDefinitionAttributes(SP, SPDie, SkipSPAttributes)) return; // Constructors and operators for anonymous aggregates do not have names. @@ -1286,9 +1312,6 @@ void DwarfUnit::constructSubrangeDIE(DIE &Buffer, const DISubrange *SR, // Count == -1 then the array is unbounded and we do not emit // DW_AT_lower_bound and DW_AT_count attributes. 
int64_t DefaultLowerBound = getDefaultLowerBound(); - int64_t Count = -1; - if (auto *CI = SR->getCount().dyn_cast<ConstantInt*>()) - Count = CI->getSExtValue(); auto AddBoundTypeEntry = [&](dwarf::Attribute Attr, DISubrange::BoundType Bound) -> void { @@ -1302,19 +1325,18 @@ void DwarfUnit::constructSubrangeDIE(DIE &Buffer, const DISubrange *SR, DwarfExpr.addExpression(BE); addBlock(DW_Subrange, Attr, DwarfExpr.finalize()); } else if (auto *BI = Bound.dyn_cast<ConstantInt *>()) { - if (Attr != dwarf::DW_AT_lower_bound || DefaultLowerBound == -1 || - BI->getSExtValue() != DefaultLowerBound) + if (Attr == dwarf::DW_AT_count) { + if (BI->getSExtValue() != -1) + addUInt(DW_Subrange, Attr, None, BI->getSExtValue()); + } else if (Attr != dwarf::DW_AT_lower_bound || DefaultLowerBound == -1 || + BI->getSExtValue() != DefaultLowerBound) addSInt(DW_Subrange, Attr, dwarf::DW_FORM_sdata, BI->getSExtValue()); } }; AddBoundTypeEntry(dwarf::DW_AT_lower_bound, SR->getLowerBound()); - if (auto *CV = SR->getCount().dyn_cast<DIVariable*>()) { - if (auto *CountVarDIE = getDIE(CV)) - addDIEEntry(DW_Subrange, dwarf::DW_AT_count, *CountVarDIE); - } else if (Count != -1) - addUInt(DW_Subrange, dwarf::DW_AT_count, None, Count); + AddBoundTypeEntry(dwarf::DW_AT_count, SR->getCount()); AddBoundTypeEntry(dwarf::DW_AT_upper_bound, SR->getUpperBound()); @@ -1336,7 +1358,9 @@ void DwarfUnit::constructGenericSubrangeDIE(DIE &Buffer, if (auto *VarDIE = getDIE(BV)) addDIEEntry(DwGenericSubrange, Attr, *VarDIE); } else if (auto *BE = Bound.dyn_cast<DIExpression *>()) { - if (BE->isSignedConstant()) { + if (BE->isConstant() && + DIExpression::SignedOrUnsignedConstant::SignedConstant == + *BE->isConstant()) { if (Attr != dwarf::DW_AT_lower_bound || DefaultLowerBound == -1 || static_cast<int64_t>(BE->getElement(1)) != DefaultLowerBound) addSInt(DwGenericSubrange, Attr, dwarf::DW_FORM_sdata, @@ -1462,9 +1486,9 @@ void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy) { // Add subranges to array type. DINodeArray Elements = CTy->getElements(); - for (unsigned i = 0, N = Elements.size(); i < N; ++i) { + for (DINode *E : Elements) { // FIXME: Should this really be such a loose cast? - if (auto *Element = dyn_cast_or_null<DINode>(Elements[i])) { + if (auto *Element = dyn_cast_or_null<DINode>(E)) { if (Element->getTag() == dwarf::DW_TAG_subrange_type) constructSubrangeDIE(Buffer, cast<DISubrange>(Element), IdxTy); else if (Element->getTag() == dwarf::DW_TAG_generic_subrange) @@ -1490,8 +1514,8 @@ void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy) { DINodeArray Elements = CTy->getElements(); // Add enumerators to enumeration type. 
- for (unsigned i = 0, N = Elements.size(); i < N; ++i) { - auto *Enum = dyn_cast_or_null<DIEnumerator>(Elements[i]); + for (const DINode *E : Elements) { + auto *Enum = dyn_cast_or_null<DIEnumerator>(E); if (Enum) { DIE &Enumerator = createAndAddDIE(dwarf::DW_TAG_enumerator, Buffer); StringRef Name = Enum->getName(); @@ -1504,10 +1528,9 @@ void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy) { } void DwarfUnit::constructContainingTypeDIEs() { - for (auto CI = ContainingTypeMap.begin(), CE = ContainingTypeMap.end(); - CI != CE; ++CI) { - DIE &SPDie = *CI->first; - const DINode *D = CI->second; + for (auto &P : ContainingTypeMap) { + DIE &SPDie = *P.first; + const DINode *D = P.second; if (!D) continue; DIE *NDie = getDIE(D); @@ -1595,9 +1618,18 @@ DIE &DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) { addUInt(*MemLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); addUInt(*MemLocationDie, dwarf::DW_FORM_udata, OffsetInBytes); addBlock(MemberDie, dwarf::DW_AT_data_member_location, MemLocationDie); - } else if (!IsBitfield || DD->useDWARF2Bitfields()) - addUInt(MemberDie, dwarf::DW_AT_data_member_location, None, - OffsetInBytes); + } else if (!IsBitfield || DD->useDWARF2Bitfields()) { + // In DWARF v3, DW_FORM_data4/8 in DW_AT_data_member_location are + // interpreted as location-list pointers. Interpreting constants as + // pointers is not expected, so we use DW_FORM_udata to encode the + // constants here. + if (DD->getDwarfVersion() == 3) + addUInt(MemberDie, dwarf::DW_AT_data_member_location, + dwarf::DW_FORM_udata, OffsetInBytes); + else + addUInt(MemberDie, dwarf::DW_AT_data_member_location, None, + OffsetInBytes); + } } if (DT->isProtected()) @@ -1617,8 +1649,8 @@ DIE &DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) { // Objective-C properties. if (DINode *PNode = DT->getObjCProperty()) if (DIE *PDie = getDIE(PNode)) - MemberDie.addValue(DIEValueAllocator, dwarf::DW_AT_APPLE_property, - dwarf::DW_FORM_ref4, DIEEntry(*PDie)); + addAttribute(MemberDie, dwarf::DW_AT_APPLE_property, + dwarf::DW_FORM_ref4, DIEEntry(*PDie)); if (DT->isArtificial()) addFlag(MemberDie, dwarf::DW_AT_artificial); @@ -1675,13 +1707,10 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(const DIDerivedType *DT) { void DwarfUnit::emitCommonHeader(bool UseOffsets, dwarf::UnitType UT) { // Emit size of content not including length itself - if (!DD->useSectionsAsReferences()) { - StringRef Prefix = isDwoUnit() ? "debug_info_dwo_" : "debug_info_"; - MCSymbol *BeginLabel = Asm->createTempSymbol(Prefix + "start"); - EndLabel = Asm->createTempSymbol(Prefix + "end"); - Asm->emitDwarfUnitLength(EndLabel, BeginLabel, "Length of Unit"); - Asm->OutStreamer->emitLabel(BeginLabel); - } else + if (!DD->useSectionsAsReferences()) + EndLabel = Asm->emitDwarfUnitLength( + isDwoUnit() ? "debug_info_dwo" : "debug_info", "Length of Unit"); + else Asm->emitDwarfUnitLength(getHeaderSize() + getUnitDie().getSize(), "Length of Unit"); @@ -1725,20 +1754,18 @@ void DwarfTypeUnit::emitHeader(bool UseOffsets) { Asm->emitDwarfLengthOrOffset(Ty ? 
Ty->getOffset() : 0); } -DIE::value_iterator -DwarfUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute, - const MCSymbol *Hi, const MCSymbol *Lo) { - return Die.addValue(DIEValueAllocator, Attribute, - DD->getDwarfSectionOffsetForm(), - new (DIEValueAllocator) DIEDelta(Hi, Lo)); +void DwarfUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute, + const MCSymbol *Hi, const MCSymbol *Lo) { + addAttribute(Die, Attribute, DD->getDwarfSectionOffsetForm(), + new (DIEValueAllocator) DIEDelta(Hi, Lo)); } -DIE::value_iterator -DwarfUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute, - const MCSymbol *Label, const MCSymbol *Sec) { +void DwarfUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute, + const MCSymbol *Label, const MCSymbol *Sec) { if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) - return addLabel(Die, Attribute, DD->getDwarfSectionOffsetForm(), Label); - return addSectionDelta(Die, Attribute, Label, Sec); + addLabel(Die, Attribute, DD->getDwarfSectionOffsetForm(), Label); + else + addSectionDelta(Die, Attribute, Label, Sec); } bool DwarfTypeUnit::isDwoUnit() const { diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h index 5c643760fd56..4d31dd0daf59 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -18,6 +18,7 @@ #include "llvm/ADT/Optional.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/DIE.h" +#include "llvm/Target/TargetMachine.h" #include <string> namespace llvm { @@ -72,10 +73,26 @@ protected: DwarfUnit(dwarf::Tag, const DICompileUnit *Node, AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU); - bool applySubprogramDefinitionAttributes(const DISubprogram *SP, DIE &SPDie); + bool applySubprogramDefinitionAttributes(const DISubprogram *SP, DIE &SPDie, bool Minimal); bool isShareableAcrossCUs(const DINode *D) const; + template <typename T> + void addAttribute(DIEValueList &Die, dwarf::Attribute Attribute, + dwarf::Form Form, T &&Value) { + // For strict DWARF mode, only generate attributes available to current + // DWARF version. + // Attribute 0 is used when emitting form-encoded values in blocks, which + // don't have attributes (only forms) so we cannot detect their DWARF + // version compatibility here and assume they are compatible. + if (Attribute != 0 && Asm->TM.Options.DebugStrictDwarf && + DD->getDwarfVersion() < dwarf::AttributeVersion(Attribute)) + return; + + Die.addValue(DIEValueAllocator, + DIEValue(Attribute, Form, std::forward<T>(Value))); + } + public: // Accessors. AsmPrinter* getAsmPrinter() const { return Asm; } @@ -147,10 +164,8 @@ public: void addString(DIE &Die, dwarf::Attribute Attribute, StringRef Str); /// Add a Dwarf label attribute data and value. - DIEValueList::value_iterator addLabel(DIEValueList &Die, - dwarf::Attribute Attribute, - dwarf::Form Form, - const MCSymbol *Label); + void addLabel(DIEValueList &Die, dwarf::Attribute Attribute, dwarf::Form Form, + const MCSymbol *Label); void addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label); @@ -160,10 +175,11 @@ public: /// Add a dwarf op address data and value using the form given and an /// op of either DW_FORM_addr or DW_FORM_GNU_addr_index. void addOpAddress(DIELoc &Die, const MCSymbol *Sym); + void addPoolOpAddress(DIEValueList &Die, const MCSymbol *Label); /// Add a label delta attribute data and value. 
- void addLabelDelta(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Hi, - const MCSymbol *Lo); + void addLabelDelta(DIEValueList &Die, dwarf::Attribute Attribute, + const MCSymbol *Hi, const MCSymbol *Lo); /// Add a DIE attribute data and value. void addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIE &Entry); @@ -179,6 +195,8 @@ public: /// Add block data. void addBlock(DIE &Die, dwarf::Attribute Attribute, DIEBlock *Block); + void addBlock(DIE &Die, dwarf::Attribute Attribute, dwarf::Form Form, + DIEBlock *Block); /// Add location information to specified debug information entry. void addSourceLine(DIE &Die, unsigned Line, const DIFile *File); @@ -239,7 +257,7 @@ public: /// Create a DIE with the given Tag, add the DIE to its parent, and /// call insertDIE if MD is not null. - DIE &createAndAddDIE(unsigned Tag, DIE &Parent, const DINode *N = nullptr); + DIE &createAndAddDIE(dwarf::Tag Tag, DIE &Parent, const DINode *N = nullptr); bool useSegmentedStringOffsetsTable() const { return DD->useSegmentedStringOffsetsTable(); @@ -269,13 +287,12 @@ public: void constructTypeDIE(DIE &Buffer, const DICompositeType *CTy); /// addSectionDelta - Add a label delta attribute data and value. - DIE::value_iterator addSectionDelta(DIE &Die, dwarf::Attribute Attribute, - const MCSymbol *Hi, const MCSymbol *Lo); + void addSectionDelta(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Hi, + const MCSymbol *Lo); /// Add a Dwarf section label attribute data and value. - DIE::value_iterator addSectionLabel(DIE &Die, dwarf::Attribute Attribute, - const MCSymbol *Label, - const MCSymbol *Sec); + void addSectionLabel(DIE &Die, dwarf::Attribute Attribute, + const MCSymbol *Label, const MCSymbol *Sec); /// Get context owner's DIE. DIE *createTypeDIE(const DICompositeType *Ty); diff --git a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp index 2ffe8a7b0469..e589c2e64abd 100644 --- a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -83,10 +83,9 @@ void EHStreamer::computeActionsTable( FilterOffsets.reserve(FilterIds.size()); int Offset = -1; - for (std::vector<unsigned>::const_iterator - I = FilterIds.begin(), E = FilterIds.end(); I != E; ++I) { + for (unsigned FilterId : FilterIds) { FilterOffsets.push_back(Offset); - Offset -= getULEB128Size(*I); + Offset -= getULEB128Size(FilterId); } FirstActions.reserve(LandingPads.size()); @@ -95,9 +94,7 @@ void EHStreamer::computeActionsTable( unsigned SizeActions = 0; // Total size of all action entries for a function const LandingPadInfo *PrevLPI = nullptr; - for (SmallVectorImpl<const LandingPadInfo *>::const_iterator - I = LandingPads.begin(), E = LandingPads.end(); I != E; ++I) { - const LandingPadInfo *LPI = *I; + for (const LandingPadInfo *LPI : LandingPads) { const std::vector<int> &TypeIds = LPI->TypeIds; unsigned NumShared = PrevLPI ? sharedTypeIDs(LPI, PrevLPI) : 0; unsigned SizeSiteActions = 0; // Total size of all entries for a landingpad @@ -420,8 +417,8 @@ MCSymbol *EHStreamer::emitExceptionTable() { bool HaveTTData = !TypeInfos.empty() || !FilterIds.empty(); // Type infos. - MCSection *LSDASection = - Asm->getObjFileLowering().getSectionForLSDA(MF->getFunction(), Asm->TM); + MCSection *LSDASection = Asm->getObjFileLowering().getSectionForLSDA( + MF->getFunction(), *Asm->CurrentFnSym, Asm->TM); unsigned TTypeEncoding; if (!HaveTTData) { @@ -757,10 +754,7 @@ MCSymbol *EHStreamer::emitExceptionTable() { // Emit the Action Table. 
int Entry = 0; - for (SmallVectorImpl<ActionEntry>::const_iterator - I = Actions.begin(), E = Actions.end(); I != E; ++I) { - const ActionEntry &Action = *I; - + for (const ActionEntry &Action : Actions) { if (VerboseAsm) { // Emit comments that decode the action table. Asm->OutStreamer->AddComment(">> Action Record " + Twine(++Entry) + " <<"); diff --git a/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp index 59a84e6f2d7b..70777f07fc6c 100644 --- a/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp @@ -14,10 +14,9 @@ #include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/BuiltinGCs.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GCMetadataPrinter.h" -#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/IR/BuiltinGCs.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" @@ -79,11 +78,10 @@ void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info, AP.emitInt16(MD.size()); // And each safe point... - for (GCFunctionInfo::iterator PI = MD.begin(), PE = MD.end(); PI != PE; - ++PI) { + for (const GCPoint &P : MD) { // Emit the address of the safe point. OS.AddComment("safe point address"); - MCSymbol *Label = PI->Label; + MCSymbol *Label = P.Label; AP.emitLabelPlusOffset(Label /*Hi*/, 0 /*Offset*/, 4 /*Size*/); } diff --git a/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index 354b638b47a2..a9fb31d42679 100644 --- a/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -14,9 +14,9 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/BuiltinGCs.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GCMetadataPrinter.h" +#include "llvm/IR/BuiltinGCs.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/Mangler.h" diff --git a/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp index e8636052c54c..35a830f416f6 100644 --- a/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp @@ -20,31 +20,6 @@ using namespace llvm; -#define DEBUG_TYPE "pseudoprobe" - -PseudoProbeHandler::~PseudoProbeHandler() = default; - -PseudoProbeHandler::PseudoProbeHandler(AsmPrinter *A, Module *M) : Asm(A) { - NamedMDNode *FuncInfo = M->getNamedMetadata(PseudoProbeDescMetadataName); - assert(FuncInfo && "Pseudo probe descriptors are missing"); - for (const auto *Operand : FuncInfo->operands()) { - const auto *MD = cast<MDNode>(Operand); - auto GUID = - mdconst::dyn_extract<ConstantInt>(MD->getOperand(0))->getZExtValue(); - auto Name = cast<MDString>(MD->getOperand(2))->getString(); - // We may see pairs with same name but different GUIDs here in LTO mode, due - // to static same-named functions inlined from other modules into this - // module. Function profiles with the same name will be merged no matter - // whether they are collected on the same function. Therefore we just pick - // up the last <Name, GUID> pair here to represent the same-named function - // collection and all probes from the collection will be merged into a - // single profile eventually. 
- Names[Name] = GUID; - } - - LLVM_DEBUG(dump()); -} - void PseudoProbeHandler::emitPseudoProbe(uint64_t Guid, uint64_t Index, uint64_t Type, uint64_t Attr, const DILocation *DebugLoc) { @@ -60,8 +35,7 @@ void PseudoProbeHandler::emitPseudoProbe(uint64_t Guid, uint64_t Index, auto Name = SP->getLinkageName(); if (Name.empty()) Name = SP->getName(); - assert(Names.count(Name) && "Pseudo probe descriptor missing for function"); - uint64_t CallerGuid = Names[Name]; + uint64_t CallerGuid = Function::getGUID(Name); uint64_t CallerProbeId = PseudoProbeDwarfDiscriminator::extractProbeIndex( InlinedAt->getDiscriminator()); ReversedInlineStack.emplace_back(CallerGuid, CallerProbeId); @@ -72,13 +46,3 @@ void PseudoProbeHandler::emitPseudoProbe(uint64_t Guid, uint64_t Index, ReversedInlineStack.rend()); Asm->OutStreamer->emitPseudoProbe(Guid, Index, Type, Attr, InlineStack); } - -#ifndef NDEBUG -void PseudoProbeHandler::dump() const { - dbgs() << "\n=============================\n"; - dbgs() << "\nFunction Name to GUID map:\n"; - dbgs() << "\n=============================\n"; - for (const auto &Item : Names) - dbgs() << "Func: " << Item.first << " GUID: " << Item.second << "\n"; -} -#endif diff --git a/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h index bea07ceae9d4..f2026a118bf5 100644 --- a/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h +++ b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h @@ -26,12 +26,9 @@ class DILocation; class PseudoProbeHandler : public AsmPrinterHandler { // Target of pseudo probe emission. AsmPrinter *Asm; - // Name to GUID map - DenseMap<StringRef, uint64_t> Names; public: - PseudoProbeHandler(AsmPrinter *A, Module *M); - ~PseudoProbeHandler() override; + PseudoProbeHandler(AsmPrinter *A) : Asm(A){}; void emitPseudoProbe(uint64_t Guid, uint64_t Index, uint64_t Type, uint64_t Attr, const DILocation *DebugLoc); @@ -43,10 +40,6 @@ public: void endFunction(const MachineFunction *MF) override {} void beginInstruction(const MachineInstr *MI) override {} void endInstruction() override {} - -#ifndef NDEBUG - void dump() const; -#endif }; } // namespace llvm diff --git a/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/llvm/lib/CodeGen/AsmPrinter/WinException.cpp index 3a9c9df79783..b30d9cc12abc 100644 --- a/llvm/lib/CodeGen/AsmPrinter/WinException.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/WinException.cpp @@ -55,6 +55,14 @@ void WinException::endModule() { for (const Function &F : *M) if (F.hasFnAttribute("safeseh")) OS.EmitCOFFSafeSEH(Asm->getSymbol(&F)); + + if (M->getModuleFlag("ehcontguard") && !EHContTargets.empty()) { + // Emit the symbol index of each ehcont target. + OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGEHContSection()); + for (const MCSymbol *S : EHContTargets) { + OS.EmitCOFFSymbolIndex(S); + } + } } void WinException::beginFunction(const MachineFunction *MF) { @@ -164,6 +172,12 @@ void WinException::endFunction(const MachineFunction *MF) { Asm->OutStreamer->PopSection(); } + + if (!MF->getCatchretTargets().empty()) { + // Copy the function's catchret targets to a module-level list. + EHContTargets.insert(EHContTargets.end(), MF->getCatchretTargets().begin(), + MF->getCatchretTargets().end()); + } } /// Retrieve the MCSymbol for a GlobalValue or MachineBasicBlock. 
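Editorial aside on the PseudoProbePrinter simplification above: the Name-to-GUID map (and its constructor, destructor, and dump support) could be deleted because the GUID is a pure function of a function's linkage name, so same-named functions merged across LTO modules still hash to the same value without any pre-built table. A minimal sketch of the invariant relied on, using only the public GlobalValue API (the helper name is made up for illustration):

#include "llvm/ADT/StringRef.h"
#include "llvm/IR/GlobalValue.h"

// Recomputing the GUID on demand is equivalent to the lookup in the deleted
// Names map: GlobalValue::getGUID is a deterministic MD5-based hash of the
// given name, so every call site that sees the same linkage name agrees.
static uint64_t callerGuidFor(llvm::StringRef LinkageName) {
  return llvm::GlobalValue::getGUID(LinkageName);
}

The one behavioral difference is the loss of the assertion that a probe descriptor existed for the name, which the old map lookup used to provide.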
diff --git a/llvm/lib/CodeGen/AsmPrinter/WinException.h b/llvm/lib/CodeGen/AsmPrinter/WinException.h index 8bd5d1bc6d2a..feea05ba63ad 100644 --- a/llvm/lib/CodeGen/AsmPrinter/WinException.h +++ b/llvm/lib/CodeGen/AsmPrinter/WinException.h @@ -14,6 +14,7 @@ #define LLVM_LIB_CODEGEN_ASMPRINTER_WIN64EXCEPTION_H #include "EHStreamer.h" +#include <vector> namespace llvm { class GlobalValue; @@ -44,6 +45,9 @@ class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer { /// The section of the last funclet start. MCSection *CurrentFuncletTextSection = nullptr; + /// The list of symbols to add to the ehcont section + std::vector<const MCSymbol *> EHContTargets; + void emitCSpecificHandlerTable(const MachineFunction *MF); void emitSEHActionsForRange(const WinEHFuncInfo &FuncInfo, diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index 4026022caa07..125a3be585cb 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -78,12 +78,14 @@ namespace { StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI); bool expandAtomicStore(StoreInst *SI); bool tryExpandAtomicRMW(AtomicRMWInst *AI); + AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI); Value * insertRMWLLSCLoop(IRBuilder<> &Builder, Type *ResultTy, Value *Addr, - AtomicOrdering MemOpOrder, + Align AddrAlign, AtomicOrdering MemOpOrder, function_ref<Value *(IRBuilder<> &, Value *)> PerformOp); void expandAtomicOpToLLSC( - Instruction *I, Type *ResultTy, Value *Addr, AtomicOrdering MemOpOrder, + Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign, + AtomicOrdering MemOpOrder, function_ref<Value *(IRBuilder<> &, Value *)> PerformOp); void expandPartwordAtomicRMW( AtomicRMWInst *I, @@ -95,8 +97,8 @@ namespace { AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI); static Value *insertRMWCmpXchgLoop( - IRBuilder<> &Builder, Type *ResultType, Value *Addr, - AtomicOrdering MemOpOrder, + IRBuilder<> &Builder, Type *ResultType, Value *Addr, Align AddrAlign, + AtomicOrdering MemOpOrder, SyncScope::ID SSID, function_ref<Value *(IRBuilder<> &, Value *)> PerformOp, CreateCmpXchgInstFun CreateCmpXchg); bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI); @@ -234,12 +236,13 @@ bool AtomicExpand::runOnFunction(Function &F) { TLI->shouldExpandAtomicCmpXchgInIR(CASI) == TargetLoweringBase::AtomicExpansionKind::None && (isReleaseOrStronger(CASI->getSuccessOrdering()) || - isAcquireOrStronger(CASI->getSuccessOrdering()))) { + isAcquireOrStronger(CASI->getSuccessOrdering()) || + isAcquireOrStronger(CASI->getFailureOrdering()))) { // If a compare and swap is lowered to LL/SC, we can do smarter fence // insertion, with a stronger one on the success path than on the // failure path. As a result, fence insertion is directly done by // expandAtomicCmpXchg in that case. - FenceOrdering = CASI->getSuccessOrdering(); + FenceOrdering = CASI->getMergedOrdering(); CASI->setSuccessOrdering(AtomicOrdering::Monotonic); CASI->setFailureOrdering(AtomicOrdering::Monotonic); } @@ -280,9 +283,18 @@ bool AtomicExpand::runOnFunction(Function &F) { if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) { MadeChange = true; } else { + AtomicRMWInst::BinOp Op = RMWI->getOperation(); + if (Op == AtomicRMWInst::Xchg && + RMWI->getValOperand()->getType()->isFloatingPointTy()) { + // TODO: add a TLI hook to control this so that each target can + // convert to lowering the original type one at a time. 
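// [Ed. note: the conversion below is value-preserving because xchg merely
// moves bits; convertAtomicXchgToIntegerType bitcasts the FP operand to a
// same-width integer, performs the integer xchg, and bitcasts the result
// back, so no rounding or re-encoding of the value can occur.]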
+ RMWI = convertAtomicXchgToIntegerType(RMWI); + assert(RMWI->getValOperand()->getType()->isIntegerTy() && + "invariant broken"); + MadeChange = true; + } unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8; unsigned ValueSize = getAtomicOpSize(RMWI); - AtomicRMWInst::BinOp Op = RMWI->getOperation(); if (ValueSize < MinCASSize && (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor || Op == AtomicRMWInst::And)) { @@ -362,13 +374,40 @@ LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) { return NewLI; } +AtomicRMWInst * +AtomicExpand::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) { + auto *M = RMWI->getModule(); + Type *NewTy = + getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout()); + + IRBuilder<> Builder(RMWI); + + Value *Addr = RMWI->getPointerOperand(); + Value *Val = RMWI->getValOperand(); + Type *PT = PointerType::get(NewTy, RMWI->getPointerAddressSpace()); + Value *NewAddr = Builder.CreateBitCast(Addr, PT); + Value *NewVal = Builder.CreateBitCast(Val, NewTy); + + auto *NewRMWI = + Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, NewAddr, NewVal, + RMWI->getAlign(), RMWI->getOrdering()); + NewRMWI->setVolatile(RMWI->isVolatile()); + LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n"); + + Value *NewRVal = Builder.CreateBitCast(NewRMWI, RMWI->getType()); + RMWI->replaceAllUsesWith(NewRVal); + RMWI->eraseFromParent(); + return NewRMWI; +} + bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) { switch (TLI->shouldExpandAtomicLoadInIR(LI)) { case TargetLoweringBase::AtomicExpansionKind::None: return false; case TargetLoweringBase::AtomicExpansionKind::LLSC: expandAtomicOpToLLSC( - LI, LI->getType(), LI->getPointerOperand(), LI->getOrdering(), + LI, LI->getType(), LI->getPointerOperand(), LI->getAlign(), + LI->getOrdering(), [](IRBuilder<> &Builder, Value *Loaded) { return Loaded; }); return true; case TargetLoweringBase::AtomicExpansionKind::LLOnly: @@ -386,8 +425,8 @@ bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) { // On some architectures, load-linked instructions are atomic for larger // sizes than normal loads. For example, the only 64-bit load guaranteed // to be single-copy atomic by ARM is an ldrexd (A3.5.3). - Value *Val = - TLI->emitLoadLinked(Builder, LI->getPointerOperand(), LI->getOrdering()); + Value *Val = TLI->emitLoadLinked(Builder, LI->getType(), + LI->getPointerOperand(), LI->getOrdering()); TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder); LI->replaceAllUsesWith(Val); @@ -403,11 +442,11 @@ bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) { Order = AtomicOrdering::Monotonic; Value *Addr = LI->getPointerOperand(); - Type *Ty = cast<PointerType>(Addr->getType())->getElementType(); + Type *Ty = LI->getType(); Constant *DummyVal = Constant::getNullValue(Ty); Value *Pair = Builder.CreateAtomicCmpXchg( - Addr, DummyVal, DummyVal, Order, + Addr, DummyVal, DummyVal, LI->getAlign(), Order, AtomicCmpXchgInst::getStrongestFailureOrdering(Order)); Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded"); @@ -454,9 +493,9 @@ bool AtomicExpand::expandAtomicStore(StoreInst *SI) { // It is the responsibility of the target to only signal expansion via // shouldExpandAtomicRMW in cases where this is required and possible. 
IRBuilder<> Builder(SI); - AtomicRMWInst *AI = - Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(), - SI->getValueOperand(), SI->getOrdering()); + AtomicRMWInst *AI = Builder.CreateAtomicRMW( + AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(), + SI->getAlign(), SI->getOrdering()); SI->eraseFromParent(); // Now we have an appropriate swap instruction, lower it as usual. @@ -464,8 +503,8 @@ bool AtomicExpand::expandAtomicStore(StoreInst *SI) { } static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr, - Value *Loaded, Value *NewVal, - AtomicOrdering MemOpOrder, + Value *Loaded, Value *NewVal, Align AddrAlign, + AtomicOrdering MemOpOrder, SyncScope::ID SSID, Value *&Success, Value *&NewLoaded) { Type *OrigTy = NewVal->getType(); @@ -479,9 +518,9 @@ static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr, Loaded = Builder.CreateBitCast(Loaded, IntTy); } - Value* Pair = Builder.CreateAtomicCmpXchg( - Addr, Loaded, NewVal, MemOpOrder, - AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder)); + Value *Pair = Builder.CreateAtomicCmpXchg( + Addr, Loaded, NewVal, AddrAlign, MemOpOrder, + AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID); Success = Builder.CreateExtractValue(Pair, 1, "success"); NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded"); @@ -546,7 +585,7 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { AI->getValOperand()); }; expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(), - AI->getOrdering(), PerformOp); + AI->getAlign(), AI->getOrdering(), PerformOp); } return true; } @@ -581,6 +620,7 @@ struct PartwordMaskValues { Type *WordType = nullptr; Type *ValueType = nullptr; Value *AlignedAddr = nullptr; + Align AlignedAddrAlignment; // The remaining fields can be null. Value *ShiftAmt = nullptr; Value *Mask = nullptr; @@ -603,6 +643,7 @@ raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) { PrintObj(PMV.ValueType); O << " AlignedAddr: "; PrintObj(PMV.AlignedAddr); + O << " AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n'; O << " ShiftAmt: "; PrintObj(PMV.ShiftAmt); O << " Mask: "; @@ -633,6 +674,7 @@ raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) { /// Inv_Mask: The inverse of Mask. static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I, Type *ValueType, Value *Addr, + Align AddrAlign, unsigned MinWordSize) { PartwordMaskValues PMV; @@ -646,6 +688,9 @@ static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I, : ValueType; if (PMV.ValueType == PMV.WordType) { PMV.AlignedAddr = Addr; + PMV.AlignedAddrAlignment = AddrAlign; + PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0); + PMV.Mask = ConstantInt::get(PMV.ValueType, ~0); return PMV; } @@ -654,10 +699,12 @@ static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I, Type *WordPtrType = PMV.WordType->getPointerTo(Addr->getType()->getPointerAddressSpace()); + // TODO: we could skip some of this if AddrAlign >= MinWordSize. 
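// [Ed. worked example, assuming MinWordSize == 4: a 1-byte atomic at
// address 0x1003 gets AlignedAddr = 0x1000 and PtrLSB = 3; on a
// little-endian target the byte then sits at bit offset PtrLSB * 8 = 24,
// so the computed Mask covers bits 24..31 of the containing 32-bit word.]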
Value *AddrInt = Builder.CreatePtrToInt(Addr, DL.getIntPtrType(Ctx)); PMV.AlignedAddr = Builder.CreateIntToPtr( Builder.CreateAnd(AddrInt, ~(uint64_t)(MinWordSize - 1)), WordPtrType, "AlignedAddr"); + PMV.AlignedAddrAlignment = Align(MinWordSize); Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB"); if (DL.isLittleEndian()) { @@ -760,12 +807,13 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op, void AtomicExpand::expandPartwordAtomicRMW( AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) { AtomicOrdering MemOpOrder = AI->getOrdering(); + SyncScope::ID SSID = AI->getSyncScopeID(); IRBuilder<> Builder(AI); PartwordMaskValues PMV = createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(), - TLI->getMinCmpXchgSizeInBits() / 8); + AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8); Value *ValOperand_Shifted = Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType), @@ -778,13 +826,15 @@ void AtomicExpand::expandPartwordAtomicRMW( Value *OldResult; if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) { - OldResult = - insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder, - PerformPartwordOp, createCmpXchgInstFun); + OldResult = insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr, + PMV.AlignedAddrAlignment, MemOpOrder, + SSID, PerformPartwordOp, + createCmpXchgInstFun); } else { assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC); OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr, - MemOpOrder, PerformPartwordOp); + PMV.AlignedAddrAlignment, MemOpOrder, + PerformPartwordOp); } Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV); @@ -803,7 +853,7 @@ AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) { PartwordMaskValues PMV = createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(), - TLI->getMinCmpXchgSizeInBits() / 8); + AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8); Value *ValOperand_Shifted = Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType), @@ -817,8 +867,9 @@ AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) { else NewOperand = ValOperand_Shifted; - AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(Op, PMV.AlignedAddr, - NewOperand, AI->getOrdering()); + AtomicRMWInst *NewAI = + Builder.CreateAtomicRMW(Op, PMV.AlignedAddr, NewOperand, + PMV.AlignedAddrAlignment, AI->getOrdering()); Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV); AI->replaceAllUsesWith(FinalOldResult); @@ -871,8 +922,6 @@ bool AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) { IRBuilder<> Builder(CI); LLVMContext &Ctx = Builder.getContext(); - const int WordSize = TLI->getMinCmpXchgSizeInBits() / 8; - BasicBlock *EndBB = BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end"); auto FailureBB = @@ -884,8 +933,9 @@ bool AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) { std::prev(BB->end())->eraseFromParent(); Builder.SetInsertPoint(BB); - PartwordMaskValues PMV = createMaskInstrs( - Builder, CI, CI->getCompareOperand()->getType(), Addr, WordSize); + PartwordMaskValues PMV = + createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr, + CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8); // Shift the incoming values over, into the right location in the word. 
Value *NewVal_Shifted = @@ -909,8 +959,8 @@ bool AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) { Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted); Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted); AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg( - PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, CI->getSuccessOrdering(), - CI->getFailureOrdering(), CI->getSyncScopeID()); + PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, PMV.AlignedAddrAlignment, + CI->getSuccessOrdering(), CI->getFailureOrdering(), CI->getSyncScopeID()); NewCI->setVolatile(CI->isVolatile()); // When we're building a strong cmpxchg, we need a loop, so you // might think we could use a weak cmpxchg inside. But, using strong @@ -953,11 +1003,12 @@ bool AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) { } void AtomicExpand::expandAtomicOpToLLSC( - Instruction *I, Type *ResultType, Value *Addr, AtomicOrdering MemOpOrder, + Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign, + AtomicOrdering MemOpOrder, function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) { IRBuilder<> Builder(I); - Value *Loaded = - insertRMWLLSCLoop(Builder, ResultType, Addr, MemOpOrder, PerformOp); + Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign, + MemOpOrder, PerformOp); I->replaceAllUsesWith(Loaded); I->eraseFromParent(); @@ -968,7 +1019,7 @@ void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) { PartwordMaskValues PMV = createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(), - TLI->getMinCmpXchgSizeInBits() / 8); + AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8); // The value operand must be sign-extended for signed min/max so that the // target's signed comparison instructions can be used. 
Otherwise, just @@ -994,7 +1045,7 @@ void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) { PartwordMaskValues PMV = createMaskInstrs( Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(), - TLI->getMinCmpXchgSizeInBits() / 8); + CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8); Value *CmpVal_Shifted = Builder.CreateShl( Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt, @@ -1004,7 +1055,7 @@ void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) { "NewVal_Shifted"); Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic( Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask, - CI->getSuccessOrdering()); + CI->getMergedOrdering()); Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV); Value *Res = UndefValue::get(CI->getType()); Res = Builder.CreateInsertValue(Res, FinalOldVal, 0); @@ -1017,13 +1068,17 @@ void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) { } Value *AtomicExpand::insertRMWLLSCLoop( - IRBuilder<> &Builder, Type *ResultTy, Value *Addr, + IRBuilder<> &Builder, Type *ResultTy, Value *Addr, Align AddrAlign, AtomicOrdering MemOpOrder, function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) { LLVMContext &Ctx = Builder.getContext(); BasicBlock *BB = Builder.GetInsertBlock(); Function *F = BB->getParent(); + assert(AddrAlign >= + F->getParent()->getDataLayout().getTypeStoreSize(ResultTy) && + "Expected at least natural alignment at this point."); + // Given: atomicrmw some_op iN* %addr, iN %incr ordering // // The standard expansion we produce is: @@ -1048,7 +1103,7 @@ Value *AtomicExpand::insertRMWLLSCLoop( // Start the main loop block now that we've taken care of the preliminaries. Builder.SetInsertPoint(LoopBB); - Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder); + Value *Loaded = TLI->emitLoadLinked(Builder, ResultTy, Addr, MemOpOrder); Value *NewVal = PerformOp(Builder, Loaded); @@ -1082,11 +1137,9 @@ AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst * Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy); Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy); - - auto *NewCI = Builder.CreateAtomicCmpXchg(NewAddr, NewCmp, NewNewVal, - CI->getSuccessOrdering(), - CI->getFailureOrdering(), - CI->getSyncScopeID()); + auto *NewCI = Builder.CreateAtomicCmpXchg( + NewAddr, NewCmp, NewNewVal, CI->getAlign(), CI->getSuccessOrdering(), + CI->getFailureOrdering(), CI->getSyncScopeID()); NewCI->setVolatile(CI->isVolatile()); NewCI->setWeak(CI->isWeak()); LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n"); @@ -1117,8 +1170,9 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we // should preserve the ordering. bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI); - AtomicOrdering MemOpOrder = - ShouldInsertFencesForAtomic ? AtomicOrdering::Monotonic : SuccessOrder; + AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic + ? 
AtomicOrdering::Monotonic + : CI->getMergedOrdering(); // In implementations which use a barrier to achieve release semantics, we can // delay emitting this barrier until we know a store is actually going to be @@ -1211,13 +1265,13 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { PartwordMaskValues PMV = createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr, - TLI->getMinCmpXchgSizeInBits() / 8); + CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8); Builder.CreateBr(StartBB); // Start the main loop block now that we've taken care of the preliminaries. Builder.SetInsertPoint(StartBB); Value *UnreleasedLoad = - TLI->emitLoadLinked(Builder, PMV.AlignedAddr, MemOpOrder); + TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder); Value *UnreleasedLoadExtract = extractMaskedValue(Builder, UnreleasedLoad, PMV); Value *ShouldStore = Builder.CreateICmpEQ( @@ -1250,7 +1304,8 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { Builder.SetInsertPoint(ReleasedLoadBB); Value *SecondLoad; if (HasReleasedLoadBB) { - SecondLoad = TLI->emitLoadLinked(Builder, PMV.AlignedAddr, MemOpOrder); + SecondLoad = + TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder); Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV); ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract, CI->getCompareOperand(), "should_store"); @@ -1379,8 +1434,8 @@ bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) { } Value *AtomicExpand::insertRMWCmpXchgLoop( - IRBuilder<> &Builder, Type *ResultTy, Value *Addr, - AtomicOrdering MemOpOrder, + IRBuilder<> &Builder, Type *ResultTy, Value *Addr, Align AddrAlign, + AtomicOrdering MemOpOrder, SyncScope::ID SSID, function_ref<Value *(IRBuilder<> &, Value *)> PerformOp, CreateCmpXchgInstFun CreateCmpXchg) { LLVMContext &Ctx = Builder.getContext(); @@ -1411,9 +1466,7 @@ Value *AtomicExpand::insertRMWCmpXchgLoop( // the branch entirely. std::prev(BB->end())->eraseFromParent(); Builder.SetInsertPoint(BB); - LoadInst *InitLoaded = Builder.CreateLoad(ResultTy, Addr); - // Atomics require at least natural alignment. - InitLoaded->setAlignment(Align(ResultTy->getPrimitiveSizeInBits() / 8)); + LoadInst *InitLoaded = Builder.CreateAlignedLoad(ResultTy, Addr, AddrAlign); Builder.CreateBr(LoopBB); // Start the main loop block now that we've taken care of the preliminaries. @@ -1426,11 +1479,11 @@ Value *AtomicExpand::insertRMWCmpXchgLoop( Value *NewLoaded = nullptr; Value *Success = nullptr; - CreateCmpXchg(Builder, Addr, Loaded, NewVal, + CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign, MemOpOrder == AtomicOrdering::Unordered ? AtomicOrdering::Monotonic : MemOpOrder, - Success, NewLoaded); + SSID, Success, NewLoaded); assert(Success && NewLoaded); Loaded->addIncoming(NewLoaded, LoopBB); @@ -1466,7 +1519,8 @@ bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, CreateCmpXchgInstFun CreateCmpXchg) { IRBuilder<> Builder(AI); Value *Loaded = AtomicExpand::insertRMWCmpXchgLoop( - Builder, AI->getType(), AI->getPointerOperand(), AI->getOrdering(), + Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(), + AI->getOrdering(), AI->getSyncScopeID(), [&](IRBuilder<> &Builder, Value *Loaded) { return performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand()); @@ -1614,20 +1668,20 @@ void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) { // libcalls (add/sub/etc) and we needed a generic. So, expand to a // CAS libcall, via a CAS loop, instead. 
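// [Ed. note: the CAS loop is always a valid fallback here because a generic
// compare-exchange libcall is defined for every operand size, unlike the
// per-operation RMW libcalls, which may be missing for a given operation.]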
if (!Success) { - expandAtomicRMWToCmpXchg(I, [this](IRBuilder<> &Builder, Value *Addr, - Value *Loaded, Value *NewVal, - AtomicOrdering MemOpOrder, - Value *&Success, Value *&NewLoaded) { - // Create the CAS instruction normally... - AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg( - Addr, Loaded, NewVal, MemOpOrder, - AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder)); - Success = Builder.CreateExtractValue(Pair, 1, "success"); - NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded"); - - // ...and then expand the CAS into a libcall. - expandAtomicCASToLibcall(Pair); - }); + expandAtomicRMWToCmpXchg( + I, [this](IRBuilder<> &Builder, Value *Addr, Value *Loaded, + Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder, + SyncScope::ID SSID, Value *&Success, Value *&NewLoaded) { + // Create the CAS instruction normally... + AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg( + Addr, Loaded, NewVal, Alignment, MemOpOrder, + AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID); + Success = Builder.CreateExtractValue(Pair, 1, "success"); + NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded"); + + // ...and then expand the CAS into a libcall. + expandAtomicCASToLibcall(Pair); + }); } } diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp index 7499ea8b42d4..1a6eed272ca2 100644 --- a/llvm/lib/CodeGen/BasicBlockSections.cpp +++ b/llvm/lib/CodeGen/BasicBlockSections.cpp @@ -88,6 +88,12 @@ cl::opt<std::string> llvm::BBSectionsColdTextPrefix( cl::desc("The text prefix to use for cold basic block clusters"), cl::init(".text.split."), cl::Hidden); +cl::opt<bool> BBSectionsDetectSourceDrift( + "bbsections-detect-source-drift", + cl::desc("This checks if there is a fdo instr. profile hash " + "mismatch for this function"), + cl::init(true), cl::Hidden); + namespace { // This struct represents the cluster information for a machine basic block. @@ -303,20 +309,51 @@ static bool avoidZeroOffsetLandingPad(MachineFunction &MF) { MachineBasicBlock::iterator MI = MBB.begin(); while (!MI->isEHLabel()) ++MI; - MCInst Noop; - MF.getSubtarget().getInstrInfo()->getNoop(Noop); + MCInst Nop = MF.getSubtarget().getInstrInfo()->getNop(); BuildMI(MBB, MI, DebugLoc(), - MF.getSubtarget().getInstrInfo()->get(Noop.getOpcode())); + MF.getSubtarget().getInstrInfo()->get(Nop.getOpcode())); return false; } } return true; } +// This checks if the source of this function has drifted since this binary was +// profiled previously. For now, we are piggy backing on what PGO does to +// detect this with instrumented profiles. PGO emits an hash of the IR and +// checks if the hash has changed. Advanced basic block layout is usually done +// on top of PGO optimized binaries and hence this check works well in practice. +static bool hasInstrProfHashMismatch(MachineFunction &MF) { + if (!BBSectionsDetectSourceDrift) + return false; + + const char MetadataName[] = "instr_prof_hash_mismatch"; + auto *Existing = MF.getFunction().getMetadata(LLVMContext::MD_annotation); + if (Existing) { + MDTuple *Tuple = cast<MDTuple>(Existing); + for (auto &N : Tuple->operands()) + if (cast<MDString>(N.get())->getString() == MetadataName) + return true; + } + + return false; +} + bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) { auto BBSectionsType = MF.getTarget().getBBSectionsType(); assert(BBSectionsType != BasicBlockSection::None && "BB Sections not enabled!"); + + // Check for source drift. 
If the source has changed since the profiles + // were obtained, optimizing basic blocks might be sub-optimal. + // This only applies to BasicBlockSection::List as it creates + // clusters of basic blocks using basic block ids. Source drift can + // invalidate these groupings leading to sub-optimal code generation with + // regards to performance. + if (BBSectionsType == BasicBlockSection::List && + hasInstrProfHashMismatch(MF)) + return true; + // Renumber blocks before sorting them for basic block sections. This is // useful during sorting, basic blocks in the same section will retain the // default order. This renumbering should also be done for basic block diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp index fd3f465fb390..65e7e92fe152 100644 --- a/llvm/lib/CodeGen/BranchFolding.cpp +++ b/llvm/lib/CodeGen/BranchFolding.cpp @@ -164,10 +164,10 @@ void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) { TriedMerging.erase(MBB); // Update call site info. - std::for_each(MBB->begin(), MBB->end(), [MF](const MachineInstr &MI) { + for (const MachineInstr &MI : *MBB) if (MI.shouldUpdateCallSiteInfo()) MF->eraseCallSiteInfo(&MI); - }); + // Remove the block. MF->erase(MBB); EHScopeMembership.erase(MBB); @@ -286,7 +286,7 @@ static unsigned HashMachineInstr(const MachineInstr &MI) { /// HashEndOfMBB - Hash the last instruction in the MBB. static unsigned HashEndOfMBB(const MachineBasicBlock &MBB) { - MachineBasicBlock::const_iterator I = MBB.getLastNonDebugInstr(); + MachineBasicBlock::const_iterator I = MBB.getLastNonDebugInstr(false); if (I == MBB.end()) return 0; @@ -566,9 +566,9 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2, // Move the iterators to the beginning of the MBB if we only got debug // instructions before the tail. This is to avoid splitting a block when we // only got debug instructions before the tail (to be invariant on -g). - if (skipDebugInstructionsForward(MBB1->begin(), MBB1->end()) == I1) + if (skipDebugInstructionsForward(MBB1->begin(), MBB1->end(), false) == I1) I1 = MBB1->begin(); - if (skipDebugInstructionsForward(MBB2->begin(), MBB2->end()) == I2) + if (skipDebugInstructionsForward(MBB2->begin(), MBB2->end(), false) == I2) I2 = MBB2->begin(); bool FullBlockTail1 = I1 == MBB1->begin(); @@ -1217,7 +1217,7 @@ bool BranchFolder::OptimizeBranches(MachineFunction &MF) { // Blocks should be considered empty if they contain only debug info; // else the debug info would affect codegen. static bool IsEmptyBlock(MachineBasicBlock *MBB) { - return MBB->getFirstNonDebugInstr() == MBB->end(); + return MBB->getFirstNonDebugInstr(true) == MBB->end(); } // Blocks with only debug info and branches should be considered the same @@ -1919,8 +1919,8 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { MachineBasicBlock::iterator FIE = FBB->end(); while (TIB != TIE && FIB != FIE) { // Skip dbg_value instructions. These do not count. 
- TIB = skipDebugInstructionsForward(TIB, TIE); - FIB = skipDebugInstructionsForward(FIB, FIE); + TIB = skipDebugInstructionsForward(TIB, TIE, false); + FIB = skipDebugInstructionsForward(FIB, FIE, false); if (TIB == TIE || FIB == FIE) break; diff --git a/llvm/lib/CodeGen/BuiltinGCs.cpp b/llvm/lib/CodeGen/BuiltinGCs.cpp deleted file mode 100644 index bfc10cb3fef2..000000000000 --- a/llvm/lib/CodeGen/BuiltinGCs.cpp +++ /dev/null @@ -1,130 +0,0 @@ -//===- BuiltinGCs.cpp - Boilerplate for our built in GC types -------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file contains the boilerplate required to define our various built in -// gc lowering strategies. -// -//===----------------------------------------------------------------------===// - -#include "llvm/CodeGen/BuiltinGCs.h" -#include "llvm/CodeGen/GCStrategy.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/Support/Casting.h" - -using namespace llvm; - -namespace { - -/// An example GC which attempts to be compatibile with Erlang/OTP garbage -/// collector. -/// -/// The frametable emitter is in ErlangGCPrinter.cpp. -class ErlangGC : public GCStrategy { -public: - ErlangGC() { - NeededSafePoints = true; - UsesMetadata = true; - } -}; - -/// An example GC which attempts to be compatible with Objective Caml 3.10.0 -/// -/// The frametable emitter is in OcamlGCPrinter.cpp. -class OcamlGC : public GCStrategy { -public: - OcamlGC() { - NeededSafePoints = true; - UsesMetadata = true; - } -}; - -/// A GC strategy for uncooperative targets. This implements lowering for the -/// llvm.gc* intrinsics for targets that do not natively support them (which -/// includes the C backend). Note that the code generated is not quite as -/// efficient as algorithms which generate stack maps to identify roots. -/// -/// In order to support this particular transformation, all stack roots are -/// coallocated in the stack. This allows a fully target-independent stack map -/// while introducing only minor runtime overhead. -class ShadowStackGC : public GCStrategy { -public: - ShadowStackGC() {} -}; - -/// A GCStrategy which serves as an example for the usage of a statepoint based -/// lowering strategy. This GCStrategy is intended to suitable as a default -/// implementation usable with any collector which can consume the standard -/// stackmap format generated by statepoints, uses the default addrespace to -/// distinguish between gc managed and non-gc managed pointers, and has -/// reasonable relocation semantics. -class StatepointGC : public GCStrategy { -public: - StatepointGC() { - UseStatepoints = true; - // These options are all gc.root specific, we specify them so that the - // gc.root lowering code doesn't run. - NeededSafePoints = false; - UsesMetadata = false; - } - - Optional<bool> isGCManagedPointer(const Type *Ty) const override { - // Method is only valid on pointer typed values. - const PointerType *PT = cast<PointerType>(Ty); - // For the sake of this example GC, we arbitrarily pick addrspace(1) as our - // GC managed heap. We know that a pointer into this heap needs to be - // updated and that no other pointer does. Note that addrspace(1) is used - // only as an example, it has no special meaning, and is not reserved for - // GC usage. 
- return (1 == PT->getAddressSpace()); - } -}; - -/// A GCStrategy for the CoreCLR Runtime. The strategy is similar to -/// Statepoint-example GC, but differs from it in certain aspects, such as: -/// 1) Base-pointers need not be explicitly tracked and reported for -/// interior pointers -/// 2) Uses a different format for encoding stack-maps -/// 3) Location of Safe-point polls: polls are only needed before loop-back -/// edges and before tail-calls (not needed at function-entry) -/// -/// The above differences in behavior are to be implemented in upcoming -/// checkins. -class CoreCLRGC : public GCStrategy { -public: - CoreCLRGC() { - UseStatepoints = true; - // These options are all gc.root specific, we specify them so that the - // gc.root lowering code doesn't run. - NeededSafePoints = false; - UsesMetadata = false; - } - - Optional<bool> isGCManagedPointer(const Type *Ty) const override { - // Method is only valid on pointer typed values. - const PointerType *PT = cast<PointerType>(Ty); - // We pick addrspace(1) as our GC managed heap. - return (1 == PT->getAddressSpace()); - } -}; - -} // end anonymous namespace - -// Register all the above so that they can be found at runtime. Note that -// these static initializers are important since the registration list is -// constructed from their storage. -static GCRegistry::Add<ErlangGC> A("erlang", - "erlang-compatible garbage collector"); -static GCRegistry::Add<OcamlGC> B("ocaml", "ocaml 3.10-compatible GC"); -static GCRegistry::Add<ShadowStackGC> - C("shadow-stack", "Very portable GC for uncooperative code generators"); -static GCRegistry::Add<StatepointGC> D("statepoint-example", - "an example strategy for statepoint"); -static GCRegistry::Add<CoreCLRGC> E("coreclr", "CoreCLR-compatible GC"); - -// Provide hook to ensure the containing library is fully loaded. -void llvm::linkAllBuiltinGCs() {} diff --git a/llvm/lib/CodeGen/CFIInstrInserter.cpp b/llvm/lib/CodeGen/CFIInstrInserter.cpp index 23c7fea01f28..1c2e3f998449 100644 --- a/llvm/lib/CodeGen/CFIInstrInserter.cpp +++ b/llvm/lib/CodeGen/CFIInstrInserter.cpp @@ -157,7 +157,7 @@ void CFIInstrInserter::calculateCFAInfo(MachineFunction &MF) { // Initialize MBBMap. for (MachineBasicBlock &MBB : MF) { - MBBCFAInfo MBBInfo; + MBBCFAInfo &MBBInfo = MBBVector[MBB.getNumber()]; MBBInfo.MBB = &MBB; MBBInfo.IncomingCFAOffset = InitialOffset; MBBInfo.OutgoingCFAOffset = InitialOffset; @@ -165,7 +165,6 @@ void CFIInstrInserter::calculateCFAInfo(MachineFunction &MF) { MBBInfo.OutgoingCFARegister = InitialRegister; MBBInfo.IncomingCSRSaved.resize(NumRegs); MBBInfo.OutgoingCSRSaved.resize(NumRegs); - MBBVector[MBB.getNumber()] = MBBInfo; } CSRLocMap.clear(); @@ -220,6 +219,14 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) { case MCCFIInstruction::OpRestore: CSRRestored.set(CFI.getRegister()); break; + case MCCFIInstruction::OpLLVMDefAspaceCfa: + // TODO: Add support for handling cfi_def_aspace_cfa. +#ifndef NDEBUG + report_fatal_error( + "Support for cfi_llvm_def_aspace_cfa not implemented! Value of CFA " + "may be incorrect!\n"); +#endif + break; case MCCFIInstruction::OpRememberState: // TODO: Add support for handling cfi_remember_state. #ifndef NDEBUG @@ -265,9 +272,9 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) { MBBInfo.OutgoingCFARegister = SetRegister; // Update outgoing CSR info. 
- MBBInfo.OutgoingCSRSaved = MBBInfo.IncomingCSRSaved;
- MBBInfo.OutgoingCSRSaved |= CSRSaved;
- MBBInfo.OutgoingCSRSaved.reset(CSRRestored);
+ BitVector::apply([](auto x, auto y, auto z) { return (x | y) & ~z; },
+ MBBInfo.OutgoingCSRSaved, MBBInfo.IncomingCSRSaved, CSRSaved,
+ CSRRestored);
}

void CFIInstrInserter::updateSuccCFAInfo(MBBCFAInfo &MBBInfo) {
@@ -295,6 +302,7 @@ bool CFIInstrInserter::insertCFIInstrs(MachineFunction &MF) {
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
bool InsertedCFIInstr = false;
+ BitVector SetDifference;
for (MachineBasicBlock &MBB : MF) {
// Skip the first MBB in a function
if (MBB.getNumber() == MF.front().getNumber()) continue;
@@ -346,8 +354,8 @@ bool CFIInstrInserter::insertCFIInstrs(MachineFunction &MF) {
continue;
}
- BitVector SetDifference = PrevMBBInfo->OutgoingCSRSaved;
- SetDifference.reset(MBBInfo.IncomingCSRSaved);
+ BitVector::apply([](auto x, auto y) { return x & ~y; }, SetDifference,
+ PrevMBBInfo->OutgoingCSRSaved, MBBInfo.IncomingCSRSaved);
for (int Reg : SetDifference.set_bits()) {
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, Reg));
@@ -356,8 +364,8 @@ bool CFIInstrInserter::insertCFIInstrs(MachineFunction &MF) {
InsertedCFIInstr = true;
}
- SetDifference = MBBInfo.IncomingCSRSaved;
- SetDifference.reset(PrevMBBInfo->OutgoingCSRSaved);
+ BitVector::apply([](auto x, auto y) { return x & ~y; }, SetDifference,
+ MBBInfo.IncomingCSRSaved, PrevMBBInfo->OutgoingCSRSaved);
for (int Reg : SetDifference.set_bits()) {
auto it = CSRLocMap.find(Reg);
assert(it != CSRLocMap.end() && "Reg should have an entry in CSRLocMap");
diff --git a/llvm/lib/CodeGen/CalcSpillWeights.cpp b/llvm/lib/CodeGen/CalcSpillWeights.cpp
index 16f380c1eb62..863a0e1e0b56 100644
--- a/llvm/lib/CodeGen/CalcSpillWeights.cpp
+++ b/llvm/lib/CodeGen/CalcSpillWeights.cpp
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/CodeGen/StackMaps.h"
#include <cassert>
#include <tuple>
@@ -125,6 +126,16 @@ static bool isRematerializable(const LiveInterval &LI, const LiveIntervals &LIS,
return true;
}
+bool VirtRegAuxInfo::isLiveAtStatepointVarArg(LiveInterval &LI) {
+ return any_of(VRM.getRegInfo().reg_operands(LI.reg()),
+ [](MachineOperand &MO) {
+ MachineInstr *MI = MO.getParent();
+ if (MI->getOpcode() != TargetOpcode::STATEPOINT)
+ return false;
+ return StatepointOpers(MI).getVarIdx() <= MI->getOperandNo(&MO);
+ });
+}
+
void VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &LI) {
float Weight = weightCalcHelper(LI);
// Check if unspillable.
@@ -290,9 +301,15 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
// Mark li as unspillable if all live ranges are tiny and the interval
// is not live at any reg mask. If the interval is live at a reg mask
- // spilling may be required.
+ // spilling may be required. If li is used by a statepoint instruction,
+ // spilling may also be required: if we marked an interval with a statepoint
+ // use as not spillable, we would risk ending up with no register to
+ // allocate. At the same time a STATEPOINT instruction is perfectly fine
+ // with having this operand on the stack, so spilling such an interval and
+ // folding the load from the stack into the instruction itself makes
+ // perfect sense.
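+ // (Illustrative shape, hypothetical vreg numbers, not from the patch: if
+ // %5 appears only in the variadic/deopt section of a STATEPOINT,
+ //   STATEPOINT ..., @gc.target, ..., %5, ...
+ // it may legally live on a stack slot, and the reload folds into the
+ // STATEPOINT itself, so keeping it spillable is strictly safer than
+ // calling markNotSpillable() on it.)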
if (ShouldUpdateLI && LI.isZeroLength(LIS.getSlotIndexes()) && - !LI.isLiveAtIndexes(LIS.getRegMaskSlots())) { + !LI.isLiveAtIndexes(LIS.getRegMaskSlots()) && + !isLiveAtStatepointVarArg(LI)) { LI.markNotSpillable(); return -1.0; } diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index d2400d0371e3..e0e2db9f4725 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -97,8 +97,10 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeRegUsageInfoCollectorPass(Registry); initializeRegUsageInfoPropagationPass(Registry); initializeRegisterCoalescerPass(Registry); + initializeRemoveRedundantDebugValuesPass(Registry); initializeRenameIndependentSubregsPass(Registry); initializeSafeStackLegacyPassPass(Registry); + initializeShadowStackGCLoweringPass(Registry); initializeShrinkWrapPass(Registry); initializeSjLjEHPreparePass(Registry); initializeSlotIndexesPass(Registry); diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index b2bc75c19709..77ce3d2fb563 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -46,6 +46,7 @@ #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" @@ -377,6 +378,7 @@ class TypePromotionTransaction; } void removeAllAssertingVHReferences(Value *V); + bool eliminateAssumptions(Function &F); bool eliminateFallThrough(Function &F); bool eliminateMostlyEmptyBlocks(Function &F); BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB); @@ -404,6 +406,7 @@ class TypePromotionTransaction; bool dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT); bool fixupDbgValue(Instruction *I); bool placeDbgValues(Function &F); + bool placePseudoProbes(Function &F); bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts, LoadInst *&LI, Instruction *&Inst, bool HasPromoted); bool tryToPromoteExts(TypePromotionTransaction &TPT, @@ -506,6 +509,11 @@ bool CodeGenPrepare::runOnFunction(Function &F) { } } + // Get rid of @llvm.assume builtins before attempting to eliminate empty + // blocks, since there might be blocks that only contain @llvm.assume calls + // (plus arguments that we can get rid of). + EverMadeChange |= eliminateAssumptions(F); + // Eliminate blocks that contain only PHI nodes and an // unconditional branch. EverMadeChange |= eliminateMostlyEmptyBlocks(F); @@ -566,10 +574,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) { MadeChange |= ConstantFoldTerminator(&BB, true); if (!MadeChange) continue; - for (SmallVectorImpl<BasicBlock*>::iterator - II = Successors.begin(), IE = Successors.end(); II != IE; ++II) - if (pred_empty(*II)) - WorkList.insert(*II); + for (BasicBlock *Succ : Successors) + if (pred_empty(Succ)) + WorkList.insert(Succ); } // Delete the dead blocks and any of their dead successors. 
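Both worklist hunks in this function follow the same fixpoint pattern. A self-contained sketch of that pattern (hypothetical Block type, not the LLVM classes): deleting one block can strand its successors, so those are queued and processed until nothing new becomes unreachable.

#include <algorithm>
#include <set>
#include <vector>

struct Block {
  std::vector<Block *> Preds, Succs;
};

void eraseDeadBlocks(std::set<Block *> WorkList) {
  while (!WorkList.empty()) {
    Block *B = *WorkList.begin();
    WorkList.erase(WorkList.begin());
    std::vector<Block *> Succs = B->Succs; // copy: B is about to die
    for (Block *S : Succs) {
      auto &P = S->Preds;
      P.erase(std::remove(P.begin(), P.end(), B), P.end());
      if (P.empty())                       // S just became unreachable too
        WorkList.insert(S);
    }
    delete B;
  }
}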
@@ -580,10 +587,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) { DeleteDeadBlock(BB); - for (SmallVectorImpl<BasicBlock*>::iterator - II = Successors.begin(), IE = Successors.end(); II != IE; ++II) - if (pred_empty(*II)) - WorkList.insert(*II); + for (BasicBlock *Succ : Successors) + if (pred_empty(Succ)) + WorkList.insert(Succ); } // Merge pairs of basic blocks with unconditional branches, connected by @@ -607,6 +613,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) { // Do this last to clean up use-before-def scenarios introduced by other // preparatory transforms. EverMadeChange |= placeDbgValues(F); + EverMadeChange |= placePseudoProbes(F); #ifndef NDEBUG if (VerifyBFIUpdates) @@ -616,6 +623,26 @@ bool CodeGenPrepare::runOnFunction(Function &F) { return EverMadeChange; } +bool CodeGenPrepare::eliminateAssumptions(Function &F) { + bool MadeChange = false; + for (BasicBlock &BB : F) { + CurInstIterator = BB.begin(); + while (CurInstIterator != BB.end()) { + Instruction *I = &*(CurInstIterator++); + if (auto *Assume = dyn_cast<AssumeInst>(I)) { + MadeChange = true; + Value *Operand = Assume->getOperand(0); + Assume->eraseFromParent(); + + resetIteratorIfInvalidatedWhileCalling(&BB, [&]() { + RecursivelyDeleteTriviallyDeadInstructions(Operand, TLInfo, nullptr); + }); + } + } + } + return MadeChange; +} + /// An instruction is about to be deleted, so remove all references to it in our /// GEP-tracking data strcutures. void CodeGenPrepare::removeAllAssertingVHReferences(Value *V) { @@ -780,8 +807,8 @@ bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB, // Skip merging if the block's successor is also a successor to any callbr // that leads to this block. // FIXME: Is this really needed? Is this a correctness issue? - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { - if (auto *CBI = dyn_cast<CallBrInst>((*PI)->getTerminator())) + for (BasicBlock *Pred : predecessors(BB)) { + if (auto *CBI = dyn_cast<CallBrInst>((Pred)->getTerminator())) for (unsigned i = 0, e = CBI->getNumSuccessors(); i != e; ++i) if (DestBB == CBI->getSuccessor(i)) return false; @@ -822,9 +849,7 @@ bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB, // Find all other incoming blocks from which incoming values of all PHIs in // DestBB are the same as the ones from BB. - for (pred_iterator PI = pred_begin(DestBB), E = pred_end(DestBB); PI != E; - ++PI) { - BasicBlock *DestBBPred = *PI; + for (BasicBlock *DestBBPred : predecessors(DestBB)) { if (DestBBPred == BB) continue; @@ -964,8 +989,8 @@ void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) { for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i) PN.addIncoming(InVal, BBPN->getIncomingBlock(i)); } else { - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) - PN.addIncoming(InVal, *PI); + for (BasicBlock *Pred : predecessors(BB)) + PN.addIncoming(InVal, Pred); } } } @@ -1280,11 +1305,83 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI, return SinkCast(CI); } +// Match a simple increment by constant operation. Note that if a sub is +// matched, the step is negated (as if the step had been canonicalized to +// an add, even though we leave the instruction alone.) 
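+// For instance (illustrative IR, not from the patch), each of these
+// increments matches with LHS = %iv:
+//   %next = add i32 %iv, 4                                   ; Step = 4
+//   %next = sub i32 %iv, 4                                   ; Step = -4
+//   %r = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %iv, i32 4)
+//   %next = extractvalue { i32, i1 } %r, 0                   ; Step = 4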
+bool matchIncrement(const Instruction* IVInc, Instruction *&LHS, + Constant *&Step) { + if (match(IVInc, m_Add(m_Instruction(LHS), m_Constant(Step))) || + match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::uadd_with_overflow>( + m_Instruction(LHS), m_Constant(Step))))) + return true; + if (match(IVInc, m_Sub(m_Instruction(LHS), m_Constant(Step))) || + match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::usub_with_overflow>( + m_Instruction(LHS), m_Constant(Step))))) { + Step = ConstantExpr::getNeg(Step); + return true; + } + return false; +} + +/// If given \p PN is an inductive variable with value IVInc coming from the +/// backedge, and on each iteration it gets increased by Step, return pair +/// <IVInc, Step>. Otherwise, return None. +static Optional<std::pair<Instruction *, Constant *> > +getIVIncrement(const PHINode *PN, const LoopInfo *LI) { + const Loop *L = LI->getLoopFor(PN->getParent()); + if (!L || L->getHeader() != PN->getParent() || !L->getLoopLatch()) + return None; + auto *IVInc = + dyn_cast<Instruction>(PN->getIncomingValueForBlock(L->getLoopLatch())); + if (!IVInc || LI->getLoopFor(IVInc->getParent()) != L) + return None; + Instruction *LHS = nullptr; + Constant *Step = nullptr; + if (matchIncrement(IVInc, LHS, Step) && LHS == PN) + return std::make_pair(IVInc, Step); + return None; +} + +static bool isIVIncrement(const Value *V, const LoopInfo *LI) { + auto *I = dyn_cast<Instruction>(V); + if (!I) + return false; + Instruction *LHS = nullptr; + Constant *Step = nullptr; + if (!matchIncrement(I, LHS, Step)) + return false; + if (auto *PN = dyn_cast<PHINode>(LHS)) + if (auto IVInc = getIVIncrement(PN, LI)) + return IVInc->first == I; + return false; +} + bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO, Value *Arg0, Value *Arg1, CmpInst *Cmp, Intrinsic::ID IID) { - if (BO->getParent() != Cmp->getParent()) { + auto IsReplacableIVIncrement = [this, &Cmp](BinaryOperator *BO) { + if (!isIVIncrement(BO, LI)) + return false; + const Loop *L = LI->getLoopFor(BO->getParent()); + assert(L && "L should not be null after isIVIncrement()"); + // Do not risk on moving increment into a child loop. + if (LI->getLoopFor(Cmp->getParent()) != L) + return false; + + // Finally, we need to ensure that the insert point will dominate all + // existing uses of the increment. + + auto &DT = getDT(*BO->getParent()->getParent()); + if (DT.dominates(Cmp->getParent(), BO->getParent())) + // If we're moving up the dom tree, all uses are trivially dominated. + // (This is the common case for code produced by LSR.) + return true; + + // Otherwise, special case the single use in the phi recurrence. + return BO->hasOneUse() && DT.dominates(Cmp->getParent(), L->getLoopLatch()); + }; + if (BO->getParent() != Cmp->getParent() && !IsReplacableIVIncrement(BO)) { // We used to use a dominator tree here to allow multi-block optimization. // But that was problematic because: // 1. It could cause a perf regression by hoisting the math op into the @@ -1295,6 +1392,14 @@ bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO, // This is because we recompute the DT on every change in the main CGP // run-loop. The recomputing is probably unnecessary in many cases, so if // that was fixed, using a DT here would be ok. + // + // There is one important particular case we still want to handle: if BO is + // the IV increment. 
Important properties that make it profitable:
+ // - We can speculate IV increment anywhere in the loop (as long as the
+ // indvar Phi is its only user);
+ // - Upon computing Cmp, we effectively compute something equivalent to the
+ // IV increment (although it looks different in the IR). So moving it up
+ // to the cmp point does not really increase register pressure.
return false;
}
@@ -1936,6 +2041,10 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros,
if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSizeInBits())
return false;
+ // Bail if the value is never zero.
+ if (llvm::isKnownNonZero(CountZeros->getOperand(0), *DL))
+ return false;
+
// The intrinsic will be sunk behind a compare against zero and branch.
BasicBlock *StartBlock = CountZeros->getParent();
BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false");
@@ -2061,18 +2170,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
if (II) {
switch (II->getIntrinsicID()) {
default: break;
- case Intrinsic::assume: {
- Value *Operand = II->getOperand(0);
- II->eraseFromParent();
- // Prune the operand, it's most likely dead.
- resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
- RecursivelyDeleteTriviallyDeadInstructions(
- Operand, TLInfo, nullptr,
- [&](Value *V) { removeAllAssertingVHReferences(V); });
- });
- return true;
- }
-
+ case Intrinsic::assume:
+ llvm_unreachable("llvm.assume should have been removed already");
case Intrinsic::experimental_widenable_condition: {
// Give up on future widening opportunities so that we can fold away dead
// paths and merge blocks before going into block-local instruction
@@ -2242,21 +2341,25 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT
if (PN && PN->getParent() != BB) return false;
- // Make sure there are no instructions between the PHI and return, or that the
- // return is the first instruction in the block.
- if (PN) {
- BasicBlock::iterator BI = BB->begin();
- // Skip over debug and the bitcast.
- do {
- ++BI;
- } while (isa<DbgInfoIntrinsic>(BI) || &*BI == BCI || &*BI == EVI ||
- isa<PseudoProbeInst>(BI));
- if (&*BI != RetI)
- return false;
- } else {
- if (BB->getFirstNonPHIOrDbg(true) != RetI)
- return false;
- }
+ auto isLifetimeEndOrBitCastFor = [](const Instruction *Inst) {
+ const BitCastInst *BC = dyn_cast<BitCastInst>(Inst);
+ if (BC && BC->hasOneUse())
+ Inst = BC->user_back();
+
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
+ return II->getIntrinsicID() == Intrinsic::lifetime_end;
+ return false;
+ };
+
+ // Make sure there are no instructions between the first instruction
+ // and return.
+ const Instruction *BI = BB->getFirstNonPHI();
+ // Skip over debug and the bitcast.
+ while (isa<DbgInfoIntrinsic>(BI) || BI == BCI || BI == EVI ||
+ isa<PseudoProbeInst>(BI) || isLifetimeEndOrBitCastFor(BI))
+ BI = BI->getNextNode();
+ if (BI != RetI)
+ return false;

/// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
/// call.
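The relaxed scan above now tolerates lifetime_end markers (and a feeding bitcast) between the call and the return. A source-level illustration of the shape this unblocks (hypothetical functions, not from the patch):

struct Big { char Buf[128]; };
void use(Big &);
int callee();

int caller() {
  Big Tmp;           // the compiler emits lifetime.start/end markers for Tmp
  use(Tmp);
  return callee();   // IR order: call callee, lifetime.end(Tmp), ret
}                    // lifetime.end no longer defeats tail-call duplication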
@@ -2276,14 +2379,14 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT } } else { SmallPtrSet<BasicBlock*, 4> VisitedBBs; - for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { - if (!VisitedBBs.insert(*PI).second) + for (BasicBlock *Pred : predecessors(BB)) { + if (!VisitedBBs.insert(Pred).second) continue; - if (Instruction *I = (*PI)->rbegin()->getPrevNonDebugInstruction(true)) { + if (Instruction *I = Pred->rbegin()->getPrevNonDebugInstruction(true)) { CallInst *CI = dyn_cast<CallInst>(I); if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) && attributesPermitTailCall(F, CI, RetI, *TLI)) - TailCallBBs.push_back(*PI); + TailCallBBs.push_back(Pred); } } } @@ -2775,11 +2878,16 @@ class TypePromotionTransaction { /// Keep track of the debug users. SmallVector<DbgValueInst *, 1> DbgValues; + /// Keep track of the new value so that we can undo it by replacing + /// instances of the new value with the original value. + Value *New; + using use_iterator = SmallVectorImpl<InstructionAndIdx>::iterator; public: /// Replace all the use of \p Inst by \p New. - UsesReplacer(Instruction *Inst, Value *New) : TypePromotionAction(Inst) { + UsesReplacer(Instruction *Inst, Value *New) + : TypePromotionAction(Inst), New(New) { LLVM_DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New << "\n"); // Record the original uses. @@ -2798,20 +2906,14 @@ class TypePromotionTransaction { /// Reassign the original uses of Inst to Inst. void undo() override { LLVM_DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n"); - for (use_iterator UseIt = OriginalUses.begin(), - EndIt = OriginalUses.end(); - UseIt != EndIt; ++UseIt) { - UseIt->Inst->setOperand(UseIt->Idx, Inst); - } + for (InstructionAndIdx &Use : OriginalUses) + Use.Inst->setOperand(Use.Idx, Inst); // RAUW has replaced all original uses with references to the new value, // including the debug uses. Since we are undoing the replacements, // the original debug uses must also be reinstated to maintain the // correctness and utility of debug value instructions. - for (auto *DVI: DbgValues) { - LLVMContext &Ctx = Inst->getType()->getContext(); - auto *MV = MetadataAsValue::get(Ctx, ValueAsMetadata::get(Inst)); - DVI->setOperand(0, MV); - } + for (auto *DVI : DbgValues) + DVI->replaceVariableLocationOp(New, Inst); } }; @@ -2981,9 +3083,8 @@ TypePromotionTransaction::getRestorationPoint() const { } bool TypePromotionTransaction::commit() { - for (CommitPt It = Actions.begin(), EndIt = Actions.end(); It != EndIt; - ++It) - (*It)->commit(); + for (std::unique_ptr<TypePromotionAction> &Action : Actions) + Action->commit(); bool Modified = !Actions.empty(); Actions.clear(); return Modified; @@ -3007,6 +3108,8 @@ class AddressingModeMatcher { const TargetLowering &TLI; const TargetRegisterInfo &TRI; const DataLayout &DL; + const LoopInfo &LI; + const std::function<const DominatorTree &()> getDTFn; /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and /// the memory instruction that we're computing this address for. 
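The new getDTFn member threads a lazily computed dominator tree into the matcher. A minimal, self-contained sketch of the deferred-analysis idiom it enables (hypothetical names, not the LLVM plumbing): callers pass a thunk, and the expensive tree is built only if some match actually queries dominance.

#include <functional>
#include <memory>

struct DomTree {
  // Dummy stand-in for an expensive-to-build analysis.
  bool dominates(int A, int B) const { return A <= B; }
};

struct FunctionCtx {
  std::unique_ptr<DomTree> DT;
  const DomTree &getDT() {
    if (!DT)
      DT = std::make_unique<DomTree>(); // computed at most once, on demand
    return *DT;
  }
};

int main() {
  FunctionCtx Ctx;
  std::function<const DomTree &()> GetDTFn = [&]() -> const DomTree & {
    return Ctx.getDT();
  };
  // Most matches never invoke the thunk; when one does, the tree exists.
  bool Dominates = GetDTFn().dominates(1, 2);
  (void)Dominates;
}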
@@ -3042,16 +3145,18 @@ class AddressingModeMatcher { AddressingModeMatcher( SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI, - const TargetRegisterInfo &TRI, Type *AT, unsigned AS, Instruction *MI, - ExtAddrMode &AM, const SetOfInstrs &InsertedInsts, - InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT, + const TargetRegisterInfo &TRI, const LoopInfo &LI, + const std::function<const DominatorTree &()> getDTFn, + Type *AT, unsigned AS, Instruction *MI, ExtAddrMode &AM, + const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts, + TypePromotionTransaction &TPT, std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP, bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) : AddrModeInsts(AMI), TLI(TLI), TRI(TRI), - DL(MI->getModule()->getDataLayout()), AccessTy(AT), AddrSpace(AS), - MemoryInst(MI), AddrMode(AM), InsertedInsts(InsertedInsts), - PromotedInsts(PromotedInsts), TPT(TPT), LargeOffsetGEP(LargeOffsetGEP), - OptSize(OptSize), PSI(PSI), BFI(BFI) { + DL(MI->getModule()->getDataLayout()), LI(LI), getDTFn(getDTFn), + AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM), + InsertedInsts(InsertedInsts), PromotedInsts(PromotedInsts), TPT(TPT), + LargeOffsetGEP(LargeOffsetGEP), OptSize(OptSize), PSI(PSI), BFI(BFI) { IgnoreProfitability = false; } @@ -3066,18 +3171,18 @@ public: static ExtAddrMode Match(Value *V, Type *AccessTy, unsigned AS, Instruction *MemoryInst, SmallVectorImpl<Instruction *> &AddrModeInsts, - const TargetLowering &TLI, const TargetRegisterInfo &TRI, - const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts, - TypePromotionTransaction &TPT, + const TargetLowering &TLI, const LoopInfo &LI, + const std::function<const DominatorTree &()> getDTFn, + const TargetRegisterInfo &TRI, const SetOfInstrs &InsertedInsts, + InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT, std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP, bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) { ExtAddrMode Result; - bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, AccessTy, AS, - MemoryInst, Result, InsertedInsts, - PromotedInsts, TPT, LargeOffsetGEP, - OptSize, PSI, BFI) - .matchAddr(V, 0); + bool Success = AddressingModeMatcher( + AddrModeInsts, TLI, TRI, LI, getDTFn, AccessTy, AS, MemoryInst, Result, + InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI, + BFI).matchAddr(V, 0); (void)Success; assert(Success && "Couldn't select *anything*?"); return Result; } @@ -3773,11 +3878,12 @@ bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale, // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now // to see if ScaleReg is actually X+C. If so, we can turn this into adding - // X*Scale + C*Scale to addr mode. + // X*Scale + C*Scale to addr mode. If we found available IV increment, do not + // go any further: we can reuse it and cannot eliminate it. ConstantInt *CI = nullptr; Value *AddLHS = nullptr; - if (isa<Instruction>(ScaleReg) && // not a constant expr. + if (isa<Instruction>(ScaleReg) && // not a constant expr. 
match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI))) &&
- CI->getValue().isSignedIntN(64)) {
+ !isIVIncrement(ScaleReg, &LI) && CI->getValue().isSignedIntN(64)) {
TestAddrMode.InBounds = false;
TestAddrMode.ScaledReg = AddLHS;
TestAddrMode.BaseOffs += CI->getSExtValue() * TestAddrMode.Scale;
@@ -3789,9 +3895,75 @@ bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
AddrMode = TestAddrMode;
return true;
}
+ // Restore status quo.
+ TestAddrMode = AddrMode;
+ }
+
+ // If this is an add recurrence with a constant step, return the increment
+ // instruction and the canonicalized step.
+ auto GetConstantStep = [this](const Value * V)
+ ->Optional<std::pair<Instruction *, APInt> > {
+ auto *PN = dyn_cast<PHINode>(V);
+ if (!PN)
+ return None;
+ auto IVInc = getIVIncrement(PN, &LI);
+ if (!IVInc)
+ return None;
+ // TODO: The result of the intrinsics above is two's complement. However,
+ // when IV inc is expressed as add or sub, iv.next is potentially a poison
+ // value. If it has nuw or nsw flags, we need to make sure that these flags
+ // are inferable at the point of the memory instruction. Otherwise we are
+ // replacing a well-defined two's-complement computation with poison.
+ // Currently, to avoid the potentially complex analysis needed to prove
+ // this, we reject such cases.
+ if (auto *OIVInc = dyn_cast<OverflowingBinaryOperator>(IVInc->first))
+ if (OIVInc->hasNoSignedWrap() || OIVInc->hasNoUnsignedWrap())
+ return None;
+ if (auto *ConstantStep = dyn_cast<ConstantInt>(IVInc->second))
+ return std::make_pair(IVInc->first, ConstantStep->getValue());
+ return None;
+ };
+
+ // Try to account for the following special case:
+ // 1. ScaleReg is an inductive variable;
+ // 2. We use it with non-zero offset;
+ // 3. IV's increment is available at the point of memory instruction.
+ //
+ // In this case, we may reuse the IV increment instead of the IV Phi to
+ // achieve the following advantages:
+ // 1. If IV step matches the offset, we will have no need for the offset;
+ // 2. Even if they don't match, we will reduce the overlap of the live
+ // ranges of the IV and the IV increment, which can lead to better
+ // register assignment.
+ if (AddrMode.BaseOffs) {
+ if (auto IVStep = GetConstantStep(ScaleReg)) {
+ Instruction *IVInc = IVStep->first;
+ // The following assert is important to ensure a lack of infinite loops.
+ // This transform is (intentionally) the inverse of the one just above.
+ // If they don't agree on the definition of an increment, we'd alternate
+ // back and forth indefinitely.
+ assert(isIVIncrement(IVInc, &LI) && "implied by GetConstantStep");
+ APInt Step = IVStep->second;
+ APInt Offset = Step * AddrMode.Scale;
+ if (Offset.isSignedIntN(64)) {
+ TestAddrMode.InBounds = false;
+ TestAddrMode.ScaledReg = IVInc;
+ TestAddrMode.BaseOffs -= Offset.getLimitedValue();
+ // If this addressing mode is legal, commit it.
+ // (Note that we defer the (expensive) domtree-based legality check
+ // to the very last possible point.)
+ if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace) &&
+ getDTFn().dominates(IVInc, MemoryInst)) {
+ AddrModeInsts.push_back(cast<Instruction>(IVInc));
+ AddrMode = TestAddrMode;
+ return true;
+ }
+ // Restore status quo.
+ TestAddrMode = AddrMode;
+ }
+ }
}
- // Otherwise, not (x+c)*scale, just return what we have.
+ // Otherwise, just return what we have.
return true; } @@ -4881,9 +5053,10 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, 0); TypePromotionTransaction::ConstRestorationPt LastKnownGood = TPT.getRestorationPoint(); - AddressingModeMatcher Matcher( - MatchedAddrModeInsts, TLI, TRI, AddressAccessTy, AS, MemoryInst, Result, - InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI, BFI); + AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI, LI, getDTFn, + AddressAccessTy, AS, MemoryInst, Result, + InsertedInsts, PromotedInsts, TPT, + LargeOffsetGEP, OptSize, PSI, BFI); Matcher.IgnoreProfitability = true; bool Success = Matcher.matchAddr(Address, 0); (void)Success; assert(Success && "Couldn't select *anything*?"); @@ -4986,9 +5159,16 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, AddrModeInsts.clear(); std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr, 0); + // Defer the query (and possible computation of) the dom tree to point of + // actual use. It's expected that most address matches don't actually need + // the domtree. + auto getDTFn = [MemoryInst, this]() -> const DominatorTree & { + Function *F = MemoryInst->getParent()->getParent(); + return this->getDT(*F); + }; ExtAddrMode NewAddrMode = AddressingModeMatcher::Match( - V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *TRI, - InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI, + V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *LI, getDTFn, + *TRI, InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI, BFI.get()); GetElementPtrInst *GEP = LargeOffsetGEP.first; @@ -5373,14 +5553,19 @@ bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst, IRBuilder<> Builder(MemoryInst); + Type *SourceTy = GEP->getSourceElementType(); Type *ScalarIndexTy = DL->getIndexType(Ops[0]->getType()->getScalarType()); // If the final index isn't a vector, emit a scalar GEP containing all ops // and a vector GEP with all zeroes final index. if (!Ops[FinalIndex]->getType()->isVectorTy()) { - NewAddr = Builder.CreateGEP(Ops[0], makeArrayRef(Ops).drop_front()); + NewAddr = Builder.CreateGEP(SourceTy, Ops[0], + makeArrayRef(Ops).drop_front()); auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts); - NewAddr = Builder.CreateGEP(NewAddr, Constant::getNullValue(IndexTy)); + auto *SecondTy = GetElementPtrInst::getIndexedType( + SourceTy, makeArrayRef(Ops).drop_front()); + NewAddr = + Builder.CreateGEP(SecondTy, NewAddr, Constant::getNullValue(IndexTy)); } else { Value *Base = Ops[0]; Value *Index = Ops[FinalIndex]; @@ -5389,11 +5574,14 @@ bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst, if (Ops.size() != 2) { // Replace the last index with 0. Ops[FinalIndex] = Constant::getNullValue(ScalarIndexTy); - Base = Builder.CreateGEP(Base, makeArrayRef(Ops).drop_front()); + Base = Builder.CreateGEP(SourceTy, Base, + makeArrayRef(Ops).drop_front()); + SourceTy = GetElementPtrInst::getIndexedType( + SourceTy, makeArrayRef(Ops).drop_front()); } // Now create the GEP with scalar pointer and vector index. - NewAddr = Builder.CreateGEP(Base, Index); + NewAddr = Builder.CreateGEP(SourceTy, Base, Index); } } else if (!isa<Constant>(Ptr)) { // Not a GEP, maybe its a splat and we can create a GEP to enable @@ -5409,7 +5597,16 @@ bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst, // Emit a vector GEP with a scalar pointer and all 0s vector index. 
Type *ScalarIndexTy = DL->getIndexType(V->getType()->getScalarType()); auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts); - NewAddr = Builder.CreateGEP(V, Constant::getNullValue(IndexTy)); + Type *ScalarTy; + if (cast<IntrinsicInst>(MemoryInst)->getIntrinsicID() == + Intrinsic::masked_gather) { + ScalarTy = MemoryInst->getType()->getScalarType(); + } else { + assert(cast<IntrinsicInst>(MemoryInst)->getIntrinsicID() == + Intrinsic::masked_scatter); + ScalarTy = MemoryInst->getOperand(0)->getType()->getScalarType(); + } + NewAddr = Builder.CreateGEP(ScalarTy, V, Constant::getNullValue(IndexTy)); } else { // Constant, SelectionDAGBuilder knows to check if its a splat. return false; @@ -6272,6 +6469,10 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) { EVT LoadResultVT = TLI->getValueType(*DL, Load->getType()); unsigned BitWidth = LoadResultVT.getSizeInBits(); + // If the BitWidth is 0, do not try to optimize the type + if (BitWidth == 0) + return false; + APInt DemandBits(BitWidth, 0); APInt WidestAndBits(BitWidth, 0); @@ -6409,7 +6610,7 @@ static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI, uint64_t Sum = TrueWeight + FalseWeight; if (Sum != 0) { auto Probability = BranchProbability::getBranchProbability(Max, Sum); - if (Probability > TLI->getPredictableBranchThreshold()) + if (Probability > TTI->getPredictableBranchThreshold()) return true; } } @@ -6795,7 +6996,8 @@ bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) { Value *Cond = SI->getCondition(); Type *OldType = Cond->getType(); LLVMContext &Context = Cond->getContext(); - MVT RegType = TLI->getRegisterType(Context, TLI->getValueType(*DL, OldType)); + EVT OldVT = TLI->getValueType(*DL, OldType); + MVT RegType = TLI->getRegisterType(Context, OldVT); unsigned RegWidth = RegType.getSizeInBits(); if (RegWidth <= cast<IntegerType>(OldType)->getBitWidth()) @@ -6809,14 +7011,21 @@ bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) { // where N is the number of cases in the switch. auto *NewType = Type::getIntNTy(Context, RegWidth); - // Zero-extend the switch condition and case constants unless the switch - // condition is a function argument that is already being sign-extended. - // In that case, we can avoid an unnecessary mask/extension by sign-extending - // everything instead. + // Extend the switch condition and case constants using the target preferred + // extend unless the switch condition is a function argument with an extend + // attribute. In that case, we can avoid an unnecessary mask/extension by + // matching the argument extension instead. Instruction::CastOps ExtType = Instruction::ZExt; - if (auto *Arg = dyn_cast<Argument>(Cond)) + // Some targets prefer SExt over ZExt. + if (TLI->isSExtCheaperThanZExt(OldVT, RegType)) + ExtType = Instruction::SExt; + + if (auto *Arg = dyn_cast<Argument>(Cond)) { if (Arg->hasSExtAttr()) ExtType = Instruction::SExt; + if (Arg->hasZExtAttr()) + ExtType = Instruction::ZExt; + } auto *ExtInst = CastInst::Create(ExtType, Cond, NewType); ExtInst->insertBefore(SI); @@ -6927,11 +7136,10 @@ class VectorPromoteHelper { StoreInst *ST = cast<StoreInst>(CombineInst); unsigned AS = ST->getPointerAddressSpace(); - unsigned Align = ST->getAlignment(); // Check if this store is supported. if (!TLI.allowsMisalignedMemoryAccesses( TLI.getValueType(DL, ST->getValueOperand()->getType()), AS, - Align)) { + ST->getAlign())) { // If this is not supported, there is no way we can combine // the extract with the store. 
return false; @@ -6940,9 +7148,9 @@ class VectorPromoteHelper { // The scalar chain of computation has to pay for the transition // scalar to vector. // The vector chain has to account for the combining cost. - uint64_t ScalarCost = + InstructionCost ScalarCost = TTI.getVectorInstrCost(Transition->getOpcode(), PromotedType, Index); - uint64_t VectorCost = StoreExtractCombineCost; + InstructionCost VectorCost = StoreExtractCombineCost; enum TargetTransformInfo::TargetCostKind CostKind = TargetTransformInfo::TCK_RecipThroughput; for (const auto &Inst : InstsToBePromoted) { @@ -7483,9 +7691,8 @@ static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI, for (GetElementPtrInst *UGEPI : UGEPIs) { ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1)); APInt NewIdx = UGEPIIdx->getValue() - GEPIIdx->getValue(); - unsigned ImmCost = - TTI->getIntImmCost(NewIdx, GEPIIdx->getType(), - TargetTransformInfo::TCK_SizeAndLatency); + InstructionCost ImmCost = TTI->getIntImmCost( + NewIdx, GEPIIdx->getType(), TargetTransformInfo::TCK_SizeAndLatency); if (ImmCost > TargetTransformInfo::TCC_Basic) return false; } @@ -7511,6 +7718,67 @@ static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI, return true; } +static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI) { + // Try and convert + // %c = icmp ult %x, 8 + // br %c, bla, blb + // %tc = lshr %x, 3 + // to + // %tc = lshr %x, 3 + // %c = icmp eq %tc, 0 + // br %c, bla, blb + // Creating the cmp to zero can be better for the backend, especially if the + // lshr produces flags that can be used automatically. + if (!TLI.preferZeroCompareBranch() || !Branch->isConditional()) + return false; + + ICmpInst *Cmp = dyn_cast<ICmpInst>(Branch->getCondition()); + if (!Cmp || !isa<ConstantInt>(Cmp->getOperand(1)) || !Cmp->hasOneUse()) + return false; + + Value *X = Cmp->getOperand(0); + APInt CmpC = cast<ConstantInt>(Cmp->getOperand(1))->getValue(); + + for (auto *U : X->users()) { + Instruction *UI = dyn_cast<Instruction>(U); + // A quick dominance check + if (!UI || + (UI->getParent() != Branch->getParent() && + UI->getParent() != Branch->getSuccessor(0) && + UI->getParent() != Branch->getSuccessor(1)) || + (UI->getParent() != Branch->getParent() && + !UI->getParent()->getSinglePredecessor())) + continue; + + if (CmpC.isPowerOf2() && Cmp->getPredicate() == ICmpInst::ICMP_ULT && + match(UI, m_Shr(m_Specific(X), m_SpecificInt(CmpC.logBase2())))) { + IRBuilder<> Builder(Branch); + if (UI->getParent() != Branch->getParent()) + UI->moveBefore(Branch); + Value *NewCmp = Builder.CreateCmp(ICmpInst::ICMP_EQ, UI, + ConstantInt::get(UI->getType(), 0)); + LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n"); + LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n"); + Cmp->replaceAllUsesWith(NewCmp); + return true; + } + if (Cmp->isEquality() && + (match(UI, m_Add(m_Specific(X), m_SpecificInt(-CmpC))) || + match(UI, m_Sub(m_Specific(X), m_SpecificInt(CmpC))))) { + IRBuilder<> Builder(Branch); + if (UI->getParent() != Branch->getParent()) + UI->moveBefore(Branch); + Value *NewCmp = Builder.CreateCmp(Cmp->getPredicate(), UI, + ConstantInt::get(UI->getType(), 0)); + LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n"); + LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n"); + Cmp->replaceAllUsesWith(NewCmp); + return true; + } + } + return false; +} + bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) { // Bail out if we inserted the instruction to prevent optimizations from // stepping on each 
other's toes. @@ -7672,6 +7940,8 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) { return optimizeSwitchInst(cast<SwitchInst>(I)); case Instruction::ExtractElement: return optimizeExtractElementInst(cast<ExtractElementInst>(I)); + case Instruction::Br: + return optimizeBranch(cast<BranchInst>(I), *TLI); } return false; @@ -7731,19 +8001,23 @@ bool CodeGenPrepare::fixupDbgValue(Instruction *I) { DbgValueInst &DVI = *cast<DbgValueInst>(I); // Does this dbg.value refer to a sunk address calculation? - Value *Location = DVI.getVariableLocation(); - WeakTrackingVH SunkAddrVH = SunkAddrs[Location]; - Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr; - if (SunkAddr) { - // Point dbg.value at locally computed address, which should give the best - // opportunity to be accurately lowered. This update may change the type of - // pointer being referred to; however this makes no difference to debugging - // information, and we can't generate bitcasts that may affect codegen. - DVI.setOperand(0, MetadataAsValue::get(DVI.getContext(), - ValueAsMetadata::get(SunkAddr))); - return true; - } - return false; + bool AnyChange = false; + SmallDenseSet<Value *> LocationOps(DVI.location_ops().begin(), + DVI.location_ops().end()); + for (Value *Location : LocationOps) { + WeakTrackingVH SunkAddrVH = SunkAddrs[Location]; + Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr; + if (SunkAddr) { + // Point dbg.value at locally computed address, which should give the best + // opportunity to be accurately lowered. This update may change the type + // of pointer being referred to; however this makes no difference to + // debugging information, and we can't generate bitcasts that may affect + // codegen. + DVI.replaceVariableLocationOp(Location, SunkAddr); + AnyChange = true; + } + } + return AnyChange; } // A llvm.dbg.value may be using a value before its definition, due to @@ -7762,30 +8036,73 @@ bool CodeGenPrepare::placeDbgValues(Function &F) { if (!DVI) continue; - Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue()); + SmallVector<Instruction *, 4> VIs; + for (Value *V : DVI->getValues()) + if (Instruction *VI = dyn_cast_or_null<Instruction>(V)) + VIs.push_back(VI); + + // This DVI may depend on multiple instructions, complicating any + // potential sink. This block takes the defensive approach, opting to + // "undef" the DVI if it has more than one instruction and any of them do + // not dominate DVI. + for (Instruction *VI : VIs) { + if (VI->isTerminator()) + continue; - if (!VI || VI->isTerminator()) - continue; + // If VI is a phi in a block with an EHPad terminator, we can't insert + // after it. + if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad()) + continue; - // If VI is a phi in a block with an EHPad terminator, we can't insert - // after it. - if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad()) - continue; + // If the defining instruction dominates the dbg.value, we do not need + // to move the dbg.value. + if (DT.dominates(VI, DVI)) + continue; - // If the defining instruction dominates the dbg.value, we do not need - // to move the dbg.value. - if (DT.dominates(VI, DVI)) - continue; + // If we depend on multiple instructions and any of them doesn't + // dominate this DVI, we probably can't salvage it: moving it to + // after any of the instructions could cause us to lose the others. 
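+ // Illustrative shape (not from the patch): this dbg.value depends on
+ // two defs, and no single insertion point is dominated by both:
+ //   B1: %a = ...
+ //   B2: %b = ...   ; B2 does not dominate the dbg.value's block
+ //   dbg.value(!DIArgList(%a, %b), !var, !DIExpression(DW_OP_LLVM_arg, 0))
+ // Rather than risk losing %a or %b, the intrinsic is set to undef below.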
+ if (VIs.size() > 1) { + LLVM_DEBUG( + dbgs() + << "Unable to find valid location for Debug Value, undefing:\n" + << *DVI); + DVI->setUndef(); + break; + } - LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n" - << *DVI << ' ' << *VI); - DVI->removeFromParent(); - if (isa<PHINode>(VI)) - DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt()); - else - DVI->insertAfter(VI); - MadeChange = true; - ++NumDbgValueMoved; + LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n" + << *DVI << ' ' << *VI); + DVI->removeFromParent(); + if (isa<PHINode>(VI)) + DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt()); + else + DVI->insertAfter(VI); + MadeChange = true; + ++NumDbgValueMoved; + } + } + } + return MadeChange; +} + +// Group scattered pseudo probes in a block to favor SelectionDAG. Scattered +// probes can be chained dependencies of other regular DAG nodes and block DAG +// combine optimizations. +bool CodeGenPrepare::placePseudoProbes(Function &F) { + bool MadeChange = false; + for (auto &Block : F) { + // Move the rest probes to the beginning of the block. + auto FirstInst = Block.getFirstInsertionPt(); + while (FirstInst != Block.end() && FirstInst->isDebugOrPseudoInst()) + ++FirstInst; + BasicBlock::iterator I(FirstInst); + I++; + while (I != Block.end()) { + if (auto *II = dyn_cast<PseudoProbeInst>(I++)) { + II->moveBefore(&*FirstInst); + MadeChange = true; + } } } return MadeChange; diff --git a/llvm/lib/CodeGen/CommandFlags.cpp b/llvm/lib/CodeGen/CommandFlags.cpp index 97c110afdda4..f3cba6225107 100644 --- a/llvm/lib/CodeGen/CommandFlags.cpp +++ b/llvm/lib/CodeGen/CommandFlags.cpp @@ -17,6 +17,7 @@ #include "llvm/MC/SubtargetFeature.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Host.h" +#include "llvm/Support/MemoryBuffer.h" using namespace llvm; @@ -52,7 +53,7 @@ CGOPT(ThreadModel::Model, ThreadModel) CGOPT_EXP(CodeModel::Model, CodeModel) CGOPT(ExceptionHandling, ExceptionModel) CGOPT_EXP(CodeGenFileType, FileType) -CGOPT(FramePointer::FP, FramePointerUsage) +CGOPT(FramePointerKind, FramePointerUsage) CGOPT(bool, EnableUnsafeFPMath) CGOPT(bool, EnableNoInfsFPMath) CGOPT(bool, EnableNoNaNsFPMath) @@ -68,7 +69,6 @@ CGOPT(bool, DontPlaceZerosInBSS) CGOPT(bool, EnableGuaranteedTailCallOpt) CGOPT(bool, DisableTailCalls) CGOPT(bool, StackSymbolOrdering) -CGOPT(unsigned, OverrideStackAlignment) CGOPT(bool, StackRealign) CGOPT(std::string, TrapFuncName) CGOPT(bool, UseCtors) @@ -78,9 +78,6 @@ CGOPT_EXP(bool, FunctionSections) CGOPT(bool, IgnoreXCOFFVisibility) CGOPT(bool, XCOFFTracebackTable) CGOPT(std::string, BBSections) -CGOPT(std::string, StackProtectorGuard) -CGOPT(unsigned, StackProtectorGuardOffset) -CGOPT(std::string, StackProtectorGuardReg) CGOPT(unsigned, TLSSize) CGOPT(bool, EmulatedTLS) CGOPT(bool, UniqueSectionNames) @@ -96,6 +93,7 @@ CGOPT(bool, PseudoProbeForProfiling) CGOPT(bool, ValueTrackingVariableLocations) CGOPT(bool, ForceDwarfFrameSection) CGOPT(bool, XRayOmitFunctionIndex) +CGOPT(bool, DebugStrictDwarf) codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() { #define CGBINDOPT(NAME) \ @@ -182,16 +180,16 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() { "Emit nothing, for performance testing"))); CGBINDOPT(FileType); - static cl::opt<FramePointer::FP> FramePointerUsage( + static cl::opt<FramePointerKind> FramePointerUsage( "frame-pointer", cl::desc("Specify frame pointer elimination optimization"), - cl::init(FramePointer::None), + cl::init(FramePointerKind::None), cl::values( - clEnumValN(FramePointer::All, "all", + 
clEnumValN(FramePointerKind::All, "all", "Disable frame pointer elimination"), - clEnumValN(FramePointer::NonLeaf, "non-leaf", + clEnumValN(FramePointerKind::NonLeaf, "non-leaf", "Disable frame pointer elimination for non-leaf frame"), - clEnumValN(FramePointer::None, "none", + clEnumValN(FramePointerKind::None, "none", "Enable frame pointer elimination"))); CGBINDOPT(FramePointerUsage); @@ -306,11 +304,6 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() { cl::init(true)); CGBINDOPT(StackSymbolOrdering); - static cl::opt<unsigned> OverrideStackAlignment( - "stack-alignment", cl::desc("Override default stack alignment"), - cl::init(0)); - CGBINDOPT(OverrideStackAlignment); - static cl::opt<bool> StackRealign( "stackrealign", cl::desc("Force align the stack to the minimum alignment"), @@ -364,21 +357,6 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() { cl::init("none")); CGBINDOPT(BBSections); - static cl::opt<std::string> StackProtectorGuard( - "stack-protector-guard", cl::desc("Stack protector guard mode"), - cl::init("none")); - CGBINDOPT(StackProtectorGuard); - - static cl::opt<std::string> StackProtectorGuardReg( - "stack-protector-guard-reg", cl::desc("Stack protector guard register"), - cl::init("none")); - CGBINDOPT(StackProtectorGuardReg); - - static cl::opt<unsigned> StackProtectorGuardOffset( - "stack-protector-guard-offset", cl::desc("Stack protector guard offset"), - cl::init((unsigned)-1)); - CGBINDOPT(StackProtectorGuardOffset); - static cl::opt<unsigned> TLSSize( "tls-size", cl::desc("Bit size of immediate TLS offsets"), cl::init(0)); CGBINDOPT(TLSSize); @@ -414,6 +392,7 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() { cl::values( clEnumValN(DebuggerKind::GDB, "gdb", "gdb"), clEnumValN(DebuggerKind::LLDB, "lldb", "lldb"), + clEnumValN(DebuggerKind::DBX, "dbx", "dbx"), clEnumValN(DebuggerKind::SCE, "sce", "SCE targets (e.g. PS4)"))); CGBINDOPT(DebuggerTuningOpt); @@ -469,6 +448,10 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() { cl::init(false)); CGBINDOPT(XRayOmitFunctionIndex); + static cl::opt<bool> DebugStrictDwarf( + "strict-dwarf", cl::desc("use strict dwarf"), cl::init(false)); + CGBINDOPT(DebugStrictDwarf); + #undef CGBINDOPT mc::RegisterMCTargetOptionsFlags(); @@ -495,24 +478,6 @@ codegen::getBBSectionsMode(llvm::TargetOptions &Options) { } } -llvm::StackProtectorGuards -codegen::getStackProtectorGuardMode(llvm::TargetOptions &Options) { - if (getStackProtectorGuard() == "tls") - return StackProtectorGuards::TLS; - if (getStackProtectorGuard() == "global") - return StackProtectorGuards::Global; - if (getStackProtectorGuard() != "none") { - ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr = - MemoryBuffer::getFile(getStackProtectorGuard()); - if (!MBOrErr) - errs() << "error illegal stack protector guard mode: " - << MBOrErr.getError().message() << "\n"; - else - Options.BBSectionsFuncListBuf = std::move(*MBOrErr); - } - return StackProtectorGuards::None; -} - // Common utility function tightly tied to the options listed here. Initializes // a TargetOptions object with CodeGen flags and returns it. 
TargetOptions @@ -537,7 +502,6 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) { Options.EnableAIXExtendedAltivecABI = getEnableAIXExtendedAltivecABI(); Options.NoZerosInBSS = getDontPlaceZerosInBSS(); Options.GuaranteedTailCallOpt = getEnableGuaranteedTailCallOpt(); - Options.StackAlignmentOverride = getOverrideStackAlignment(); Options.StackSymbolOrdering = getStackSymbolOrdering(); Options.UseInitArray = !getUseCtors(); Options.RelaxELFRelocations = getRelaxELFRelocations(); @@ -549,9 +513,6 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) { Options.BBSections = getBBSectionsMode(Options); Options.UniqueSectionNames = getUniqueSectionNames(); Options.UniqueBasicBlockSectionNames = getUniqueBasicBlockSectionNames(); - Options.StackProtectorGuard = getStackProtectorGuardMode(Options); - Options.StackProtectorGuardOffset = getStackProtectorGuardOffset(); - Options.StackProtectorGuardReg = getStackProtectorGuardReg(); Options.TLSSize = getTLSSize(); Options.EmulatedTLS = getEmulatedTLS(); Options.ExplicitEmulatedTLS = EmulatedTLSView->getNumOccurrences() > 0; @@ -565,6 +526,7 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) { Options.ValueTrackingVariableLocations = getValueTrackingVariableLocations(); Options.ForceDwarfFrameSection = getForceDwarfFrameSection(); Options.XRayOmitFunctionIndex = getXRayOmitFunctionIndex(); + Options.DebugStrictDwarf = getDebugStrictDwarf(); Options.MCOptions = mc::InitMCTargetOptionsFromFlags(); @@ -660,11 +622,11 @@ void codegen::setFunctionAttributes(StringRef CPU, StringRef Features, } if (FramePointerUsageView->getNumOccurrences() > 0 && !F.hasFnAttribute("frame-pointer")) { - if (getFramePointerUsage() == FramePointer::All) + if (getFramePointerUsage() == FramePointerKind::All) NewAttrs.addAttribute("frame-pointer", "all"); - else if (getFramePointerUsage() == FramePointer::NonLeaf) + else if (getFramePointerUsage() == FramePointerKind::NonLeaf) NewAttrs.addAttribute("frame-pointer", "non-leaf"); - else if (getFramePointerUsage() == FramePointer::None) + else if (getFramePointerUsage() == FramePointerKind::None) NewAttrs.addAttribute("frame-pointer", "none"); } if (DisableTailCallsView->getNumOccurrences() > 0) diff --git a/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp index 7ae42b010261..c56c8c87734f 100644 --- a/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -65,9 +65,8 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { bool IsReturnBlock = BB->isReturnBlock(); // Examine the live-in regs of all successors. - for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), - SE = BB->succ_end(); SI != SE; ++SI) - for (const auto &LI : (*SI)->liveins()) { + for (const MachineBasicBlock *Succ : BB->successors()) + for (const auto &LI : Succ->liveins()) { for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI) { unsigned Reg = *AI; Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); @@ -143,17 +142,16 @@ static const SDep *CriticalPathStep(const SUnit *SU) { const SDep *Next = nullptr; unsigned NextDepth = 0; // Find the predecessor edge with the greatest depth. 
- for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); - P != PE; ++P) { - const SUnit *PredSU = P->getSUnit(); - unsigned PredLatency = P->getLatency(); + for (const SDep &P : SU->Preds) { + const SUnit *PredSU = P.getSUnit(); + unsigned PredLatency = P.getLatency(); unsigned PredTotalLatency = PredSU->getDepth() + PredLatency; // In the case of a latency tie, prefer an anti-dependency edge over // other types of edges. if (NextDepth < PredTotalLatency || - (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) { + (NextDepth == PredTotalLatency && P.getKind() == SDep::Anti)) { NextDepth = PredTotalLatency; - Next = &*P; + Next = &P; } } return Next; @@ -426,9 +424,8 @@ findSuitableFreeRegister(RegRefIter RegRefBegin, continue; // If NewReg overlaps any of the forbidden registers, we can't use it. bool Forbidden = false; - for (SmallVectorImpl<unsigned>::iterator it = Forbid.begin(), - ite = Forbid.end(); it != ite; ++it) - if (TRI->regsOverlap(NewReg, *it)) { + for (unsigned R : Forbid) + if (TRI->regsOverlap(NewReg, R)) { Forbidden = true; break; } @@ -582,11 +579,11 @@ BreakAntiDependencies(const std::vector<SUnit> &SUnits, // Also, if there are dependencies on other SUnits with the // same register as the anti-dependency, don't attempt to // break it. - for (SUnit::const_pred_iterator P = CriticalPathSU->Preds.begin(), - PE = CriticalPathSU->Preds.end(); P != PE; ++P) - if (P->getSUnit() == NextSU ? - (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) : - (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) { + for (const SDep &P : CriticalPathSU->Preds) + if (P.getSUnit() == NextSU + ? (P.getKind() != SDep::Anti || P.getReg() != AntiDepReg) + : (P.getKind() == SDep::Data && + P.getReg() == AntiDepReg)) { AntiDepReg = 0; break; } diff --git a/llvm/lib/CodeGen/DFAPacketizer.cpp b/llvm/lib/CodeGen/DFAPacketizer.cpp index afcf014bca40..d38bacdb1aa7 100644 --- a/llvm/lib/CodeGen/DFAPacketizer.cpp +++ b/llvm/lib/CodeGen/DFAPacketizer.cpp @@ -295,7 +295,7 @@ bool VLIWPacketizerList::alias(const MachineMemOperand &Op1, MemoryLocation(Op2.getValue(), Overlapb, UseTBAA ? Op2.getAAInfo() : AAMDNodes())); - return AAResult != NoAlias; + return AAResult != AliasResult::NoAlias; } bool VLIWPacketizerList::alias(const MachineInstr &MI1, diff --git a/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp index 93467e9d09b8..6e7db95b5c2a 100644 --- a/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -132,10 +132,8 @@ bool DeadMachineInstructionElim::eliminateDeadMI(MachineFunction &MF) { // Add live-ins from successors to LivePhysRegs. Normally, physregs are not // live across blocks, but some targets (x86) can have flags live out of a // block. 
- for (MachineBasicBlock::succ_iterator S = MBB->succ_begin(), - E = MBB->succ_end(); - S != E; S++) - for (const auto &LI : (*S)->liveins()) + for (const MachineBasicBlock *Succ : MBB->successors()) + for (const auto &LI : Succ->liveins()) LivePhysRegs.set(LI.PhysReg); // Now scan the instructions and delete dead ones, tracking physreg diff --git a/llvm/lib/CodeGen/DetectDeadLanes.cpp b/llvm/lib/CodeGen/DetectDeadLanes.cpp index 03fe5f155291..1337e57f360b 100644 --- a/llvm/lib/CodeGen/DetectDeadLanes.cpp +++ b/llvm/lib/CodeGen/DetectDeadLanes.cpp @@ -516,15 +516,17 @@ bool DetectDeadLanes::runOnce(MachineFunction &MF) { transferDefinedLanesStep(MO, Info.DefinedLanes); } - LLVM_DEBUG(dbgs() << "Defined/Used lanes:\n"; for (unsigned RegIdx = 0; - RegIdx < NumVirtRegs; - ++RegIdx) { - unsigned Reg = Register::index2VirtReg(RegIdx); - const VRegInfo &Info = VRegInfos[RegIdx]; - dbgs() << printReg(Reg, nullptr) - << " Used: " << PrintLaneMask(Info.UsedLanes) - << " Def: " << PrintLaneMask(Info.DefinedLanes) << '\n'; - } dbgs() << "\n";); + LLVM_DEBUG({ + dbgs() << "Defined/Used lanes:\n"; + for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) { + unsigned Reg = Register::index2VirtReg(RegIdx); + const VRegInfo &Info = VRegInfos[RegIdx]; + dbgs() << printReg(Reg, nullptr) + << " Used: " << PrintLaneMask(Info.UsedLanes) + << " Def: " << PrintLaneMask(Info.DefinedLanes) << '\n'; + } + dbgs() << "\n"; + }); bool Again = false; // Mark operands as dead/unused. diff --git a/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/llvm/lib/CodeGen/DwarfEHPrepare.cpp index 97e0162f35a1..5ca1e91cc5f4 100644 --- a/llvm/lib/CodeGen/DwarfEHPrepare.cpp +++ b/llvm/lib/CodeGen/DwarfEHPrepare.cpp @@ -42,6 +42,12 @@ using namespace llvm; #define DEBUG_TYPE "dwarfehprepare" STATISTIC(NumResumesLowered, "Number of resume calls lowered"); +STATISTIC(NumCleanupLandingPadsUnreachable, + "Number of cleanup landing pads found unreachable"); +STATISTIC(NumCleanupLandingPadsRemaining, + "Number of cleanup landing pads remaining"); +STATISTIC(NumNoUnwind, "Number of functions with nounwind"); +STATISTIC(NumUnwind, "Number of functions with unwind"); namespace { @@ -153,7 +159,7 @@ size_t DwarfEHPrepare::pruneUnreachableResumes( BasicBlock *BB = RI->getParent(); new UnreachableInst(Ctx, RI); RI->eraseFromParent(); - simplifyCFG(BB, *TTI, RequireAndPreserveDomTree ? 
DTU : nullptr); + simplifyCFG(BB, *TTI, DTU); } } Resumes.resize(ResumesLeft); @@ -163,6 +169,10 @@ size_t DwarfEHPrepare::pruneUnreachableResumes( bool DwarfEHPrepare::InsertUnwindResumeCalls() { SmallVector<ResumeInst *, 16> Resumes; SmallVector<LandingPadInst *, 16> CleanupLPads; + if (F.doesNotThrow()) + NumNoUnwind++; + else + NumUnwind++; for (BasicBlock &BB : F) { if (auto *RI = dyn_cast<ResumeInst>(BB.getTerminator())) Resumes.push_back(RI); @@ -171,6 +181,8 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() { CleanupLPads.push_back(LP); } + NumCleanupLandingPadsRemaining += CleanupLPads.size(); + if (Resumes.empty()) return false; @@ -182,8 +194,19 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() { LLVMContext &Ctx = F.getContext(); size_t ResumesLeft = Resumes.size(); - if (OptLevel != CodeGenOpt::None) + if (OptLevel != CodeGenOpt::None) { ResumesLeft = pruneUnreachableResumes(Resumes, CleanupLPads); +#if LLVM_ENABLE_STATS + unsigned NumRemainingLPs = 0; + for (BasicBlock &BB : F) { + if (auto *LP = BB.getLandingPadInst()) + if (LP->isCleanup()) + NumRemainingLPs++; + } + NumCleanupLandingPadsUnreachable += CleanupLPads.size() - NumRemainingLPs; + NumCleanupLandingPadsRemaining -= CleanupLPads.size() - NumRemainingLPs; +#endif + } if (ResumesLeft == 0) return true; // We pruned them all. @@ -242,25 +265,15 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() { CI->setDoesNotReturn(); new UnreachableInst(Ctx, UnwindBB); - if (DTU && RequireAndPreserveDomTree) + if (DTU) DTU->applyUpdates(Updates); return true; } bool DwarfEHPrepare::run() { - assert(((OptLevel == CodeGenOpt::None || !RequireAndPreserveDomTree) || - (DTU && - DTU->getDomTree().verify(DominatorTree::VerificationLevel::Full))) && - "Original domtree is invalid?"); - bool Changed = InsertUnwindResumeCalls(); - assert(((OptLevel == CodeGenOpt::None || !RequireAndPreserveDomTree) || - (DTU && - DTU->getDomTree().verify(DominatorTree::VerificationLevel::Full))) && - "Original domtree is invalid?"); - return Changed; } @@ -268,7 +281,7 @@ static bool prepareDwarfEH(CodeGenOpt::Level OptLevel, FunctionCallee &RewindFunction, Function &F, const TargetLowering &TLI, DominatorTree *DT, const TargetTransformInfo *TTI) { - DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); return DwarfEHPrepare(OptLevel, RewindFunction, F, TLI, DT ? 
&DTU : nullptr, TTI) @@ -295,8 +308,11 @@ public: const TargetLowering &TLI = *TM.getSubtargetImpl(F)->getTargetLowering(); DominatorTree *DT = nullptr; const TargetTransformInfo *TTI = nullptr; + if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>()) + DT = &DTWP->getDomTree(); if (OptLevel != CodeGenOpt::None) { - DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + if (!DT) + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); } return prepareDwarfEH(OptLevel, RewindFunction, F, TLI, DT, TTI); @@ -308,9 +324,8 @@ public: if (OptLevel != CodeGenOpt::None) { AU.addRequired<DominatorTreeWrapperPass>(); AU.addRequired<TargetTransformInfoWrapperPass>(); - if (RequireAndPreserveDomTree) - AU.addPreserved<DominatorTreeWrapperPass>(); } + AU.addPreserved<DominatorTreeWrapperPass>(); } StringRef getPassName() const override { diff --git a/llvm/lib/CodeGen/EHContGuardCatchret.cpp b/llvm/lib/CodeGen/EHContGuardCatchret.cpp new file mode 100644 index 000000000000..c18532946bf9 --- /dev/null +++ b/llvm/lib/CodeGen/EHContGuardCatchret.cpp @@ -0,0 +1,84 @@ +//===-- EHContGuardCatchret.cpp - Catchret target symbols -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains a machine function pass to insert a symbol before each +/// valid catchret target and store this in the MachineFunction's +/// CatchRetTargets vector. This will be used to emit the table of valid targets +/// used by EHCont Guard. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/InitializePasses.h" + +using namespace llvm; + +#define DEBUG_TYPE "ehcontguard-catchret" + +STATISTIC(EHContGuardCatchretTargets, + "Number of EHCont Guard catchret targets"); + +namespace { + +/// MachineFunction pass to insert a symbol before each valid catchret target +/// and store these in the MachineFunction's CatchRetTargets vector. +class EHContGuardCatchret : public MachineFunctionPass { +public: + static char ID; + + EHContGuardCatchret() : MachineFunctionPass(ID) { + initializeEHContGuardCatchretPass(*PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { + return "EH Cont Guard catchret targets"; + } + + bool runOnMachineFunction(MachineFunction &MF) override; +}; + +} // end anonymous namespace + +char EHContGuardCatchret::ID = 0; + +INITIALIZE_PASS(EHContGuardCatchret, "EHContGuardCatchret", + "Insert symbols at valid catchret targets for /guard:ehcont", + false, false) +FunctionPass *llvm::createEHContGuardCatchretPass() { + return new EHContGuardCatchret(); +} + +bool EHContGuardCatchret::runOnMachineFunction(MachineFunction &MF) { + + // Skip modules for which the ehcontguard flag is not set. 
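The new EHContGuardCatchret pass only does work when the module carries the "ehcontguard" flag it checks for below. A hedged sketch of how a producer would set that flag (the Warning merge behavior and the value 1 are assumptions, not taken from this diff):

#include "llvm/IR/Module.h"
using namespace llvm;

// Attach the module flag that getModuleFlag("ehcontguard") tests for.
// Module::Warning selects how conflicting values merge at LTO link time.
void markForEHContGuard(Module &M) {
  M.addModuleFlag(Module::Warning, "ehcontguard", 1);
}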
+ if (!MF.getMMI().getModule()->getModuleFlag("ehcontguard")) + return false; + + // Skip functions that do not have catchret + if (!MF.hasEHCatchret()) + return false; + + bool Result = false; + + for (MachineBasicBlock &MBB : MF) { + if (MBB.isEHCatchretTarget()) { + MF.addCatchretTarget(MBB.getEHCatchretSymbol()); + EHContGuardCatchretTargets++; + Result = true; + } + } + + return Result; +} diff --git a/llvm/lib/CodeGen/EarlyIfConversion.cpp b/llvm/lib/CodeGen/EarlyIfConversion.cpp index cf7d93d6a33a..90883212a275 100644 --- a/llvm/lib/CodeGen/EarlyIfConversion.cpp +++ b/llvm/lib/CodeGen/EarlyIfConversion.cpp @@ -410,9 +410,8 @@ bool SSAIfConv::findInsertionPoint() { if (!LiveRegUnits.empty()) { LLVM_DEBUG({ dbgs() << "Would clobber"; - for (SparseSet<unsigned>::const_iterator - i = LiveRegUnits.begin(), e = LiveRegUnits.end(); i != e; ++i) - dbgs() << ' ' << printRegUnit(*i, TRI); + for (unsigned LRU : LiveRegUnits) + dbgs() << ' ' << printRegUnit(LRU, TRI); dbgs() << " live before " << *I; }); continue; @@ -558,6 +557,52 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB, bool Predicate) { return true; } +/// \return true iff the two registers are known to have the same value. +static bool hasSameValue(const MachineRegisterInfo &MRI, + const TargetInstrInfo *TII, Register TReg, + Register FReg) { + if (TReg == FReg) + return true; + + if (!TReg.isVirtual() || !FReg.isVirtual()) + return false; + + const MachineInstr *TDef = MRI.getUniqueVRegDef(TReg); + const MachineInstr *FDef = MRI.getUniqueVRegDef(FReg); + if (!TDef || !FDef) + return false; + + // If there are side-effects, all bets are off. + if (TDef->hasUnmodeledSideEffects()) + return false; + + // If the instruction could modify memory, or there may be some intervening + // store between the two, we can't consider them to be equal. + if (TDef->mayLoadOrStore() && !TDef->isDereferenceableInvariantLoad(nullptr)) + return false; + + // We also can't guarantee that they are the same if, for example, the + // instructions are both a copy from a physical reg, because some other + // instruction may have modified the value in that reg between the two + // defining insts. + if (any_of(TDef->uses(), [](const MachineOperand &MO) { + return MO.isReg() && MO.getReg().isPhysical(); + })) + return false; + + // Check whether the two defining instructions produce the same value(s). + if (!TII->produceSameValue(*TDef, *FDef, &MRI)) + return false; + + // Further, check that the two defs come from corresponding operands. + int TIdx = TDef->findRegisterDefOperandIdx(TReg); + int FIdx = FDef->findRegisterDefOperandIdx(FReg); + if (TIdx == -1 || FIdx == -1) + return false; + + return TIdx == FIdx; +} + /// replacePHIInstrs - Completely replace PHI instructions with selects. /// This is possible when the only Tail predecessors are the if-converted /// blocks. @@ -572,7 +617,15 @@ void SSAIfConv::replacePHIInstrs() { PHIInfo &PI = PHIs[i]; LLVM_DEBUG(dbgs() << "If-converting " << *PI.PHI); Register DstReg = PI.PHI->getOperand(0).getReg(); - TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg); + if (hasSameValue(*MRI, TII, PI.TReg, PI.FReg)) { + // We do not need the select instruction if both incoming values are + // equal, but we do need a COPY. 
+ BuildMI(*Head, FirstTerm, HeadDL, TII->get(TargetOpcode::COPY), DstReg) + .addReg(PI.TReg); + } else { + TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, + PI.FReg); + } LLVM_DEBUG(dbgs() << " --> " << *std::prev(FirstTerm)); PI.PHI->eraseFromParent(); PI.PHI = nullptr; @@ -593,7 +646,7 @@ void SSAIfConv::rewritePHIOperands() { unsigned DstReg = 0; LLVM_DEBUG(dbgs() << "If-converting " << *PI.PHI); - if (PI.TReg == PI.FReg) { + if (hasSameValue(*MRI, TII, PI.TReg, PI.FReg)) { // We do not need the select instruction if both incoming values are // equal. DstReg = PI.TReg; diff --git a/llvm/lib/CodeGen/EdgeBundles.cpp b/llvm/lib/CodeGen/EdgeBundles.cpp index 0b2ffda50a39..3dd354e8ab7e 100644 --- a/llvm/lib/CodeGen/EdgeBundles.cpp +++ b/llvm/lib/CodeGen/EdgeBundles.cpp @@ -46,9 +46,8 @@ bool EdgeBundles::runOnMachineFunction(MachineFunction &mf) { for (const auto &MBB : *MF) { unsigned OutE = 2 * MBB.getNumber() + 1; // Join the outgoing bundle with the ingoing bundles of all successors. - for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(), - SE = MBB.succ_end(); SI != SE; ++SI) - EC.join(OutE, 2 * (*SI)->getNumber()); + for (const MachineBasicBlock *Succ : MBB.successors()) + EC.join(OutE, 2 * Succ->getNumber()); } EC.compress(); if (ViewEdgeBundles) @@ -69,9 +68,9 @@ bool EdgeBundles::runOnMachineFunction(MachineFunction &mf) { return false; } -/// Specialize WriteGraph, the standard implementation won't work. namespace llvm { +/// Specialize WriteGraph, the standard implementation won't work. template<> raw_ostream &WriteGraph<>(raw_ostream &O, const EdgeBundles &G, bool ShortNames, @@ -86,10 +85,9 @@ raw_ostream &WriteGraph<>(raw_ostream &O, const EdgeBundles &G, << "\"\n" << "\t\"" << printMBBReference(MBB) << "\" -> " << G.getBundle(BB, true) << '\n'; - for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(), - SE = MBB.succ_end(); SI != SE; ++SI) + for (const MachineBasicBlock *Succ : MBB.successors()) O << "\t\"" << printMBBReference(MBB) << "\" -> \"" - << printMBBReference(**SI) << "\" [ color=lightgray ]\n"; + << printMBBReference(*Succ) << "\" [ color=lightgray ]\n"; } O << "}\n"; return O; diff --git a/llvm/lib/CodeGen/ExecutionDomainFix.cpp b/llvm/lib/CodeGen/ExecutionDomainFix.cpp index 2cca05ea6f55..9621ad4b1248 100644 --- a/llvm/lib/CodeGen/ExecutionDomainFix.cpp +++ b/llvm/lib/CodeGen/ExecutionDomainFix.cpp @@ -380,7 +380,7 @@ void ExecutionDomainFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { // Finally set all defs and non-collapsed uses to dv. We must iterate through // all the operators, including imp-def ones. - for (MachineOperand &mo : mi->operands()) { + for (const MachineOperand &mo : mi->operands()) { if (!mo.isReg()) continue; for (int rx : regIndices(mo.getReg())) { @@ -454,16 +454,14 @@ bool ExecutionDomainFix::runOnMachineFunction(MachineFunction &mf) { // Traverse the basic blocks. 
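The EdgeBundles change above only modernizes the loop headers; the joining itself rides on llvm::IntEqClasses. A hedged sketch of that union-then-compress idiom (the sizes and joins here are arbitrary):

#include "llvm/ADT/IntEqClasses.h"
using namespace llvm;

unsigned bundleOf(unsigned N) {
  IntEqClasses EC;
  EC.grow(8);    // classes 0..7 start out as singletons
  EC.join(2, 5); // unite the classes containing 2 and 5
  EC.join(5, 7);
  EC.compress(); // renumber classes densely; EC is read-only afterwards
  return EC[N];  // compressed class index of N
}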
LoopTraversal Traversal; LoopTraversal::TraversalOrder TraversedMBBOrder = Traversal.traverse(mf); - for (LoopTraversal::TraversedMBBInfo TraversedMBB : TraversedMBBOrder) { + for (const LoopTraversal::TraversedMBBInfo &TraversedMBB : TraversedMBBOrder) processBasicBlock(TraversedMBB); - } - for (LiveRegsDVInfo OutLiveRegs : MBBOutRegsInfos) { - for (DomainValue *OutLiveReg : OutLiveRegs) { + for (const LiveRegsDVInfo &OutLiveRegs : MBBOutRegsInfos) + for (DomainValue *OutLiveReg : OutLiveRegs) if (OutLiveReg) release(OutLiveReg); - } - } + MBBOutRegsInfos.clear(); Avail.clear(); Allocator.DestroyAll(); diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp index 9f85db9de884..50fdc2114780 100644 --- a/llvm/lib/CodeGen/ExpandMemCmp.cpp +++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp @@ -13,6 +13,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/LazyBlockFrequencyInfo.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -21,11 +22,13 @@ #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/InitializePasses.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SizeOpts.h" -#include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -75,6 +78,7 @@ class MemCmpExpansion { PHINode *PhiRes; const bool IsUsedForZeroCmp; const DataLayout &DL; + DomTreeUpdater *DTU; IRBuilder<> Builder; // Represents the decomposition in blocks of the expansion. For example, // comparing 33 bytes on X86+sse can be done with 2x16-byte loads and @@ -123,7 +127,8 @@ class MemCmpExpansion { public: MemCmpExpansion(CallInst *CI, uint64_t Size, const TargetTransformInfo::MemCmpExpansionOptions &Options, - const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout); + const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout, + DomTreeUpdater *DTU); unsigned getNumBlocks(); uint64_t getNumLoads() const { return LoadSequence.size(); } @@ -212,10 +217,12 @@ MemCmpExpansion::computeOverlappingLoadSequence(uint64_t Size, MemCmpExpansion::MemCmpExpansion( CallInst *const CI, uint64_t Size, const TargetTransformInfo::MemCmpExpansionOptions &Options, - const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout) + const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout, + DomTreeUpdater *DTU) : CI(CI), Size(Size), MaxLoadSize(0), NumLoadsNonOneByte(0), NumLoadsPerBlockForZeroCmp(Options.NumLoadsPerBlock), - IsUsedForZeroCmp(IsUsedForZeroCmp), DL(TheDataLayout), Builder(CI) { + IsUsedForZeroCmp(IsUsedForZeroCmp), DL(TheDataLayout), DTU(DTU), + Builder(CI) { assert(Size > 0 && "zero blocks"); // Scale the max size down if the target can load more bytes than we need. llvm::ArrayRef<unsigned> LoadSizes(Options.LoadSizes); @@ -325,13 +332,14 @@ MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType, // final phi node for selecting the memcmp result. 
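The ExpandMemCmp helpers that follow all thread a DomTreeUpdater through so each newly created branch is reported as a dominator-tree edge. A minimal sketch of the lazy-updater idiom being adopted (recordNewEdge is a hypothetical wrapper):

#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
using namespace llvm;

void recordNewEdge(DominatorTree &DT, BasicBlock *From, BasicBlock *To) {
  // With the Lazy strategy, updates are queued and only applied when the
  // tree is next queried (or when the updater is destroyed).
  DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Lazy);
  DTU.applyUpdates({{DominatorTree::Insert, From, To}});
}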
void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex, unsigned OffsetBytes) { - Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]); + BasicBlock *BB = LoadCmpBlocks[BlockIndex]; + Builder.SetInsertPoint(BB); const LoadPair Loads = getLoadPair(Type::getInt8Ty(CI->getContext()), /*NeedsBSwap=*/false, Type::getInt32Ty(CI->getContext()), OffsetBytes); Value *Diff = Builder.CreateSub(Loads.Lhs, Loads.Rhs); - PhiRes->addIncoming(Diff, LoadCmpBlocks[BlockIndex]); + PhiRes->addIncoming(Diff, BB); if (BlockIndex < (LoadCmpBlocks.size() - 1)) { // Early exit branch if difference found to EndBlock. Otherwise, continue to @@ -340,10 +348,16 @@ void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex, ConstantInt::get(Diff->getType(), 0)); BranchInst *CmpBr = BranchInst::Create(EndBlock, LoadCmpBlocks[BlockIndex + 1], Cmp); + if (DTU) + DTU->applyUpdates( + {{DominatorTree::Insert, BB, EndBlock}, + {DominatorTree::Insert, BB, LoadCmpBlocks[BlockIndex + 1]}}); Builder.Insert(CmpBr); } else { // The last block has an unconditional branch to EndBlock. BranchInst *CmpBr = BranchInst::Create(EndBlock); + if (DTU) + DTU->applyUpdates({{DominatorTree::Insert, BB, EndBlock}}); Builder.Insert(CmpBr); } } @@ -428,8 +442,12 @@ void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(unsigned BlockIndex, : LoadCmpBlocks[BlockIndex + 1]; // Early exit branch if difference found to ResultBlock. Otherwise, // continue to next LoadCmpBlock or EndBlock. + BasicBlock *BB = Builder.GetInsertBlock(); BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp); Builder.Insert(CmpBr); + if (DTU) + DTU->applyUpdates({{DominatorTree::Insert, BB, ResBlock.BB}, + {DominatorTree::Insert, BB, NextBB}}); // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0 // since early exit to ResultBlock was not taken (no difference was found in @@ -482,8 +500,12 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) { : LoadCmpBlocks[BlockIndex + 1]; // Early exit branch if difference found to ResultBlock. Otherwise, continue // to next LoadCmpBlock or EndBlock. + BasicBlock *BB = Builder.GetInsertBlock(); BranchInst *CmpBr = BranchInst::Create(NextBB, ResBlock.BB, Cmp); Builder.Insert(CmpBr); + if (DTU) + DTU->applyUpdates({{DominatorTree::Insert, BB, NextBB}, + {DominatorTree::Insert, BB, ResBlock.BB}}); // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0 // since early exit to ResultBlock was not taken (no difference was found in @@ -507,6 +529,8 @@ void MemCmpExpansion::emitMemCmpResultBlock() { PhiRes->addIncoming(Res, ResBlock.BB); BranchInst *NewBr = BranchInst::Create(EndBlock); Builder.Insert(NewBr); + if (DTU) + DTU->applyUpdates({{DominatorTree::Insert, ResBlock.BB, EndBlock}}); return; } BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt(); @@ -519,9 +543,11 @@ void MemCmpExpansion::emitMemCmpResultBlock() { Builder.CreateSelect(Cmp, ConstantInt::get(Builder.getInt32Ty(), -1), ConstantInt::get(Builder.getInt32Ty(), 1)); + PhiRes->addIncoming(Res, ResBlock.BB); BranchInst *NewBr = BranchInst::Create(EndBlock); Builder.Insert(NewBr); - PhiRes->addIncoming(Res, ResBlock.BB); + if (DTU) + DTU->applyUpdates({{DominatorTree::Insert, ResBlock.BB, EndBlock}}); } void MemCmpExpansion::setupResultBlockPHINodes() { @@ -597,7 +623,8 @@ Value *MemCmpExpansion::getMemCmpExpansion() { // Create the basic block framework for a multi-block expansion. 
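In the hunk below, the end block is now carved out with SplitBlock() from BasicBlockUtils rather than a raw splitBasicBlock(), precisely so the CFG change reaches the updater. A hedged sketch of that call (splitAt is a hypothetical wrapper):

#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;

BasicBlock *splitAt(BasicBlock *BB, Instruction *SplitPt,
                    DomTreeUpdater *DTU) {
  // Splits BB before SplitPt; the new fall-through edge is recorded in DTU.
  return SplitBlock(BB, SplitPt, DTU, /*LI=*/nullptr, /*MSSAU=*/nullptr,
                    "split");
}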
if (getNumBlocks() != 1) { BasicBlock *StartBlock = CI->getParent(); - EndBlock = StartBlock->splitBasicBlock(CI, "endblock"); + EndBlock = SplitBlock(StartBlock, CI, DTU, /*LI=*/nullptr, + /*MSSAU=*/nullptr, "endblock"); setupEndBlockPHINodes(); createResultBlock(); @@ -610,9 +637,12 @@ Value *MemCmpExpansion::getMemCmpExpansion() { // Create the number of required load compare basic blocks. createLoadCmpBlocks(); - // Update the terminator added by splitBasicBlock to branch to the first + // Update the terminator added by SplitBlock to branch to the first // LoadCmpBlock. StartBlock->getTerminator()->setSuccessor(0, LoadCmpBlocks[0]); + if (DTU) + DTU->applyUpdates({{DominatorTree::Insert, StartBlock, LoadCmpBlocks[0]}, + {DominatorTree::Delete, StartBlock, EndBlock}}); } Builder.SetCurrentDebugLocation(CI->getDebugLoc()); @@ -707,7 +737,8 @@ Value *MemCmpExpansion::getMemCmpExpansion() { /// ret i32 %phi.res static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI, const TargetLowering *TLI, const DataLayout *DL, - ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) { + ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, + DomTreeUpdater *DTU) { NumMemCmpCalls++; // Early exit from expansion if -Oz. @@ -744,7 +775,7 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI, if (!OptForSize && MaxLoadsPerMemcmp.getNumOccurrences()) Options.MaxNumLoads = MaxLoadsPerMemcmp; - MemCmpExpansion Expansion(CI, SizeVal, Options, IsUsedForZeroCmp, *DL); + MemCmpExpansion Expansion(CI, SizeVal, Options, IsUsedForZeroCmp, *DL, DTU); // Don't expand if this will require more loads than desired by the target. if (Expansion.getNumLoads() == 0) { @@ -763,8 +794,6 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI, return true; } - - class ExpandMemCmpPass : public FunctionPass { public: static char ID; @@ -791,7 +820,10 @@ public: auto *BFI = (PSI && PSI->hasProfileSummary()) ? &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() : nullptr; - auto PA = runImpl(F, TLI, TTI, TL, PSI, BFI); + DominatorTree *DT = nullptr; + if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>()) + DT = &DTWP->getDomTree(); + auto PA = runImpl(F, TLI, TTI, TL, PSI, BFI, DT); return !PA.areAllPreserved(); } @@ -800,25 +832,28 @@ private: AU.addRequired<TargetLibraryInfoWrapperPass>(); AU.addRequired<TargetTransformInfoWrapperPass>(); AU.addRequired<ProfileSummaryInfoWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU); FunctionPass::getAnalysisUsage(AU); } PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, - const TargetLowering* TL, - ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI); + const TargetLowering *TL, ProfileSummaryInfo *PSI, + BlockFrequencyInfo *BFI, DominatorTree *DT); // Returns true if a change was made. 
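Above, the legacy pass now takes the dominator tree opportunistically instead of requiring it. A minimal sketch of the getAnalysisIfAvailable idiom (getCachedDomTree is a hypothetical name):

#include "llvm/IR/Dominators.h"
#include "llvm/Pass.h"
using namespace llvm;

// Returns the DominatorTree if some earlier pass already computed it,
// without forcing a rebuild; callers must tolerate nullptr.
DominatorTree *getCachedDomTree(Pass &P) {
  if (auto *DTWP = P.getAnalysisIfAvailable<DominatorTreeWrapperPass>())
    return &DTWP->getDomTree();
  return nullptr;
}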
bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI, - const TargetTransformInfo *TTI, const TargetLowering* TL, - const DataLayout& DL, ProfileSummaryInfo *PSI, - BlockFrequencyInfo *BFI); + const TargetTransformInfo *TTI, const TargetLowering *TL, + const DataLayout &DL, ProfileSummaryInfo *PSI, + BlockFrequencyInfo *BFI, DomTreeUpdater *DTU); }; -bool ExpandMemCmpPass::runOnBlock( - BasicBlock &BB, const TargetLibraryInfo *TLI, - const TargetTransformInfo *TTI, const TargetLowering* TL, - const DataLayout& DL, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) { +bool ExpandMemCmpPass::runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI, + const TargetTransformInfo *TTI, + const TargetLowering *TL, + const DataLayout &DL, ProfileSummaryInfo *PSI, + BlockFrequencyInfo *BFI, + DomTreeUpdater *DTU) { for (Instruction& I : BB) { CallInst *CI = dyn_cast<CallInst>(&I); if (!CI) { @@ -827,22 +862,27 @@ bool ExpandMemCmpPass::runOnBlock( LibFunc Func; if (TLI->getLibFunc(*CI, Func) && (Func == LibFunc_memcmp || Func == LibFunc_bcmp) && - expandMemCmp(CI, TTI, TL, &DL, PSI, BFI)) { + expandMemCmp(CI, TTI, TL, &DL, PSI, BFI, DTU)) { return true; } } return false; } +PreservedAnalyses +ExpandMemCmpPass::runImpl(Function &F, const TargetLibraryInfo *TLI, + const TargetTransformInfo *TTI, + const TargetLowering *TL, ProfileSummaryInfo *PSI, + BlockFrequencyInfo *BFI, DominatorTree *DT) { + Optional<DomTreeUpdater> DTU; + if (DT) + DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy); -PreservedAnalyses ExpandMemCmpPass::runImpl( - Function &F, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, - const TargetLowering* TL, ProfileSummaryInfo *PSI, - BlockFrequencyInfo *BFI) { const DataLayout& DL = F.getParent()->getDataLayout(); bool MadeChanges = false; for (auto BBIt = F.begin(); BBIt != F.end();) { - if (runOnBlock(*BBIt, TLI, TTI, TL, DL, PSI, BFI)) { + if (runOnBlock(*BBIt, TLI, TTI, TL, DL, PSI, BFI, + DTU.hasValue() ? DTU.getPointer() : nullptr)) { MadeChanges = true; // If changes were made, restart the function from the beginning, since // the structure of the function was changed. @@ -854,7 +894,11 @@ PreservedAnalyses ExpandMemCmpPass::runImpl( if (MadeChanges) for (BasicBlock &BB : F) SimplifyInstructionsInBlock(&BB); - return MadeChanges ? 
PreservedAnalyses::none() : PreservedAnalyses::all(); + if (!MadeChanges) + return PreservedAnalyses::all(); + PreservedAnalyses PA; + PA.preserve<DominatorTreeAnalysis>(); + return PA; } } // namespace @@ -866,6 +910,7 @@ INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass) INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_END(ExpandMemCmpPass, "expandmemcmp", "Expand memcmp() to load/stores", false, false) diff --git a/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp index 842211c09134..d909d6aa5b0a 100644 --- a/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -188,9 +188,8 @@ bool ExpandPostRA::runOnMachineFunction(MachineFunction &MF) { bool MadeChange = false; - for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end(); - mbbi != mbbe; ++mbbi) { - for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end(); + for (MachineBasicBlock &MBB : MF) { + for (MachineBasicBlock::iterator mi = MBB.begin(), me = MBB.end(); mi != me;) { MachineInstr &MI = *mi; // Advance iterator here because MI may be erased. diff --git a/llvm/lib/CodeGen/ExpandReductions.cpp b/llvm/lib/CodeGen/ExpandReductions.cpp index a4c9f02dc64d..2bcaf750911b 100644 --- a/llvm/lib/CodeGen/ExpandReductions.cpp +++ b/llvm/lib/CodeGen/ExpandReductions.cpp @@ -154,13 +154,12 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) { } case Intrinsic::vector_reduce_fmax: case Intrinsic::vector_reduce_fmin: { - // FIXME: We only expand 'fast' reductions here because the underlying - // code in createMinMaxOp() assumes that comparisons use 'fast' - // semantics. + // We require "nnan" to use a shuffle reduction; "nsz" is implied by the + // semantics of the reduction. Value *Vec = II->getArgOperand(0); if (!isPowerOf2_32( cast<FixedVectorType>(Vec->getType())->getNumElements()) || - !FMF.isFast()) + !FMF.noNaNs()) continue; Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), RK); diff --git a/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/llvm/lib/CodeGen/ExpandVectorPredication.cpp new file mode 100644 index 000000000000..a8d4d4ebe8bd --- /dev/null +++ b/llvm/lib/CodeGen/ExpandVectorPredication.cpp @@ -0,0 +1,469 @@ +//===----- CodeGen/ExpandVectorPredication.cpp - Expand VP intrinsics -----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass implements IR expansion for vector predication intrinsics, allowing +// targets to enable vector predication until just before codegen. 
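The ExpandReductions hunk above relaxes the guard from isFast() to noNaNs() because a shuffle reduction only reorders the combining operations, and for fmax/fmin that reordering is observable only when NaNs are present. A hedged scalar model of the log2 shuffle reduction (plain C++ over a power-of-two, non-empty vector; not the IR getShuffleReduction() emits):

#include <algorithm>
#include <vector>

// Pairwise "shuffle" reduction: halve the live prefix each round,
// combining element I with element I + Half.
float shuffleReduceFMax(std::vector<float> V) {
  for (size_t Half = V.size() / 2; Half >= 1; Half /= 2)
    for (size_t I = 0; I < Half; ++I)
      V[I] = std::max(V[I], V[I + Half]);
  return V[0];
}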
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/ExpandVectorPredication.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" + +using namespace llvm; + +using VPLegalization = TargetTransformInfo::VPLegalization; +using VPTransform = TargetTransformInfo::VPLegalization::VPTransform; + +// Keep this in sync with TargetTransformInfo::VPLegalization. +#define VPINTERNAL_VPLEGAL_CASES \ + VPINTERNAL_CASE(Legal) \ + VPINTERNAL_CASE(Discard) \ + VPINTERNAL_CASE(Convert) + +#define VPINTERNAL_CASE(X) "|" #X + +// Override options. +static cl::opt<std::string> EVLTransformOverride( + "expandvp-override-evl-transform", cl::init(""), cl::Hidden, + cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES + ". If non-empty, ignore " + "TargetTransformInfo and " + "always use this transformation for the %evl parameter (Used in " + "testing).")); + +static cl::opt<std::string> MaskTransformOverride( + "expandvp-override-mask-transform", cl::init(""), cl::Hidden, + cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES + ". If non-empty, Ignore " + "TargetTransformInfo and " + "always use this transformation for the %mask parameter (Used in " + "testing).")); + +#undef VPINTERNAL_CASE +#define VPINTERNAL_CASE(X) .Case(#X, VPLegalization::X) + +static VPTransform parseOverrideOption(const std::string &TextOpt) { + return StringSwitch<VPTransform>(TextOpt) VPINTERNAL_VPLEGAL_CASES; +} + +#undef VPINTERNAL_VPLEGAL_CASES + +// Whether any override options are set. +static bool anyExpandVPOverridesSet() { + return !EVLTransformOverride.empty() || !MaskTransformOverride.empty(); +} + +#define DEBUG_TYPE "expandvp" + +STATISTIC(NumFoldedVL, "Number of folded vector length params"); +STATISTIC(NumLoweredVPOps, "Number of folded vector predication operations"); + +///// Helpers { + +/// \returns Whether the vector mask \p MaskVal has all lane bits set. +static bool isAllTrueMask(Value *MaskVal) { + auto *ConstVec = dyn_cast<ConstantVector>(MaskVal); + return ConstVec && ConstVec->isAllOnesValue(); +} + +/// \returns A non-excepting divisor constant for this type. +static Constant *getSafeDivisor(Type *DivTy) { + assert(DivTy->isIntOrIntVectorTy() && "Unsupported divisor type"); + return ConstantInt::get(DivTy, 1u, false); +} + +/// Transfer operation properties from \p OldVPI to \p NewVal. +static void transferDecorations(Value &NewVal, VPIntrinsic &VPI) { + auto *NewInst = dyn_cast<Instruction>(&NewVal); + if (!NewInst || !isa<FPMathOperator>(NewVal)) + return; + + auto *OldFMOp = dyn_cast<FPMathOperator>(&VPI); + if (!OldFMOp) + return; + + NewInst->setFastMathFlags(OldFMOp->getFastMathFlags()); +} + +/// Transfer all properties from \p OldOp to \p NewOp and replace all uses. +/// OldVP gets erased. 
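parseOverrideOption() above is an llvm::StringSwitch table generated by the X-macro. Expanded by hand for illustration, with a Default case added (the real function assumes the option text is one of the three spellings):

#include "llvm/ADT/StringSwitch.h"
using namespace llvm;

enum class VPTransformKind { Legal, Discard, Convert, Unknown };

VPTransformKind parseTransform(StringRef Text) {
  return StringSwitch<VPTransformKind>(Text)
      .Case("Legal", VPTransformKind::Legal)
      .Case("Discard", VPTransformKind::Discard)
      .Case("Convert", VPTransformKind::Convert)
      .Default(VPTransformKind::Unknown);
}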
+static void replaceOperation(Value &NewOp, VPIntrinsic &OldOp) { + transferDecorations(NewOp, OldOp); + OldOp.replaceAllUsesWith(&NewOp); + OldOp.eraseFromParent(); +} + +//// } Helpers + +namespace { + +// Expansion pass state at function scope. +struct CachingVPExpander { + Function &F; + const TargetTransformInfo &TTI; + + /// \returns A (fixed length) vector with ascending integer indices + /// (<0, 1, ..., NumElems-1>). + /// \p Builder + /// Used for instruction creation. + /// \p LaneTy + /// Integer element type of the result vector. + /// \p NumElems + /// Number of vector elements. + Value *createStepVector(IRBuilder<> &Builder, Type *LaneTy, + unsigned NumElems); + + /// \returns A bitmask that is true where the lane position is less-than \p + /// EVLParam + /// + /// \p Builder + /// Used for instruction creation. + /// \p VLParam + /// The explicit vector length parameter to test against the lane + /// positions. + /// \p ElemCount + /// Static (potentially scalable) number of vector elements. + Value *convertEVLToMask(IRBuilder<> &Builder, Value *EVLParam, + ElementCount ElemCount); + + Value *foldEVLIntoMask(VPIntrinsic &VPI); + + /// "Remove" the %evl parameter of \p PI by setting it to the static vector + /// length of the operation. + void discardEVLParameter(VPIntrinsic &PI); + + /// \brief Lower this VP binary operator to a unpredicated binary operator. + Value *expandPredicationInBinaryOperator(IRBuilder<> &Builder, + VPIntrinsic &PI); + + /// \brief Query TTI and expand the vector predication in \p P accordingly. + Value *expandPredication(VPIntrinsic &PI); + + /// \brief Determine how and whether the VPIntrinsic \p VPI shall be + /// expanded. This overrides TTI with the cl::opts listed at the top of this + /// file. + VPLegalization getVPLegalizationStrategy(const VPIntrinsic &VPI) const; + bool UsingTTIOverrides; + +public: + CachingVPExpander(Function &F, const TargetTransformInfo &TTI) + : F(F), TTI(TTI), UsingTTIOverrides(anyExpandVPOverridesSet()) {} + + bool expandVectorPredication(); +}; + +//// CachingVPExpander { + +Value *CachingVPExpander::createStepVector(IRBuilder<> &Builder, Type *LaneTy, + unsigned NumElems) { + // TODO add caching + SmallVector<Constant *, 16> ConstElems; + + for (unsigned Idx = 0; Idx < NumElems; ++Idx) + ConstElems.push_back(ConstantInt::get(LaneTy, Idx, false)); + + return ConstantVector::get(ConstElems); +} + +Value *CachingVPExpander::convertEVLToMask(IRBuilder<> &Builder, + Value *EVLParam, + ElementCount ElemCount) { + // TODO add caching + // Scalable vector %evl conversion. + if (ElemCount.isScalable()) { + auto *M = Builder.GetInsertBlock()->getModule(); + Type *BoolVecTy = VectorType::get(Builder.getInt1Ty(), ElemCount); + Function *ActiveMaskFunc = Intrinsic::getDeclaration( + M, Intrinsic::get_active_lane_mask, {BoolVecTy, EVLParam->getType()}); + // `get_active_lane_mask` performs an implicit less-than comparison. + Value *ConstZero = Builder.getInt32(0); + return Builder.CreateCall(ActiveMaskFunc, {ConstZero, EVLParam}); + } + + // Fixed vector %evl conversion. 
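Both paths of convertEVLToMask() compute the same predicate: lane i stays active iff i < %evl. A scalar model of the fixed-width sequence that follows (splat %evl, build a step vector, compare unsigned less-than), one lane at a time:

#include <cstdint>
#include <vector>

// Lane I of the result is (I < EVL), the per-lane reading of
// icmp ult <0, 1, ..., N-1>, splat(EVL).
std::vector<bool> evlToMask(uint32_t EVL, uint32_t NumElems) {
  std::vector<bool> Mask(NumElems);
  for (uint32_t I = 0; I < NumElems; ++I)
    Mask[I] = I < EVL;
  return Mask;
}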
+ Type *LaneTy = EVLParam->getType(); + unsigned NumElems = ElemCount.getFixedValue(); + Value *VLSplat = Builder.CreateVectorSplat(NumElems, EVLParam); + Value *IdxVec = createStepVector(Builder, LaneTy, NumElems); + return Builder.CreateICmp(CmpInst::ICMP_ULT, IdxVec, VLSplat); +} + +Value * +CachingVPExpander::expandPredicationInBinaryOperator(IRBuilder<> &Builder, + VPIntrinsic &VPI) { + assert((isSafeToSpeculativelyExecute(&VPI) || + VPI.canIgnoreVectorLengthParam()) && + "Implicitly dropping %evl in non-speculatable operator!"); + + auto OC = static_cast<Instruction::BinaryOps>(*VPI.getFunctionalOpcode()); + assert(Instruction::isBinaryOp(OC)); + + Value *Op0 = VPI.getOperand(0); + Value *Op1 = VPI.getOperand(1); + Value *Mask = VPI.getMaskParam(); + + // Blend in safe operands. + if (Mask && !isAllTrueMask(Mask)) { + switch (OC) { + default: + // Can safely ignore the predicate. + break; + + // Division operators need a safe divisor on masked-off lanes (1). + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::URem: + case Instruction::SRem: + // 2nd operand must not be zero. + Value *SafeDivisor = getSafeDivisor(VPI.getType()); + Op1 = Builder.CreateSelect(Mask, Op1, SafeDivisor); + } + } + + Value *NewBinOp = Builder.CreateBinOp(OC, Op0, Op1, VPI.getName()); + + replaceOperation(*NewBinOp, VPI); + return NewBinOp; +} + +void CachingVPExpander::discardEVLParameter(VPIntrinsic &VPI) { + LLVM_DEBUG(dbgs() << "Discard EVL parameter in " << VPI << "\n"); + + if (VPI.canIgnoreVectorLengthParam()) + return; + + Value *EVLParam = VPI.getVectorLengthParam(); + if (!EVLParam) + return; + + ElementCount StaticElemCount = VPI.getStaticVectorLength(); + Value *MaxEVL = nullptr; + Type *Int32Ty = Type::getInt32Ty(VPI.getContext()); + if (StaticElemCount.isScalable()) { + // TODO add caching + auto *M = VPI.getModule(); + Function *VScaleFunc = + Intrinsic::getDeclaration(M, Intrinsic::vscale, Int32Ty); + IRBuilder<> Builder(VPI.getParent(), VPI.getIterator()); + Value *FactorConst = Builder.getInt32(StaticElemCount.getKnownMinValue()); + Value *VScale = Builder.CreateCall(VScaleFunc, {}, "vscale"); + MaxEVL = Builder.CreateMul(VScale, FactorConst, "scalable_size", + /*NUW*/ true, /*NSW*/ false); + } else { + MaxEVL = ConstantInt::get(Int32Ty, StaticElemCount.getFixedValue(), false); + } + VPI.setVectorLengthParam(MaxEVL); +} + +Value *CachingVPExpander::foldEVLIntoMask(VPIntrinsic &VPI) { + LLVM_DEBUG(dbgs() << "Folding vlen for " << VPI << '\n'); + + IRBuilder<> Builder(&VPI); + + // Ineffective %evl parameter and so nothing to do here. + if (VPI.canIgnoreVectorLengthParam()) + return &VPI; + + // Only VP intrinsics can have an %evl parameter. + Value *OldMaskParam = VPI.getMaskParam(); + Value *OldEVLParam = VPI.getVectorLengthParam(); + assert(OldMaskParam && "no mask param to fold the vl param into"); + assert(OldEVLParam && "no EVL param to fold away"); + + LLVM_DEBUG(dbgs() << "OLD evl: " << *OldEVLParam << '\n'); + LLVM_DEBUG(dbgs() << "OLD mask: " << *OldMaskParam << '\n'); + + // Convert the %evl predication into vector mask predication. + ElementCount ElemCount = VPI.getStaticVectorLength(); + Value *VLMask = convertEVLToMask(Builder, OldEVLParam, ElemCount); + Value *NewMaskParam = Builder.CreateAnd(VLMask, OldMaskParam); + VPI.setMaskParam(NewMaskParam); + + // Drop the %evl parameter. + discardEVLParameter(VPI); + assert(VPI.canIgnoreVectorLengthParam() && + "transformation did not render the evl param ineffective!"); + + // Reassess the modified instruction. 
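The safe-divisor blend above is what keeps the now-unpredicated division from trapping on masked-off lanes. A one-lane scalar model (the ternary stands in for the CreateSelect; inactive-lane results are never observed):

// Masked-off lanes divide by 1 (getSafeDivisor), so the lane cannot
// trap on a zero divisor; its quotient is a don't-care value.
int maskedSDiv(int A, int B, bool LaneActive) {
  int Divisor = LaneActive ? B : 1;
  return A / Divisor;
}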
+ return &VPI; +} + +Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) { + LLVM_DEBUG(dbgs() << "Lowering to unpredicated op: " << VPI << '\n'); + + IRBuilder<> Builder(&VPI); + + // Try lowering to a LLVM instruction first. + auto OC = VPI.getFunctionalOpcode(); + + if (OC && Instruction::isBinaryOp(*OC)) + return expandPredicationInBinaryOperator(Builder, VPI); + + return &VPI; +} + +//// } CachingVPExpander + +struct TransformJob { + VPIntrinsic *PI; + TargetTransformInfo::VPLegalization Strategy; + TransformJob(VPIntrinsic *PI, TargetTransformInfo::VPLegalization InitStrat) + : PI(PI), Strategy(InitStrat) {} + + bool isDone() const { return Strategy.shouldDoNothing(); } +}; + +void sanitizeStrategy(Instruction &I, VPLegalization &LegalizeStrat) { + // Speculatable instructions do not strictly need predication. + if (isSafeToSpeculativelyExecute(&I)) { + // Converting a speculatable VP intrinsic means dropping %mask and %evl. + // No need to expand %evl into the %mask only to ignore that code. + if (LegalizeStrat.OpStrategy == VPLegalization::Convert) + LegalizeStrat.EVLParamStrategy = VPLegalization::Discard; + return; + } + + // We have to preserve the predicating effect of %evl for this + // non-speculatable VP intrinsic. + // 1) Never discard %evl. + // 2) If this VP intrinsic will be expanded to non-VP code, make sure that + // %evl gets folded into %mask. + if ((LegalizeStrat.EVLParamStrategy == VPLegalization::Discard) || + (LegalizeStrat.OpStrategy == VPLegalization::Convert)) { + LegalizeStrat.EVLParamStrategy = VPLegalization::Convert; + } +} + +VPLegalization +CachingVPExpander::getVPLegalizationStrategy(const VPIntrinsic &VPI) const { + auto VPStrat = TTI.getVPLegalizationStrategy(VPI); + if (LLVM_LIKELY(!UsingTTIOverrides)) { + // No overrides - we are in production. + return VPStrat; + } + + // Overrides set - we are in testing, the following does not need to be + // efficient. + VPStrat.EVLParamStrategy = parseOverrideOption(EVLTransformOverride); + VPStrat.OpStrategy = parseOverrideOption(MaskTransformOverride); + return VPStrat; +} + +/// \brief Expand llvm.vp.* intrinsics as requested by \p TTI. +bool CachingVPExpander::expandVectorPredication() { + SmallVector<TransformJob, 16> Worklist; + + // Collect all VPIntrinsics that need expansion and determine their expansion + // strategy. + for (auto &I : instructions(F)) { + auto *VPI = dyn_cast<VPIntrinsic>(&I); + if (!VPI) + continue; + auto VPStrat = getVPLegalizationStrategy(*VPI); + sanitizeStrategy(I, VPStrat); + if (!VPStrat.shouldDoNothing()) + Worklist.emplace_back(VPI, VPStrat); + } + if (Worklist.empty()) + return false; + + // Transform all VPIntrinsics on the worklist. + LLVM_DEBUG(dbgs() << "\n:::: Transforming " << Worklist.size() + << " instructions ::::\n"); + for (TransformJob Job : Worklist) { + // Transform the EVL parameter. + switch (Job.Strategy.EVLParamStrategy) { + case VPLegalization::Legal: + break; + case VPLegalization::Discard: + discardEVLParameter(*Job.PI); + break; + case VPLegalization::Convert: + if (foldEVLIntoMask(*Job.PI)) + ++NumFoldedVL; + break; + } + Job.Strategy.EVLParamStrategy = VPLegalization::Legal; + + // Replace with a non-predicated operation. 
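sanitizeStrategy() above encodes one invariant: a speculatable operation may simply drop %evl, while a non-speculatable operation that is about to lose its VP encoding must first fold %evl into %mask. The same decision table as standalone C++ (all names are local to this sketch):

enum class Strat { Legal, Discard, Convert };
struct Legalize { Strat EVLParam, Op; };

Legalize sanitize(Legalize L, bool Speculatable) {
  if (Speculatable) {
    // No predication needed for correctness; skip expanding %evl into a
    // mask that the converted op would ignore anyway.
    if (L.Op == Strat::Convert)
      L.EVLParam = Strat::Discard;
  } else if (L.EVLParam == Strat::Discard || L.Op == Strat::Convert) {
    // Preserve the predicating effect of %evl by folding it into %mask.
    L.EVLParam = Strat::Convert;
  }
  return L;
}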
+ switch (Job.Strategy.OpStrategy) { + case VPLegalization::Legal: + break; + case VPLegalization::Discard: + llvm_unreachable("Invalid strategy for operators."); + case VPLegalization::Convert: + expandPredication(*Job.PI); + ++NumLoweredVPOps; + break; + } + Job.Strategy.OpStrategy = VPLegalization::Legal; + + assert(Job.isDone() && "incomplete transformation"); + } + + return true; +} +class ExpandVectorPredication : public FunctionPass { +public: + static char ID; + ExpandVectorPredication() : FunctionPass(ID) { + initializeExpandVectorPredicationPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override { + const auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + CachingVPExpander VPExpander(F, *TTI); + return VPExpander.expandVectorPredication(); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<TargetTransformInfoWrapperPass>(); + AU.setPreservesCFG(); + } +}; +} // namespace + +char ExpandVectorPredication::ID; +INITIALIZE_PASS_BEGIN(ExpandVectorPredication, "expandvp", + "Expand vector predication intrinsics", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_END(ExpandVectorPredication, "expandvp", + "Expand vector predication intrinsics", false, false) + +FunctionPass *llvm::createExpandVectorPredicationPass() { + return new ExpandVectorPredication(); +} + +PreservedAnalyses +ExpandVectorPredicationPass::run(Function &F, FunctionAnalysisManager &AM) { + const auto &TTI = AM.getResult<TargetIRAnalysis>(F); + CachingVPExpander VPExpander(F, TTI); + if (!VPExpander.expandVectorPredication()) + return PreservedAnalyses::all(); + PreservedAnalyses PA; + PA.preserveSet<CFGAnalyses>(); + return PA; +} diff --git a/llvm/lib/CodeGen/FaultMaps.cpp b/llvm/lib/CodeGen/FaultMaps.cpp index 23560b4cd136..1d35b194f218 100644 --- a/llvm/lib/CodeGen/FaultMaps.cpp +++ b/llvm/lib/CodeGen/FaultMaps.cpp @@ -15,8 +15,6 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -114,39 +112,3 @@ const char *FaultMaps::faultTypeToString(FaultMaps::FaultKind FT) { return "FaultingStore"; } } - -raw_ostream &llvm:: -operator<<(raw_ostream &OS, - const FaultMapParser::FunctionFaultInfoAccessor &FFI) { - OS << "Fault kind: " - << FaultMaps::faultTypeToString((FaultMaps::FaultKind)FFI.getFaultKind()) - << ", faulting PC offset: " << FFI.getFaultingPCOffset() - << ", handling PC offset: " << FFI.getHandlerPCOffset(); - return OS; -} - -raw_ostream &llvm:: -operator<<(raw_ostream &OS, const FaultMapParser::FunctionInfoAccessor &FI) { - OS << "FunctionAddress: " << format_hex(FI.getFunctionAddr(), 8) - << ", NumFaultingPCs: " << FI.getNumFaultingPCs() << "\n"; - for (unsigned i = 0, e = FI.getNumFaultingPCs(); i != e; ++i) - OS << FI.getFunctionFaultInfoAt(i) << "\n"; - return OS; -} - -raw_ostream &llvm::operator<<(raw_ostream &OS, const FaultMapParser &FMP) { - OS << "Version: " << format_hex(FMP.getFaultMapVersion(), 2) << "\n"; - OS << "NumFunctions: " << FMP.getNumFunctions() << "\n"; - - if (FMP.getNumFunctions() == 0) - return OS; - - FaultMapParser::FunctionInfoAccessor FI; - - for (unsigned i = 0, e = FMP.getNumFunctions(); i != e; ++i) { - FI = (i == 0) ? 
FMP.getFirstFunctionInfo() : FI.getNextFunctionInfo(); - OS << FI; - } - - return OS; -} diff --git a/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp b/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp index f8f99b7e87f2..e3c4e86d203b 100644 --- a/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp +++ b/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp @@ -484,6 +484,16 @@ public: MachineOperand &DefMO = MI.getOperand(I); assert(DefMO.isReg() && DefMO.isDef() && "Expected Reg Def operand"); Register Reg = DefMO.getReg(); + assert(DefMO.isTied() && "Def is expected to be tied"); + // We skipped undef uses and did not spill them, so we should not + // proceed with defs here. + if (MI.getOperand(MI.findTiedOperandIdx(I)).isUndef()) { + if (AllowGCPtrInCSR) { + NewIndices.push_back(NewMI->getNumOperands()); + MIB.addReg(Reg, RegState::Define); + } + continue; + } if (!AllowGCPtrInCSR) { assert(is_contained(RegsToSpill, Reg)); RegsToReload.push_back(Reg); diff --git a/llvm/lib/CodeGen/GCMetadata.cpp b/llvm/lib/CodeGen/GCMetadata.cpp index 7c96d838d992..8fae798b31d9 100644 --- a/llvm/lib/CodeGen/GCMetadata.cpp +++ b/llvm/lib/CodeGen/GCMetadata.cpp @@ -12,7 +12,7 @@ #include "llvm/CodeGen/GCMetadata.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Function.h" #include "llvm/InitializePasses.h" @@ -122,14 +122,9 @@ bool Printer::runOnFunction(Function &F) { OS << "\t" << PI->Label->getName() << ": " << "post-call" << ", live = {"; - for (GCFunctionInfo::live_iterator RI = FD->live_begin(PI), - RE = FD->live_end(PI); - ;) { - OS << " " << RI->Num; - if (++RI == RE) - break; - OS << ","; - } + ListSeparator LS(","); + for (const GCRoot &R : make_range(FD->live_begin(PI), FD->live_end(PI))) + OS << LS << " " << R.Num; OS << " }\n"; } diff --git a/llvm/lib/CodeGen/GCRootLowering.cpp b/llvm/lib/CodeGen/GCRootLowering.cpp index e2ee0c97f94d..58269e172c57 100644 --- a/llvm/lib/CodeGen/GCRootLowering.cpp +++ b/llvm/lib/CodeGen/GCRootLowering.cpp @@ -11,7 +11,6 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GCMetadata.h" -#include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -86,6 +85,7 @@ INITIALIZE_PASS_END(LowerIntrinsics, "gc-lowering", "GC Lowering", false, false) FunctionPass *llvm::createGCLoweringPass() { return new LowerIntrinsics(); } char LowerIntrinsics::ID = 0; +char &llvm::GCLoweringID = LowerIntrinsics::ID; LowerIntrinsics::LowerIntrinsics() : FunctionPass(ID) { initializeLowerIntrinsicsPass(*PassRegistry::getPassRegistry()); @@ -105,9 +105,9 @@ void LowerIntrinsics::getAnalysisUsage(AnalysisUsage &AU) const { bool LowerIntrinsics::doInitialization(Module &M) { GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>(); assert(MI && "LowerIntrinsics didn't require GCModuleInfo!?"); - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) - if (!I->isDeclaration() && I->hasGC()) - MI->getFunctionInfo(*I); // Instantiate the GC strategy. + for (Function &F : M) + if (!F.isDeclaration() && F.hasGC()) + MI->getFunctionInfo(F); // Instantiate the GC strategy. return false; } @@ -317,8 +317,8 @@ bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) { // size, we use UINT64_MAX to represent this. 
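The GCMetadata printer rewrite above leans on llvm::ListSeparator from StringExtras.h, which prints nothing the first time it is streamed and the separator every time after that. A minimal sketch of the idiom:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

void printRoots(raw_ostream &OS, ArrayRef<int> Nums) {
  OS << "live = {";
  ListSeparator LS(","); // empty before the first element, "," afterwards
  for (int N : Nums)
    OS << LS << " " << N;
  OS << " }";
}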
const MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); - const bool DynamicFrameSize = MFI.hasVarSizedObjects() || - RegInfo->needsStackRealignment(MF); + const bool DynamicFrameSize = + MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF); FI->setFrameSize(DynamicFrameSize ? UINT64_MAX : MFI.getStackSize()); // Find all safe points. diff --git a/llvm/lib/CodeGen/GCStrategy.cpp b/llvm/lib/CodeGen/GCStrategy.cpp deleted file mode 100644 index 43d06b0f82e9..000000000000 --- a/llvm/lib/CodeGen/GCStrategy.cpp +++ /dev/null @@ -1,20 +0,0 @@ -//===- GCStrategy.cpp - Garbage Collector Description ---------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements the policy object GCStrategy which describes the -// behavior of a given garbage collector. -// -//===----------------------------------------------------------------------===// - -#include "llvm/CodeGen/GCStrategy.h" - -using namespace llvm; - -LLVM_INSTANTIATE_REGISTRY(GCRegistry) - -GCStrategy::GCStrategy() = default; diff --git a/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp b/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp index 2fa208fbfaaf..f9bfe8518083 100644 --- a/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp @@ -11,6 +11,7 @@ #include "llvm/CodeGen/GlobalISel/CSEInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/InitializePasses.h" +#include "llvm/Support/Error.h" #define DEBUG_TYPE "cseinfo" @@ -259,8 +260,17 @@ void GISelCSEInfo::releaseMemory() { #endif } +#ifndef NDEBUG +static const char *stringify(const MachineInstr *MI, std::string &S) { + raw_string_ostream OS(S); + OS << *MI; + return OS.str().c_str(); +} +#endif + Error GISelCSEInfo::verify() { #ifndef NDEBUG + std::string S1, S2; handleRecordedInsts(); // For each instruction in map from MI -> UMI, // Profile(MI) and make sure UMI is found for that profile. @@ -273,20 +283,23 @@ Error GISelCSEInfo::verify() { if (FoundNode != It.second) return createStringError(std::errc::not_supported, "CSEMap mismatch, InstrMapping has MIs without " - "corresponding Nodes in CSEMap"); + "corresponding Nodes in CSEMap:\n%s", + stringify(It.second->MI, S1)); } // For every node in the CSEMap, make sure that the InstrMapping // points to it. 
- for (auto It = CSEMap.begin(), End = CSEMap.end(); It != End; ++It) { - const UniqueMachineInstr &UMI = *It; + for (const UniqueMachineInstr &UMI : CSEMap) { if (!InstrMapping.count(UMI.MI)) return createStringError(std::errc::not_supported, - "Node in CSE without InstrMapping", UMI.MI); + "Node in CSE without InstrMapping:\n%s", + stringify(UMI.MI, S1)); if (InstrMapping[UMI.MI] != &UMI) return createStringError(std::make_error_code(std::errc::not_supported), - "Mismatch in CSE mapping"); + "Mismatch in CSE mapping:\n%s\n%s", + stringify(InstrMapping[UMI.MI]->MI, S1), + stringify(UMI.MI, S2)); } #endif return Error::success(); diff --git a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp index 2c86f06a602d..dd560e8ff145 100644 --- a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp @@ -189,7 +189,7 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc, assert(DstOps.size() == 1 && "Invalid dsts"); if (Optional<APInt> Cst = ConstantFoldBinOp(Opc, SrcOps[0].getReg(), SrcOps[1].getReg(), *getMRI())) - return buildConstant(DstOps[0], Cst->getSExtValue()); + return buildConstant(DstOps[0], *Cst); break; } case TargetOpcode::G_SEXT_INREG: { @@ -200,7 +200,17 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc, const SrcOp &Src1 = SrcOps[1]; if (auto MaybeCst = ConstantFoldExtOp(Opc, Src0.getReg(), Src1.getImm(), *getMRI())) - return buildConstant(Dst, MaybeCst->getSExtValue()); + return buildConstant(Dst, *MaybeCst); + break; + } + case TargetOpcode::G_SITOFP: + case TargetOpcode::G_UITOFP: { + // Try to constant fold these. + assert(SrcOps.size() == 1 && "Invalid sources"); + assert(DstOps.size() == 1 && "Invalid dsts"); + if (Optional<APFloat> Cst = ConstantFoldIntToFloat( + Opc, DstOps[0].getLLTTy(*getMRI()), SrcOps[0].getReg(), *getMRI())) + return buildFConstant(DstOps[0], *Cst); break; } } diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp index 803e1527a4f0..d2cda9ece31a 100644 --- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -54,6 +54,8 @@ addFlagsUsingAttrFn(ISD::ArgFlagsTy &Flags, Flags.setReturned(); if (AttrFn(Attribute::SwiftSelf)) Flags.setSwiftSelf(); + if (AttrFn(Attribute::SwiftAsync)) + Flags.setSwiftAsync(); if (AttrFn(Attribute::SwiftError)) Flags.setSwiftError(); } @@ -112,7 +114,7 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB, unsigned i = 0; unsigned NumFixedArgs = CB.getFunctionType()->getNumParams(); for (auto &Arg : CB.args()) { - ArgInfo OrigArg{ArgRegs[i], Arg->getType(), getAttributesForArgIdx(CB, i), + ArgInfo OrigArg{ArgRegs[i], *Arg.get(), i, getAttributesForArgIdx(CB, i), i < NumFixedArgs}; setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, CB); @@ -133,7 +135,7 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB, else Info.Callee = MachineOperand::CreateReg(GetCalleeReg(), false); - Info.OrigRet = ArgInfo{ResRegs, RetTy, ISD::ArgFlagsTy{}}; + Info.OrigRet = ArgInfo{ResRegs, RetTy, 0, ISD::ArgFlagsTy{}}; if (!Info.OrigRet.Ty->isVoidTy()) setArgFlags(Info.OrigRet, AttributeList::ReturnIndex, DL, CB); @@ -154,22 +156,42 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx, const AttributeList &Attrs = FuncInfo.getAttributes(); addArgFlagsFromAttributes(Flags, Attrs, OpIdx); + PointerType *PtrTy = dyn_cast<PointerType>(Arg.Ty->getScalarType()); + if 
(PtrTy) { + Flags.setPointer(); + Flags.setPointerAddrSpace(PtrTy->getPointerAddressSpace()); + } + + Align MemAlign = DL.getABITypeAlign(Arg.Ty); if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated()) { - Type *ElementTy = cast<PointerType>(Arg.Ty)->getElementType(); + assert(OpIdx >= AttributeList::FirstArgIndex); + Type *ElementTy = PtrTy->getElementType(); auto Ty = Attrs.getAttribute(OpIdx, Attribute::ByVal).getValueAsType(); Flags.setByValSize(DL.getTypeAllocSize(Ty ? Ty : ElementTy)); // For ByVal, alignment should be passed from FE. BE will guess if // this info is not there but there are cases it cannot get right. - Align FrameAlign; - if (auto ParamAlign = FuncInfo.getParamAlign(OpIdx - 2)) - FrameAlign = *ParamAlign; + if (auto ParamAlign = + FuncInfo.getParamStackAlign(OpIdx - AttributeList::FirstArgIndex)) + MemAlign = *ParamAlign; + else if ((ParamAlign = + FuncInfo.getParamAlign(OpIdx - AttributeList::FirstArgIndex))) + MemAlign = *ParamAlign; else - FrameAlign = Align(getTLI()->getByValTypeAlignment(ElementTy, DL)); - Flags.setByValAlign(FrameAlign); + MemAlign = Align(getTLI()->getByValTypeAlignment(ElementTy, DL)); + } else if (OpIdx >= AttributeList::FirstArgIndex) { + if (auto ParamAlign = + FuncInfo.getParamStackAlign(OpIdx - AttributeList::FirstArgIndex)) + MemAlign = *ParamAlign; } + Flags.setMemAlign(MemAlign); Flags.setOrigAlign(DL.getABITypeAlign(Arg.Ty)); + + // Don't try to use the returned attribute if the argument is marked as + // swiftself, since it won't be passed in x0. + if (Flags.isSwiftSelf()) + Flags.setReturned(false); } template void @@ -182,96 +204,366 @@ CallLowering::setArgFlags<CallBase>(CallLowering::ArgInfo &Arg, unsigned OpIdx, const DataLayout &DL, const CallBase &FuncInfo) const; -Register CallLowering::packRegs(ArrayRef<Register> SrcRegs, Type *PackedTy, - MachineIRBuilder &MIRBuilder) const { - assert(SrcRegs.size() > 1 && "Nothing to pack"); +void CallLowering::splitToValueTypes(const ArgInfo &OrigArg, + SmallVectorImpl<ArgInfo> &SplitArgs, + const DataLayout &DL, + CallingConv::ID CallConv, + SmallVectorImpl<uint64_t> *Offsets) const { + LLVMContext &Ctx = OrigArg.Ty->getContext(); - const DataLayout &DL = MIRBuilder.getMF().getDataLayout(); - MachineRegisterInfo *MRI = MIRBuilder.getMRI(); + SmallVector<EVT, 4> SplitVTs; + ComputeValueVTs(*TLI, DL, OrigArg.Ty, SplitVTs, Offsets, 0); + + if (SplitVTs.size() == 0) + return; + + if (SplitVTs.size() == 1) { + // No splitting to do, but we want to replace the original type (e.g. [1 x + // double] -> double). + SplitArgs.emplace_back(OrigArg.Regs[0], SplitVTs[0].getTypeForEVT(Ctx), + OrigArg.OrigArgIndex, OrigArg.Flags[0], + OrigArg.IsFixed, OrigArg.OrigValue); + return; + } - LLT PackedLLT = getLLTForType(*PackedTy, DL); + // Create one ArgInfo for each virtual register in the original ArgInfo. 
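splitToValueTypes() above is built on ComputeValueVTs(), which flattens an aggregate IR type into the EVTs of its leaf values plus their byte offsets. A hedged sketch of a call (flattenArgType is a hypothetical wrapper):

#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;

// For e.g. {i64, [2 x float]} this fills VTs with i64, f32, f32 and
// Offs with each leaf's byte offset inside the aggregate.
void flattenArgType(const TargetLowering &TLI, const DataLayout &DL,
                    Type *Ty, SmallVectorImpl<EVT> &VTs,
                    SmallVectorImpl<uint64_t> &Offs) {
  ComputeValueVTs(TLI, DL, Ty, VTs, &Offs, /*StartingOffset=*/0);
}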
+ assert(OrigArg.Regs.size() == SplitVTs.size() && "Regs / types mismatch"); + + bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters( + OrigArg.Ty, CallConv, false, DL); + for (unsigned i = 0, e = SplitVTs.size(); i < e; ++i) { + Type *SplitTy = SplitVTs[i].getTypeForEVT(Ctx); + SplitArgs.emplace_back(OrigArg.Regs[i], SplitTy, OrigArg.OrigArgIndex, + OrigArg.Flags[0], OrigArg.IsFixed); + if (NeedsRegBlock) + SplitArgs.back().Flags[0].setInConsecutiveRegs(); + } - SmallVector<LLT, 8> LLTs; - SmallVector<uint64_t, 8> Offsets; - computeValueLLTs(DL, *PackedTy, LLTs, &Offsets); - assert(LLTs.size() == SrcRegs.size() && "Regs / types mismatch"); + SplitArgs.back().Flags[0].setInConsecutiveRegsLast(); +} + +/// Pack values \p SrcRegs to cover the vector type result \p DstRegs. +static MachineInstrBuilder +mergeVectorRegsToResultRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs, + ArrayRef<Register> SrcRegs) { + MachineRegisterInfo &MRI = *B.getMRI(); + LLT LLTy = MRI.getType(DstRegs[0]); + LLT PartLLT = MRI.getType(SrcRegs[0]); + + // Deal with v3s16 split into v2s16 + LLT LCMTy = getLCMType(LLTy, PartLLT); + if (LCMTy == LLTy) { + // Common case where no padding is needed. + assert(DstRegs.size() == 1); + return B.buildConcatVectors(DstRegs[0], SrcRegs); + } - Register Dst = MRI->createGenericVirtualRegister(PackedLLT); - MIRBuilder.buildUndef(Dst); - for (unsigned i = 0; i < SrcRegs.size(); ++i) { - Register NewDst = MRI->createGenericVirtualRegister(PackedLLT); - MIRBuilder.buildInsert(NewDst, Dst, SrcRegs[i], Offsets[i]); - Dst = NewDst; + // We need to create an unmerge to the result registers, which may require + // widening the original value. + Register UnmergeSrcReg; + if (LCMTy != PartLLT) { + // e.g. A <3 x s16> value was split to <2 x s16> + // %register_value0:_(<2 x s16>) + // %register_value1:_(<2 x s16>) + // %undef:_(<2 x s16>) = G_IMPLICIT_DEF + // %concat:_<6 x s16>) = G_CONCAT_VECTORS %reg_value0, %reg_value1, %undef + // %dst_reg:_(<3 x s16>), %dead:_(<3 x s16>) = G_UNMERGE_VALUES %concat + const int NumWide = LCMTy.getSizeInBits() / PartLLT.getSizeInBits(); + Register Undef = B.buildUndef(PartLLT).getReg(0); + + // Build vector of undefs. + SmallVector<Register, 8> WidenedSrcs(NumWide, Undef); + + // Replace the first sources with the real registers. + std::copy(SrcRegs.begin(), SrcRegs.end(), WidenedSrcs.begin()); + UnmergeSrcReg = B.buildConcatVectors(LCMTy, WidenedSrcs).getReg(0); + } else { + // We don't need to widen anything if we're extracting a scalar which was + // promoted to a vector e.g. s8 -> v4s8 -> s8 + assert(SrcRegs.size() == 1); + UnmergeSrcReg = SrcRegs[0]; } - return Dst; + int NumDst = LCMTy.getSizeInBits() / LLTy.getSizeInBits(); + + SmallVector<Register, 8> PadDstRegs(NumDst); + std::copy(DstRegs.begin(), DstRegs.end(), PadDstRegs.begin()); + + // Create the excess dead defs for the unmerge. + for (int I = DstRegs.size(); I != NumDst; ++I) + PadDstRegs[I] = MRI.createGenericVirtualRegister(LLTy); + + return B.buildUnmerge(PadDstRegs, UnmergeSrcReg); } -void CallLowering::unpackRegs(ArrayRef<Register> DstRegs, Register SrcReg, - Type *PackedTy, - MachineIRBuilder &MIRBuilder) const { - assert(DstRegs.size() > 1 && "Nothing to unpack"); +/// Create a sequence of instructions to combine pieces split into register +/// typed values to the original IR value. \p OrigRegs contains the destination +/// value registers of type \p LLTy, and \p Regs contains the legalized pieces +/// with type \p PartLLT. 
This is used for incoming values (physregs to vregs). +static void buildCopyFromRegs(MachineIRBuilder &B, ArrayRef<Register> OrigRegs, + ArrayRef<Register> Regs, LLT LLTy, LLT PartLLT, + const ISD::ArgFlagsTy Flags) { + MachineRegisterInfo &MRI = *B.getMRI(); + + if (PartLLT == LLTy) { + // We should have avoided introducing a new virtual register, and just + // directly assigned here. + assert(OrigRegs[0] == Regs[0]); + return; + } - const DataLayout &DL = MIRBuilder.getDataLayout(); + if (PartLLT.getSizeInBits() == LLTy.getSizeInBits() && OrigRegs.size() == 1 && + Regs.size() == 1) { + B.buildBitcast(OrigRegs[0], Regs[0]); + return; + } + + // A vector PartLLT needs extending to LLTy's element size. + // E.g. <2 x s64> = G_SEXT <2 x s32>. + if (PartLLT.isVector() == LLTy.isVector() && + PartLLT.getScalarSizeInBits() > LLTy.getScalarSizeInBits() && + (!PartLLT.isVector() || + PartLLT.getNumElements() == LLTy.getNumElements()) && + OrigRegs.size() == 1 && Regs.size() == 1) { + Register SrcReg = Regs[0]; + + LLT LocTy = MRI.getType(SrcReg); + + if (Flags.isSExt()) { + SrcReg = B.buildAssertSExt(LocTy, SrcReg, LLTy.getScalarSizeInBits()) + .getReg(0); + } else if (Flags.isZExt()) { + SrcReg = B.buildAssertZExt(LocTy, SrcReg, LLTy.getScalarSizeInBits()) + .getReg(0); + } + + // Sometimes pointers are passed zero extended. + LLT OrigTy = MRI.getType(OrigRegs[0]); + if (OrigTy.isPointer()) { + LLT IntPtrTy = LLT::scalar(OrigTy.getSizeInBits()); + B.buildIntToPtr(OrigRegs[0], B.buildTrunc(IntPtrTy, SrcReg)); + return; + } + + B.buildTrunc(OrigRegs[0], SrcReg); + return; + } + + if (!LLTy.isVector() && !PartLLT.isVector()) { + assert(OrigRegs.size() == 1); + LLT OrigTy = MRI.getType(OrigRegs[0]); + + unsigned SrcSize = PartLLT.getSizeInBits().getFixedSize() * Regs.size(); + if (SrcSize == OrigTy.getSizeInBits()) + B.buildMerge(OrigRegs[0], Regs); + else { + auto Widened = B.buildMerge(LLT::scalar(SrcSize), Regs); + B.buildTrunc(OrigRegs[0], Widened); + } + + return; + } + + if (PartLLT.isVector()) { + assert(OrigRegs.size() == 1); + SmallVector<Register> CastRegs(Regs.begin(), Regs.end()); + + // If PartLLT is a mismatched vector in both number of elements and element + // size, e.g. PartLLT == v2s64 and LLTy is v3s32, then first coerce it to + // have the same elt type, i.e. v4s32. + if (PartLLT.getSizeInBits() > LLTy.getSizeInBits() && + PartLLT.getScalarSizeInBits() == LLTy.getScalarSizeInBits() * 2 && + Regs.size() == 1) { + LLT NewTy = PartLLT.changeElementType(LLTy.getElementType()) + .changeElementCount(PartLLT.getElementCount() * 2); + CastRegs[0] = B.buildBitcast(NewTy, Regs[0]).getReg(0); + PartLLT = NewTy; + } + + if (LLTy.getScalarType() == PartLLT.getElementType()) { + mergeVectorRegsToResultRegs(B, OrigRegs, CastRegs); + } else { + unsigned I = 0; + LLT GCDTy = getGCDType(LLTy, PartLLT); + + // We are both splitting a vector, and bitcasting its element types. Cast + // the source pieces into the appropriate number of pieces with the result + // element type. + for (Register SrcReg : CastRegs) + CastRegs[I++] = B.buildBitcast(GCDTy, SrcReg).getReg(0); + mergeVectorRegsToResultRegs(B, OrigRegs, CastRegs); + } + + return; + } + + assert(LLTy.isVector() && !PartLLT.isVector()); + + LLT DstEltTy = LLTy.getElementType(); + + // Pointer information was discarded. We'll need to coerce some register types + // to avoid violating type constraints. 
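+  // Sketch of the situation (illustrative): the original value may be
+  //   %dst:_(<2 x p0>)
+  // while the assigned pieces arrived as plain s64 parts, so the element type
+  // must be re-derived from %dst rather than from the pieces.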
+ LLT RealDstEltTy = MRI.getType(OrigRegs[0]).getElementType(); + + assert(DstEltTy.getSizeInBits() == RealDstEltTy.getSizeInBits()); + + if (DstEltTy == PartLLT) { + // Vector was trivially scalarized. + + if (RealDstEltTy.isPointer()) { + for (Register Reg : Regs) + MRI.setType(Reg, RealDstEltTy); + } + + B.buildBuildVector(OrigRegs[0], Regs); + } else if (DstEltTy.getSizeInBits() > PartLLT.getSizeInBits()) { + // Deal with vector with 64-bit elements decomposed to 32-bit + // registers. Need to create intermediate 64-bit elements. + SmallVector<Register, 8> EltMerges; + int PartsPerElt = DstEltTy.getSizeInBits() / PartLLT.getSizeInBits(); + + assert(DstEltTy.getSizeInBits() % PartLLT.getSizeInBits() == 0); + + for (int I = 0, NumElts = LLTy.getNumElements(); I != NumElts; ++I) { + auto Merge = B.buildMerge(RealDstEltTy, Regs.take_front(PartsPerElt)); + // Fix the type in case this is really a vector of pointers. + MRI.setType(Merge.getReg(0), RealDstEltTy); + EltMerges.push_back(Merge.getReg(0)); + Regs = Regs.drop_front(PartsPerElt); + } - SmallVector<LLT, 8> LLTs; - SmallVector<uint64_t, 8> Offsets; - computeValueLLTs(DL, *PackedTy, LLTs, &Offsets); - assert(LLTs.size() == DstRegs.size() && "Regs / types mismatch"); + B.buildBuildVector(OrigRegs[0], EltMerges); + } else { + // Vector was split, and elements promoted to a wider type. + // FIXME: Should handle floating point promotions. + LLT BVType = LLT::fixed_vector(LLTy.getNumElements(), PartLLT); + auto BV = B.buildBuildVector(BVType, Regs); + B.buildTrunc(OrigRegs[0], BV); + } +} + +/// Create a sequence of instructions to expand the value in \p SrcReg (of type +/// \p SrcTy) to the types in \p DstRegs (of type \p PartTy). \p ExtendOp should +/// contain the type of scalar value extension if necessary. +/// +/// This is used for outgoing values (vregs to physregs) +static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs, + Register SrcReg, LLT SrcTy, LLT PartTy, + unsigned ExtendOp = TargetOpcode::G_ANYEXT) { + // We could just insert a regular copy, but this is unreachable at the moment. + assert(SrcTy != PartTy && "identical part types shouldn't reach here"); + + const unsigned PartSize = PartTy.getSizeInBits(); + + if (PartTy.isVector() == SrcTy.isVector() && + PartTy.getScalarSizeInBits() > SrcTy.getScalarSizeInBits()) { + assert(DstRegs.size() == 1); + B.buildInstr(ExtendOp, {DstRegs[0]}, {SrcReg}); + return; + } + + if (SrcTy.isVector() && !PartTy.isVector() && + PartSize > SrcTy.getElementType().getSizeInBits()) { + // Vector was scalarized, and the elements extended. + auto UnmergeToEltTy = B.buildUnmerge(SrcTy.getElementType(), SrcReg); + for (int i = 0, e = DstRegs.size(); i != e; ++i) + B.buildAnyExt(DstRegs[i], UnmergeToEltTy.getReg(i)); + return; + } + + LLT GCDTy = getGCDType(SrcTy, PartTy); + if (GCDTy == PartTy) { + // If this already evenly divisible, we can create a simple unmerge. + B.buildUnmerge(DstRegs, SrcReg); + return; + } + + MachineRegisterInfo &MRI = *B.getMRI(); + LLT DstTy = MRI.getType(DstRegs[0]); + LLT LCMTy = getLCMType(SrcTy, PartTy); + + const unsigned DstSize = DstTy.getSizeInBits(); + const unsigned SrcSize = SrcTy.getSizeInBits(); + unsigned CoveringSize = LCMTy.getSizeInBits(); - for (unsigned i = 0; i < DstRegs.size(); ++i) - MIRBuilder.buildExtract(DstRegs[i], SrcReg, Offsets[i]); + Register UnmergeSrc = SrcReg; + + if (CoveringSize != SrcSize) { + // For scalars, it's common to be able to use a simple extension. 
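+    // A small sketch (illustrative, assuming the default any-extend): an s96
+    // source split into s64 parts is widened to a covering s128 and unmerged:
+    //   %c:_(s128) = G_ANYEXT %src(s96)
+    //   %lo:_(s64), %hi:_(s64) = G_UNMERGE_VALUES %c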
+ if (SrcTy.isScalar() && DstTy.isScalar()) { + CoveringSize = alignTo(SrcSize, DstSize); + LLT CoverTy = LLT::scalar(CoveringSize); + UnmergeSrc = B.buildInstr(ExtendOp, {CoverTy}, {SrcReg}).getReg(0); + } else { + // Widen to the common type. + // FIXME: This should respect the extend type + Register Undef = B.buildUndef(SrcTy).getReg(0); + SmallVector<Register, 8> MergeParts(1, SrcReg); + for (unsigned Size = SrcSize; Size != CoveringSize; Size += SrcSize) + MergeParts.push_back(Undef); + UnmergeSrc = B.buildMerge(LCMTy, MergeParts).getReg(0); + } + } + + // Unmerge to the original registers and pad with dead defs. + SmallVector<Register, 8> UnmergeResults(DstRegs.begin(), DstRegs.end()); + for (unsigned Size = DstSize * DstRegs.size(); Size != CoveringSize; + Size += DstSize) { + UnmergeResults.push_back(MRI.createGenericVirtualRegister(DstTy)); + } + + B.buildUnmerge(UnmergeResults, UnmergeSrc); } -bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder, - SmallVectorImpl<ArgInfo> &Args, - ValueHandler &Handler) const { +bool CallLowering::determineAndHandleAssignments( + ValueHandler &Handler, ValueAssigner &Assigner, + SmallVectorImpl<ArgInfo> &Args, MachineIRBuilder &MIRBuilder, + CallingConv::ID CallConv, bool IsVarArg, Register ThisReturnReg) const { MachineFunction &MF = MIRBuilder.getMF(); const Function &F = MF.getFunction(); SmallVector<CCValAssign, 16> ArgLocs; - CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext()); - return handleAssignments(CCInfo, ArgLocs, MIRBuilder, Args, Handler); + + CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, F.getContext()); + if (!determineAssignments(Assigner, Args, CCInfo)) + return false; + + return handleAssignments(Handler, Args, CCInfo, ArgLocs, MIRBuilder, + ThisReturnReg); } -bool CallLowering::handleAssignments(CCState &CCInfo, - SmallVectorImpl<CCValAssign> &ArgLocs, - MachineIRBuilder &MIRBuilder, - SmallVectorImpl<ArgInfo> &Args, - ValueHandler &Handler) const { - MachineFunction &MF = MIRBuilder.getMF(); - const Function &F = MF.getFunction(); - const DataLayout &DL = F.getParent()->getDataLayout(); +static unsigned extendOpFromFlags(llvm::ISD::ArgFlagsTy Flags) { + if (Flags.isSExt()) + return TargetOpcode::G_SEXT; + if (Flags.isZExt()) + return TargetOpcode::G_ZEXT; + return TargetOpcode::G_ANYEXT; +} + +bool CallLowering::determineAssignments(ValueAssigner &Assigner, + SmallVectorImpl<ArgInfo> &Args, + CCState &CCInfo) const { + LLVMContext &Ctx = CCInfo.getContext(); + const CallingConv::ID CallConv = CCInfo.getCallingConv(); unsigned NumArgs = Args.size(); for (unsigned i = 0; i != NumArgs; ++i) { EVT CurVT = EVT::getEVT(Args[i].Ty); - if (CurVT.isSimple() && - !Handler.assignArg(i, CurVT.getSimpleVT(), CurVT.getSimpleVT(), - CCValAssign::Full, Args[i], Args[i].Flags[0], - CCInfo)) - continue; - MVT NewVT = TLI->getRegisterTypeForCallingConv( - F.getContext(), F.getCallingConv(), EVT(CurVT)); + MVT NewVT = TLI->getRegisterTypeForCallingConv(Ctx, CallConv, CurVT); // If we need to split the type over multiple regs, check it's a scenario // we currently support. - unsigned NumParts = TLI->getNumRegistersForCallingConv( - F.getContext(), F.getCallingConv(), CurVT); + unsigned NumParts = + TLI->getNumRegistersForCallingConv(Ctx, CallConv, CurVT); if (NumParts == 1) { // Try to use the register type if we couldn't assign the VT. 
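       // (Illustrative: a plain s64 argument usually reports NewVT == i64 with
       // NumParts == 1, so it is assigned directly here.)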
- if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, Args[i], - Args[i].Flags[0], CCInfo)) + if (Assigner.assignArg(i, CurVT, NewVT, NewVT, CCValAssign::Full, Args[i], + Args[i].Flags[0], CCInfo)) return false; continue; } - assert(NumParts > 1); - // For now only handle exact splits. - if (NewVT.getSizeInBits() * NumParts != CurVT.getSizeInBits()) - return false; - // For incoming arguments (physregs to vregs), we could have values in // physregs (or memlocs) which we want to extract and copy to vregs. // During this, we might have to deal with the LLT being split across @@ -280,68 +572,49 @@ bool CallLowering::handleAssignments(CCState &CCInfo, // If we have outgoing args, then we have the opposite case. We have a // vreg with an LLT which we want to assign to a physical location, and // we might have to record that the value has to be split later. - if (Handler.isIncomingArgumentHandler()) { - // We're handling an incoming arg which is split over multiple regs. - // E.g. passing an s128 on AArch64. - ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0]; - Args[i].OrigRegs.push_back(Args[i].Regs[0]); - Args[i].Regs.clear(); - Args[i].Flags.clear(); - LLT NewLLT = getLLTForMVT(NewVT); - // For each split register, create and assign a vreg that will store - // the incoming component of the larger value. These will later be - // merged to form the final vreg. - for (unsigned Part = 0; Part < NumParts; ++Part) { - Register Reg = - MIRBuilder.getMRI()->createGenericVirtualRegister(NewLLT); - ISD::ArgFlagsTy Flags = OrigFlags; - if (Part == 0) { - Flags.setSplit(); - } else { - Flags.setOrigAlign(Align(1)); - if (Part == NumParts - 1) - Flags.setSplitEnd(); - } - Args[i].Regs.push_back(Reg); - Args[i].Flags.push_back(Flags); - if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, Args[i], - Args[i].Flags[Part], CCInfo)) { - // Still couldn't assign this smaller part type for some reason. - return false; - } + + // We're handling an incoming arg which is split over multiple regs. + // E.g. passing an s128 on AArch64. + ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0]; + Args[i].Flags.clear(); + + for (unsigned Part = 0; Part < NumParts; ++Part) { + ISD::ArgFlagsTy Flags = OrigFlags; + if (Part == 0) { + Flags.setSplit(); + } else { + Flags.setOrigAlign(Align(1)); + if (Part == NumParts - 1) + Flags.setSplitEnd(); } - } else { - // This type is passed via multiple registers in the calling convention. - // We need to extract the individual parts. - Register LargeReg = Args[i].Regs[0]; - LLT SmallTy = LLT::scalar(NewVT.getSizeInBits()); - auto Unmerge = MIRBuilder.buildUnmerge(SmallTy, LargeReg); - assert(Unmerge->getNumOperands() == NumParts + 1); - ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0]; - // We're going to replace the regs and flags with the split ones. - Args[i].Regs.clear(); - Args[i].Flags.clear(); - for (unsigned PartIdx = 0; PartIdx < NumParts; ++PartIdx) { - ISD::ArgFlagsTy Flags = OrigFlags; - if (PartIdx == 0) { - Flags.setSplit(); - } else { - Flags.setOrigAlign(Align(1)); - if (PartIdx == NumParts - 1) - Flags.setSplitEnd(); - } - Args[i].Regs.push_back(Unmerge.getReg(PartIdx)); - Args[i].Flags.push_back(Flags); - if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, - Args[i], Args[i].Flags[PartIdx], CCInfo)) - return false; + + Args[i].Flags.push_back(Flags); + if (Assigner.assignArg(i, CurVT, NewVT, NewVT, CCValAssign::Full, Args[i], + Args[i].Flags[Part], CCInfo)) { + // Still couldn't assign this smaller part type for some reason. 
+ return false; } } } - for (unsigned i = 0, e = Args.size(), j = 0; i != e; ++i, ++j) { - assert(j < ArgLocs.size() && "Skipped too many arg locs"); + return true; +} +bool CallLowering::handleAssignments(ValueHandler &Handler, + SmallVectorImpl<ArgInfo> &Args, + CCState &CCInfo, + SmallVectorImpl<CCValAssign> &ArgLocs, + MachineIRBuilder &MIRBuilder, + Register ThisReturnReg) const { + MachineFunction &MF = MIRBuilder.getMF(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + const Function &F = MF.getFunction(); + const DataLayout &DL = F.getParent()->getDataLayout(); + + const unsigned NumArgs = Args.size(); + + for (unsigned i = 0, j = 0; i != NumArgs; ++i, ++j) { + assert(j < ArgLocs.size() && "Skipped too many arg locs"); CCValAssign &VA = ArgLocs[j]; assert(VA.getValNo() == i && "Location doesn't correspond to current arg"); @@ -354,93 +627,131 @@ bool CallLowering::handleAssignments(CCState &CCInfo, continue; } - // FIXME: Pack registers if we have more than one. - Register ArgReg = Args[i].Regs[0]; + const MVT ValVT = VA.getValVT(); + const MVT LocVT = VA.getLocVT(); - EVT OrigVT = EVT::getEVT(Args[i].Ty); - EVT VAVT = VA.getValVT(); + const LLT LocTy(LocVT); + const LLT ValTy(ValVT); + const LLT NewLLT = Handler.isIncomingArgumentHandler() ? LocTy : ValTy; + const EVT OrigVT = EVT::getEVT(Args[i].Ty); const LLT OrigTy = getLLTForType(*Args[i].Ty, DL); // Expected to be multiple regs for a single incoming arg. // There should be Regs.size() ArgLocs per argument. - unsigned NumArgRegs = Args[i].Regs.size(); + // This should be the same as getNumRegistersForCallingConv + const unsigned NumParts = Args[i].Flags.size(); + + // Now split the registers into the assigned types. + Args[i].OrigRegs.assign(Args[i].Regs.begin(), Args[i].Regs.end()); + + if (NumParts != 1 || NewLLT != OrigTy) { + // If we can't directly assign the register, we need one or more + // intermediate values. + Args[i].Regs.resize(NumParts); - assert((j + (NumArgRegs - 1)) < ArgLocs.size() && + // For each split register, create and assign a vreg that will store + // the incoming component of the larger value. These will later be + // merged to form the final vreg. + for (unsigned Part = 0; Part < NumParts; ++Part) + Args[i].Regs[Part] = MRI.createGenericVirtualRegister(NewLLT); + } + + assert((j + (NumParts - 1)) < ArgLocs.size() && "Too many regs for number of args"); - for (unsigned Part = 0; Part < NumArgRegs; ++Part) { + + // Coerce into outgoing value types before register assignment. + if (!Handler.isIncomingArgumentHandler() && OrigTy != ValTy) { + assert(Args[i].OrigRegs.size() == 1); + buildCopyToRegs(MIRBuilder, Args[i].Regs, Args[i].OrigRegs[0], OrigTy, + ValTy, extendOpFromFlags(Args[i].Flags[0])); + } + + for (unsigned Part = 0; Part < NumParts; ++Part) { + Register ArgReg = Args[i].Regs[Part]; // There should be Regs.size() ArgLocs per argument. VA = ArgLocs[j + Part]; - if (VA.isMemLoc()) { - // Don't currently support loading/storing a type that needs to be split - // to the stack. Should be easy, just not implemented yet. - if (NumArgRegs > 1) { - LLVM_DEBUG( - dbgs() - << "Load/store a split arg to/from the stack not implemented yet\n"); - return false; - } + const ISD::ArgFlagsTy Flags = Args[i].Flags[Part]; - // FIXME: Use correct address space for pointer size - EVT LocVT = VA.getValVT(); - unsigned MemSize = LocVT == MVT::iPTR ? 
DL.getPointerSize() - : LocVT.getStoreSize(); - unsigned Offset = VA.getLocMemOffset(); - MachinePointerInfo MPO; - Register StackAddr = Handler.getStackAddress(MemSize, Offset, MPO); - Handler.assignValueToAddress(Args[i], StackAddr, - MemSize, MPO, VA); - continue; - } + if (VA.isMemLoc() && !Flags.isByVal()) { + // Individual pieces may have been spilled to the stack and others + // passed in registers. - assert(VA.isRegLoc() && "custom loc should have been handled already"); + // TODO: The memory size may be larger than the value we need to + // store. We may need to adjust the offset for big endian targets. + LLT MemTy = Handler.getStackValueStoreType(DL, VA, Flags); - // GlobalISel does not currently work for scalable vectors. - if (OrigVT.getFixedSizeInBits() >= VAVT.getFixedSizeInBits() || - !Handler.isIncomingArgumentHandler()) { - // This is an argument that might have been split. There should be - // Regs.size() ArgLocs per argument. + MachinePointerInfo MPO; + Register StackAddr = Handler.getStackAddress( + MemTy.getSizeInBytes(), VA.getLocMemOffset(), MPO, Flags); - // Insert the argument copies. If VAVT < OrigVT, we'll insert the merge - // to the original register after handling all of the parts. - Handler.assignValueToReg(Args[i].Regs[Part], VA.getLocReg(), VA); + Handler.assignValueToAddress(Args[i], Part, StackAddr, MemTy, MPO, VA); continue; } - // This ArgLoc covers multiple pieces, so we need to split it. - const LLT VATy(VAVT.getSimpleVT()); - Register NewReg = - MIRBuilder.getMRI()->createGenericVirtualRegister(VATy); - Handler.assignValueToReg(NewReg, VA.getLocReg(), VA); - // If it's a vector type, we either need to truncate the elements - // or do an unmerge to get the lower block of elements. - if (VATy.isVector() && - VATy.getNumElements() > OrigVT.getVectorNumElements()) { - // Just handle the case where the VA type is 2 * original type. - if (VATy.getNumElements() != OrigVT.getVectorNumElements() * 2) { - LLVM_DEBUG(dbgs() - << "Incoming promoted vector arg has too many elts"); - return false; + if (VA.isMemLoc() && Flags.isByVal()) { + assert(Args[i].Regs.size() == 1 && + "didn't expect split byval pointer"); + + if (Handler.isIncomingArgumentHandler()) { + // We just need to copy the frame index value to the pointer. + MachinePointerInfo MPO; + Register StackAddr = Handler.getStackAddress( + Flags.getByValSize(), VA.getLocMemOffset(), MPO, Flags); + MIRBuilder.buildCopy(Args[i].Regs[0], StackAddr); + } else { + // For outgoing byval arguments, insert the implicit copy byval + // implies, such that writes in the callee do not modify the caller's + // value. + uint64_t MemSize = Flags.getByValSize(); + int64_t Offset = VA.getLocMemOffset(); + + MachinePointerInfo DstMPO; + Register StackAddr = + Handler.getStackAddress(MemSize, Offset, DstMPO, Flags); + + MachinePointerInfo SrcMPO(Args[i].OrigValue); + if (!Args[i].OrigValue) { + // We still need to accurately track the stack address space if we + // don't know the underlying value. 
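+          // (e.g. when forwarding an incoming byval argument there may be no
+          // IR Value to attach, only the pointer's address space.)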
+ const LLT PtrTy = MRI.getType(StackAddr); + SrcMPO = MachinePointerInfo(PtrTy.getAddressSpace()); + } + + Align DstAlign = std::max(Flags.getNonZeroByValAlign(), + inferAlignFromPtrInfo(MF, DstMPO)); + + Align SrcAlign = std::max(Flags.getNonZeroByValAlign(), + inferAlignFromPtrInfo(MF, SrcMPO)); + + Handler.copyArgumentMemory(Args[i], StackAddr, Args[i].Regs[0], + DstMPO, DstAlign, SrcMPO, SrcAlign, + MemSize, VA); } - auto Unmerge = MIRBuilder.buildUnmerge({OrigTy, OrigTy}, {NewReg}); - MIRBuilder.buildCopy(ArgReg, Unmerge.getReg(0)); - } else { - MIRBuilder.buildTrunc(ArgReg, {NewReg}).getReg(0); + continue; } - } - // Now that all pieces have been handled, re-pack any arguments into any - // wider, original registers. - if (Handler.isIncomingArgumentHandler()) { - if (VAVT.getFixedSizeInBits() < OrigVT.getFixedSizeInBits()) { - assert(NumArgRegs >= 2); + assert(!VA.needsCustom() && "custom loc should have been handled already"); - // Merge the split registers into the expected larger result vreg - // of the original call. - MIRBuilder.buildMerge(Args[i].OrigRegs[0], Args[i].Regs); + if (i == 0 && ThisReturnReg.isValid() && + Handler.isIncomingArgumentHandler() && + isTypeIsValidForThisReturn(ValVT)) { + Handler.assignValueToReg(Args[i].Regs[i], ThisReturnReg, VA); + continue; } + + Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA); } - j += NumArgRegs - 1; + // Now that all pieces have been assigned, re-pack the register typed values + // into the original value typed registers. + if (Handler.isIncomingArgumentHandler() && OrigVT != LocVT) { + // Merge the split registers into the expected larger result vregs of + // the original call. + buildCopyFromRegs(MIRBuilder, Args[i].OrigRegs, Args[i].Regs, OrigTy, + LocTy, Args[i].Flags[0]); + } + + j += NumParts - 1; } return true; @@ -470,7 +781,7 @@ void CallLowering::insertSRetLoads(MachineIRBuilder &MIRBuilder, Type *RetTy, Register Addr; MIRBuilder.materializePtrAdd(Addr, DemoteReg, OffsetLLTy, Offsets[I]); auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad, - MRI.getType(VRegs[I]).getSizeInBytes(), + MRI.getType(VRegs[I]), commonAlignment(BaseAlign, Offsets[I])); MIRBuilder.buildLoad(VRegs[I], Addr, *MMO); } @@ -501,7 +812,7 @@ void CallLowering::insertSRetStores(MachineIRBuilder &MIRBuilder, Type *RetTy, Register Addr; MIRBuilder.materializePtrAdd(Addr, DemoteReg, OffsetLLTy, Offsets[I]); auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, - MRI.getType(VRegs[I]).getSizeInBytes(), + MRI.getType(VRegs[I]), commonAlignment(BaseAlign, Offsets[I])); MIRBuilder.buildStore(VRegs[I], Addr, *MMO); } @@ -522,7 +833,8 @@ void CallLowering::insertSRetIncomingArgument( // NOTE: Assume that a pointer won't get split into more than one VT. 
assert(ValueVTs.size() == 1); - ArgInfo DemoteArg(DemoteReg, ValueVTs[0].getTypeForEVT(PtrTy->getContext())); + ArgInfo DemoteArg(DemoteReg, ValueVTs[0].getTypeForEVT(PtrTy->getContext()), + ArgInfo::NoArgIndex); setArgFlags(DemoteArg, AttributeList::ReturnIndex, DL, F); DemoteArg.Flags[0].setSRet(); SplitArgs.insert(SplitArgs.begin(), DemoteArg); @@ -540,7 +852,8 @@ void CallLowering::insertSRetOutgoingArgument(MachineIRBuilder &MIRBuilder, DL.getTypeAllocSize(RetTy), DL.getPrefTypeAlign(RetTy), false); Register DemoteReg = MIRBuilder.buildFrameIndex(FramePtrTy, FI).getReg(0); - ArgInfo DemoteArg(DemoteReg, PointerType::get(RetTy, AS)); + ArgInfo DemoteArg(DemoteReg, PointerType::get(RetTy, AS), + ArgInfo::NoArgIndex); setArgFlags(DemoteArg, AttributeList::ReturnIndex, DL, CB); DemoteArg.Flags[0].setSRet(); @@ -594,23 +907,6 @@ bool CallLowering::checkReturnTypeForCallConv(MachineFunction &MF) const { return canLowerReturn(MF, CallConv, SplitArgs, F.isVarArg()); } -bool CallLowering::analyzeArgInfo(CCState &CCState, - SmallVectorImpl<ArgInfo> &Args, - CCAssignFn &AssignFnFixed, - CCAssignFn &AssignFnVarArg) const { - for (unsigned i = 0, e = Args.size(); i < e; ++i) { - MVT VT = MVT::getVT(Args[i].Ty); - CCAssignFn &Fn = Args[i].IsFixed ? AssignFnFixed : AssignFnVarArg; - if (Fn(i, VT, VT, CCValAssign::Full, Args[i].Flags[0], CCState)) { - // Bail out on anything we can't handle. - LLVM_DEBUG(dbgs() << "Cannot analyze " << EVT(VT).getEVTString() - << " (arg number = " << i << "\n"); - return false; - } - } - return true; -} - bool CallLowering::parametersInCSRMatch( const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl<CCValAssign> &OutLocs, @@ -666,10 +962,8 @@ bool CallLowering::parametersInCSRMatch( bool CallLowering::resultsCompatible(CallLoweringInfo &Info, MachineFunction &MF, SmallVectorImpl<ArgInfo> &InArgs, - CCAssignFn &CalleeAssignFnFixed, - CCAssignFn &CalleeAssignFnVarArg, - CCAssignFn &CallerAssignFnFixed, - CCAssignFn &CallerAssignFnVarArg) const { + ValueAssigner &CalleeAssigner, + ValueAssigner &CallerAssigner) const { const Function &F = MF.getFunction(); CallingConv::ID CalleeCC = Info.CallConv; CallingConv::ID CallerCC = F.getCallingConv(); @@ -678,15 +972,13 @@ bool CallLowering::resultsCompatible(CallLoweringInfo &Info, return true; SmallVector<CCValAssign, 16> ArgLocs1; - CCState CCInfo1(CalleeCC, false, MF, ArgLocs1, F.getContext()); - if (!analyzeArgInfo(CCInfo1, InArgs, CalleeAssignFnFixed, - CalleeAssignFnVarArg)) + CCState CCInfo1(CalleeCC, Info.IsVarArg, MF, ArgLocs1, F.getContext()); + if (!determineAssignments(CalleeAssigner, InArgs, CCInfo1)) return false; SmallVector<CCValAssign, 16> ArgLocs2; - CCState CCInfo2(CallerCC, false, MF, ArgLocs2, F.getContext()); - if (!analyzeArgInfo(CCInfo2, InArgs, CallerAssignFnFixed, - CalleeAssignFnVarArg)) + CCState CCInfo2(CallerCC, F.isVarArg(), MF, ArgLocs2, F.getContext()); + if (!determineAssignments(CallerAssigner, InArgs, CCInfo2)) return false; // We need the argument locations to match up exactly. If there's more in @@ -721,11 +1013,58 @@ bool CallLowering::resultsCompatible(CallLoweringInfo &Info, return true; } +LLT CallLowering::ValueHandler::getStackValueStoreType( + const DataLayout &DL, const CCValAssign &VA, ISD::ArgFlagsTy Flags) const { + const MVT ValVT = VA.getValVT(); + if (ValVT != MVT::iPTR) { + LLT ValTy(ValVT); + + // We lost the pointeriness going through CCValAssign, so try to restore it + // based on the flags. 
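+    // (Illustrative: a p0 argument reaches here with ValVT == i64; the
+    // isPointer flag plus the recorded address space let us rebuild p0, or
+    // <N x p0> for vectors, below.)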
+ if (Flags.isPointer()) { + LLT PtrTy = LLT::pointer(Flags.getPointerAddrSpace(), + ValTy.getScalarSizeInBits()); + if (ValVT.isVector()) + return LLT::vector(ValTy.getElementCount(), PtrTy); + return PtrTy; + } + + return ValTy; + } + + unsigned AddrSpace = Flags.getPointerAddrSpace(); + return LLT::pointer(AddrSpace, DL.getPointerSize(AddrSpace)); +} + +void CallLowering::ValueHandler::copyArgumentMemory( + const ArgInfo &Arg, Register DstPtr, Register SrcPtr, + const MachinePointerInfo &DstPtrInfo, Align DstAlign, + const MachinePointerInfo &SrcPtrInfo, Align SrcAlign, uint64_t MemSize, + CCValAssign &VA) const { + MachineFunction &MF = MIRBuilder.getMF(); + MachineMemOperand *SrcMMO = MF.getMachineMemOperand( + SrcPtrInfo, + MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable, MemSize, + SrcAlign); + + MachineMemOperand *DstMMO = MF.getMachineMemOperand( + DstPtrInfo, + MachineMemOperand::MOStore | MachineMemOperand::MODereferenceable, + MemSize, DstAlign); + + const LLT PtrTy = MRI.getType(DstPtr); + const LLT SizeTy = LLT::scalar(PtrTy.getSizeInBits()); + + auto SizeConst = MIRBuilder.buildConstant(SizeTy, MemSize); + MIRBuilder.buildMemCpy(DstPtr, SrcPtr, SizeConst, *DstMMO, *SrcMMO); +} + Register CallLowering::ValueHandler::extendRegister(Register ValReg, CCValAssign &VA, unsigned MaxSizeBits) { LLT LocTy{VA.getLocVT()}; - LLT ValTy = MRI.getType(ValReg); + LLT ValTy{VA.getValVT()}; + if (LocTy.getSizeInBits() == ValTy.getSizeInBits()) return ValReg; @@ -735,6 +1074,14 @@ Register CallLowering::ValueHandler::extendRegister(Register ValReg, LocTy = LLT::scalar(MaxSizeBits); } + const LLT ValRegTy = MRI.getType(ValReg); + if (ValRegTy.isPointer()) { + // The x32 ABI wants to zero extend 32-bit pointers to 64-bit registers, so + // we have to cast to do the extension. + LLT IntPtrTy = LLT::scalar(ValRegTy.getSizeInBits()); + ValReg = MIRBuilder.buildPtrToInt(IntPtrTy, ValReg).getReg(0); + } + switch (VA.getLocInfo()) { default: break; case CCValAssign::Full: @@ -760,4 +1107,63 @@ Register CallLowering::ValueHandler::extendRegister(Register ValReg, llvm_unreachable("unable to extend register"); } -void CallLowering::ValueHandler::anchor() {} +void CallLowering::ValueAssigner::anchor() {} + +Register CallLowering::IncomingValueHandler::buildExtensionHint(CCValAssign &VA, + Register SrcReg, + LLT NarrowTy) { + switch (VA.getLocInfo()) { + case CCValAssign::LocInfo::ZExt: { + return MIRBuilder + .buildAssertZExt(MRI.cloneVirtualRegister(SrcReg), SrcReg, + NarrowTy.getScalarSizeInBits()) + .getReg(0); + } + case CCValAssign::LocInfo::SExt: { + return MIRBuilder + .buildAssertSExt(MRI.cloneVirtualRegister(SrcReg), SrcReg, + NarrowTy.getScalarSizeInBits()) + .getReg(0); + break; + } + default: + return SrcReg; + } +} + +/// Check if we can use a basic COPY instruction between the two types. +/// +/// We're currently building on top of the infrastructure using MVT, which loses +/// pointer information in the CCValAssign. We accept copies from physical +/// registers that have been reported as integers if it's to an equivalent sized +/// pointer LLT. 
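+///
+/// For example (illustrative), an incoming pointer reported as an i64 can be
+/// copied straight into a p0 vreg of matching size, with no cast.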
+static bool isCopyCompatibleType(LLT SrcTy, LLT DstTy) {
+  if (SrcTy == DstTy)
+    return true;
+
+  if (SrcTy.getSizeInBits() != DstTy.getSizeInBits())
+    return false;
+
+  SrcTy = SrcTy.getScalarType();
+  DstTy = DstTy.getScalarType();
+
+  return (SrcTy.isPointer() && DstTy.isScalar()) ||
+         (DstTy.isPointer() && SrcTy.isScalar());
+}
+
+void CallLowering::IncomingValueHandler::assignValueToReg(Register ValVReg,
+                                                          Register PhysReg,
+                                                          CCValAssign &VA) {
+  const MVT LocVT = VA.getLocVT();
+  const LLT LocTy(LocVT);
+  const LLT RegTy = MRI.getType(ValVReg);
+
+  if (isCopyCompatibleType(RegTy, LocTy)) {
+    MIRBuilder.buildCopy(ValVReg, PhysReg);
+    return;
+  }
+
+  auto Copy = MIRBuilder.buildCopy(LocTy, PhysReg);
+  auto Hint = buildExtensionHint(VA, Copy.getReg(0), RegTy);
+  MIRBuilder.buildTrunc(ValVReg, Hint);
+}
diff --git a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
index f1071d96e5a3..6f103bca6892 100644
--- a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
@@ -153,8 +153,14 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF,
     MFChanged |= Changed;
   } while (Changed);
 
-  assert(!CSEInfo || (!errorToBool(CSEInfo->verify()) &&
-                      "CSEInfo is not consistent. Likely missing calls to "
-                      "observer on mutations"));
+#ifndef NDEBUG
+  if (CSEInfo) {
+    if (auto E = CSEInfo->verify()) {
+      errs() << E << '\n';
+      assert(false && "CSEInfo is not consistent. Likely missing calls to "
+                      "observer on mutations.");
+    }
+  }
+#endif
   return MFChanged;
 }
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index df0219fcfa64..06d827de2e96 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -6,13 +6,18 @@
 //
 //===----------------------------------------------------------------------===//
 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallBitVector.h"
 #include "llvm/CodeGen/GlobalISel/Combiner.h"
 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstr.h"
@@ -20,8 +25,10 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Target/TargetMachine.h"
+#include <tuple>
 
 #define DEBUG_TYPE "gi-combiner"
 
@@ -436,16 +443,13 @@ bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
   // to find a safe place to sink it) whereas the extend is freely movable.
   // It also prevents us from duplicating the load for the volatile case or just
   // for performance.
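   // As a sketch of the transform itself (illustrative vregs):
   //   %v:_(s32) = G_LOAD %p
   //   %e:_(s64) = G_SEXT %v
   // becomes
   //   %e:_(s64) = G_SEXTLOAD %p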
- - if (MI.getOpcode() != TargetOpcode::G_LOAD && - MI.getOpcode() != TargetOpcode::G_SEXTLOAD && - MI.getOpcode() != TargetOpcode::G_ZEXTLOAD) + GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(&MI); + if (!LoadMI) return false; - auto &LoadValue = MI.getOperand(0); - assert(LoadValue.isReg() && "Result wasn't a register?"); + Register LoadReg = LoadMI->getDstReg(); - LLT LoadValueTy = MRI.getType(LoadValue.getReg()); + LLT LoadValueTy = MRI.getType(LoadReg); if (!LoadValueTy.isScalar()) return false; @@ -467,27 +471,29 @@ bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI, // and emit a variant of (extend (trunc X)) for the others according to the // relative type sizes. At the same time, pick an extend to use based on the // extend involved in the chosen type. - unsigned PreferredOpcode = MI.getOpcode() == TargetOpcode::G_LOAD - ? TargetOpcode::G_ANYEXT - : MI.getOpcode() == TargetOpcode::G_SEXTLOAD - ? TargetOpcode::G_SEXT - : TargetOpcode::G_ZEXT; + unsigned PreferredOpcode = + isa<GLoad>(&MI) + ? TargetOpcode::G_ANYEXT + : isa<GSExtLoad>(&MI) ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT; Preferred = {LLT(), PreferredOpcode, nullptr}; - for (auto &UseMI : MRI.use_nodbg_instructions(LoadValue.getReg())) { + for (auto &UseMI : MRI.use_nodbg_instructions(LoadReg)) { if (UseMI.getOpcode() == TargetOpcode::G_SEXT || UseMI.getOpcode() == TargetOpcode::G_ZEXT || (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) { + const auto &MMO = LoadMI->getMMO(); + // For atomics, only form anyextending loads. + if (MMO.isAtomic() && UseMI.getOpcode() != TargetOpcode::G_ANYEXT) + continue; // Check for legality. if (LI) { LegalityQuery::MemDesc MMDesc; - const auto &MMO = **MI.memoperands_begin(); - MMDesc.SizeInBits = MMO.getSizeInBits(); + MMDesc.MemoryTy = MMO.getMemoryType(); MMDesc.AlignInBits = MMO.getAlign().value() * 8; - MMDesc.Ordering = MMO.getOrdering(); + MMDesc.Ordering = MMO.getSuccessOrdering(); LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg()); - LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); - if (LI->getAction({MI.getOpcode(), {UseTy, SrcTy}, {MMDesc}}).Action != - LegalizeActions::Legal) + LLT SrcTy = MRI.getType(LoadMI->getPointerReg()); + if (LI->getAction({LoadMI->getOpcode(), {UseTy, SrcTy}, {MMDesc}}) + .Action != LegalizeActions::Legal) continue; } Preferred = ChoosePreferredUse(Preferred, @@ -660,23 +666,22 @@ bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) { uint64_t SizeInBits = MI.getOperand(2).getImm(); // If the source is a G_SEXTLOAD from the same bit width, then we don't // need any extend at all, just a truncate. - if (auto *LoadMI = getOpcodeDef(TargetOpcode::G_SEXTLOAD, LoadUser, MRI)) { - const auto &MMO = **LoadMI->memoperands_begin(); + if (auto *LoadMI = getOpcodeDef<GSExtLoad>(LoadUser, MRI)) { // If truncating more than the original extended value, abort. 
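     // (i.e. bail out when the G_TRUNC's source register is narrower than the
     // number of bits the G_SEXTLOAD actually loaded, since bits would already
     // have been discarded.)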
- if (TruncSrc && MRI.getType(TruncSrc).getSizeInBits() < MMO.getSizeInBits()) + auto LoadSizeBits = LoadMI->getMemSizeInBits(); + if (TruncSrc && MRI.getType(TruncSrc).getSizeInBits() < LoadSizeBits) return false; - if (MMO.getSizeInBits() == SizeInBits) + if (LoadSizeBits == SizeInBits) return true; } return false; } -bool CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) { +void CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) { assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); Builder.setInstrAndDebugLoc(MI); Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg()); MI.eraseFromParent(); - return true; } bool CombinerHelper::matchSextInRegOfLoad( @@ -688,20 +693,16 @@ bool CombinerHelper::matchSextInRegOfLoad( return false; Register SrcReg = MI.getOperand(1).getReg(); - MachineInstr *LoadDef = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI); - if (!LoadDef || !MRI.hasOneNonDBGUse(LoadDef->getOperand(0).getReg())) + auto *LoadDef = getOpcodeDef<GLoad>(SrcReg, MRI); + if (!LoadDef || !MRI.hasOneNonDBGUse(LoadDef->getOperand(0).getReg()) || + !LoadDef->isSimple()) return false; // If the sign extend extends from a narrower width than the load's width, // then we can narrow the load width when we combine to a G_SEXTLOAD. - auto &MMO = **LoadDef->memoperands_begin(); - // Don't do this for non-simple loads. - if (MMO.isAtomic() || MMO.isVolatile()) - return false; - // Avoid widening the load at all. - unsigned NewSizeBits = - std::min((uint64_t)MI.getOperand(2).getImm(), MMO.getSizeInBits()); + unsigned NewSizeBits = std::min((uint64_t)MI.getOperand(2).getImm(), + LoadDef->getMemSizeInBits()); // Don't generate G_SEXTLOADs with a < 1 byte width. if (NewSizeBits < 8) @@ -710,18 +711,17 @@ bool CombinerHelper::matchSextInRegOfLoad( // anyway for most targets. 
if (!isPowerOf2_32(NewSizeBits)) return false; - MatchInfo = std::make_tuple(LoadDef->getOperand(0).getReg(), NewSizeBits); + MatchInfo = std::make_tuple(LoadDef->getDstReg(), NewSizeBits); return true; } -bool CombinerHelper::applySextInRegOfLoad( +void CombinerHelper::applySextInRegOfLoad( MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) { assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); Register LoadReg; unsigned ScalarSizeBits; std::tie(LoadReg, ScalarSizeBits) = MatchInfo; - auto *LoadDef = MRI.getVRegDef(LoadReg); - assert(LoadDef && "Expected a load reg"); + GLoad *LoadDef = cast<GLoad>(MRI.getVRegDef(LoadReg)); // If we have the following: // %ld = G_LOAD %ptr, (load 2) @@ -729,15 +729,14 @@ bool CombinerHelper::applySextInRegOfLoad( // ==> // %ld = G_SEXTLOAD %ptr (load 1) - auto &MMO = **LoadDef->memoperands_begin(); - Builder.setInstrAndDebugLoc(MI); + auto &MMO = LoadDef->getMMO(); + Builder.setInstrAndDebugLoc(*LoadDef); auto &MF = Builder.getMF(); auto PtrInfo = MMO.getPointerInfo(); auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8); Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(), - LoadDef->getOperand(1).getReg(), *NewMMO); + LoadDef->getPointerReg(), *NewMMO); MI.eraseFromParent(); - return true; } bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr, @@ -941,10 +940,104 @@ void CombinerHelper::applyCombineIndexedLoadStore( LLVM_DEBUG(dbgs() << " Combinined to indexed operation"); } -bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI) { - if (MI.getOpcode() != TargetOpcode::G_BR) +bool CombinerHelper::matchCombineDivRem(MachineInstr &MI, + MachineInstr *&OtherMI) { + unsigned Opcode = MI.getOpcode(); + bool IsDiv, IsSigned; + + switch (Opcode) { + default: + llvm_unreachable("Unexpected opcode!"); + case TargetOpcode::G_SDIV: + case TargetOpcode::G_UDIV: { + IsDiv = true; + IsSigned = Opcode == TargetOpcode::G_SDIV; + break; + } + case TargetOpcode::G_SREM: + case TargetOpcode::G_UREM: { + IsDiv = false; + IsSigned = Opcode == TargetOpcode::G_SREM; + break; + } + } + + Register Src1 = MI.getOperand(1).getReg(); + unsigned DivOpcode, RemOpcode, DivremOpcode; + if (IsSigned) { + DivOpcode = TargetOpcode::G_SDIV; + RemOpcode = TargetOpcode::G_SREM; + DivremOpcode = TargetOpcode::G_SDIVREM; + } else { + DivOpcode = TargetOpcode::G_UDIV; + RemOpcode = TargetOpcode::G_UREM; + DivremOpcode = TargetOpcode::G_UDIVREM; + } + + if (!isLegalOrBeforeLegalizer({DivremOpcode, {MRI.getType(Src1)}})) return false; + // Combine: + // %div:_ = G_[SU]DIV %src1:_, %src2:_ + // %rem:_ = G_[SU]REM %src1:_, %src2:_ + // into: + // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_ + + // Combine: + // %rem:_ = G_[SU]REM %src1:_, %src2:_ + // %div:_ = G_[SU]DIV %src1:_, %src2:_ + // into: + // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_ + + for (auto &UseMI : MRI.use_nodbg_instructions(Src1)) { + if (MI.getParent() == UseMI.getParent() && + ((IsDiv && UseMI.getOpcode() == RemOpcode) || + (!IsDiv && UseMI.getOpcode() == DivOpcode)) && + matchEqualDefs(MI.getOperand(2), UseMI.getOperand(2))) { + OtherMI = &UseMI; + return true; + } + } + + return false; +} + +void CombinerHelper::applyCombineDivRem(MachineInstr &MI, + MachineInstr *&OtherMI) { + unsigned Opcode = MI.getOpcode(); + assert(OtherMI && "OtherMI shouldn't be empty."); + + Register DestDivReg, DestRemReg; + if (Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_UDIV) { + DestDivReg = MI.getOperand(0).getReg(); + DestRemReg 
= OtherMI->getOperand(0).getReg(); + } else { + DestDivReg = OtherMI->getOperand(0).getReg(); + DestRemReg = MI.getOperand(0).getReg(); + } + + bool IsSigned = + Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM; + + // Check which instruction is first in the block so we don't break def-use + // deps by "moving" the instruction incorrectly. + if (dominates(MI, *OtherMI)) + Builder.setInstrAndDebugLoc(MI); + else + Builder.setInstrAndDebugLoc(*OtherMI); + + Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM + : TargetOpcode::G_UDIVREM, + {DestDivReg, DestRemReg}, + {MI.getOperand(1).getReg(), MI.getOperand(2).getReg()}); + MI.eraseFromParent(); + OtherMI->eraseFromParent(); +} + +bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI, + MachineInstr *&BrCond) { + assert(MI.getOpcode() == TargetOpcode::G_BR); + // Try to match the following: // bb1: // G_BRCOND %c1, %bb2 @@ -964,21 +1057,20 @@ bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI) { return false; assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator"); - MachineInstr *BrCond = &*std::prev(BrIt); + BrCond = &*std::prev(BrIt); if (BrCond->getOpcode() != TargetOpcode::G_BRCOND) return false; - // Check that the next block is the conditional branch target. - if (!MBB->isLayoutSuccessor(BrCond->getOperand(1).getMBB())) - return false; - return true; + // Check that the next block is the conditional branch target. Also make sure + // that it isn't the same as the G_BR's target (otherwise, this will loop.) + MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB(); + return BrCondTarget != MI.getOperand(0).getMBB() && + MBB->isLayoutSuccessor(BrCondTarget); } -void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI) { +void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI, + MachineInstr *&BrCond) { MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB(); - MachineBasicBlock::iterator BrIt(MI); - MachineInstr *BrCond = &*std::prev(BrIt); - Builder.setInstrAndDebugLoc(*BrCond); LLT Ty = MRI.getType(BrCond->getOperand(0).getReg()); // FIXME: Does int/fp matter for this? If so, we might need to restrict @@ -1056,7 +1148,7 @@ static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps, MVT VT = getMVTForLLT(Ty); if (NumMemOps && Op.allowOverlap() && NewTySize < Size && TLI.allowsMisalignedMemoryAccesses( - VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign().value() : 0, + VT, DstAS, Op.isFixedDstAlign() ? 
Op.getDstAlign() : Align(1), MachineMemOperand::MONone, &Fast) && Fast) TySize = Size; @@ -1117,7 +1209,7 @@ static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) { } bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst, - Register Val, unsigned KnownLen, + Register Val, uint64_t KnownLen, Align Alignment, bool IsVolatile) { auto &MF = *MI.getParent()->getParent(); const auto &TLI = *MF.getSubtarget().getTargetLowering(); @@ -1211,7 +1303,7 @@ bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst, } auto *StoreMMO = - MF.getMachineMemOperand(&DstMMO, DstOff, Ty.getSizeInBytes()); + MF.getMachineMemOperand(&DstMMO, DstOff, Ty); Register Ptr = Dst; if (DstOff != 0) { @@ -1229,10 +1321,51 @@ bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst, return true; } +bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE); + + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + Register Len = MI.getOperand(2).getReg(); + + const auto *MMOIt = MI.memoperands_begin(); + const MachineMemOperand *MemOp = *MMOIt; + bool IsVolatile = MemOp->isVolatile(); + + // See if this is a constant length copy + auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI); + // FIXME: support dynamically sized G_MEMCPY_INLINE + assert(LenVRegAndVal.hasValue() && + "inline memcpy with dynamic size is not yet supported"); + uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue(); + if (KnownLen == 0) { + MI.eraseFromParent(); + return true; + } + + const auto &DstMMO = **MI.memoperands_begin(); + const auto &SrcMMO = **std::next(MI.memoperands_begin()); + Align DstAlign = DstMMO.getBaseAlign(); + Align SrcAlign = SrcMMO.getBaseAlign(); + + return tryEmitMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, + IsVolatile); +} + +bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI, Register Dst, + Register Src, uint64_t KnownLen, + Align DstAlign, Align SrcAlign, + bool IsVolatile) { + assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE); + return optimizeMemcpy(MI, Dst, Src, KnownLen, + std::numeric_limits<uint64_t>::max(), DstAlign, + SrcAlign, IsVolatile); +} + bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst, - Register Src, unsigned KnownLen, - Align DstAlign, Align SrcAlign, - bool IsVolatile) { + Register Src, uint64_t KnownLen, + uint64_t Limit, Align DstAlign, + Align SrcAlign, bool IsVolatile) { auto &MF = *MI.getParent()->getParent(); const auto &TLI = *MF.getSubtarget().getTargetLowering(); auto &DL = MF.getDataLayout(); @@ -1242,7 +1375,6 @@ bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst, bool DstAlignCanChange = false; MachineFrameInfo &MFI = MF.getFrameInfo(); - bool OptSize = shouldLowerMemFuncForSize(MF); Align Alignment = commonAlignment(DstAlign, SrcAlign); MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI); @@ -1253,7 +1385,6 @@ bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst, // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining // if the memcpy is in a tail call position. - unsigned Limit = TLI.getMaxStoresPerMemcpy(OptSize); std::vector<LLT> MemOps; const auto &DstMMO = **MI.memoperands_begin(); @@ -1277,7 +1408,7 @@ bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst, // Don't promote to an alignment that would require dynamic stack // realignment. 
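     // (e.g. raising a frame object's alignment beyond the target's natural
     // stack alignment would force dynamic realignment of the whole frame, so
     // the requested alignment is halved until it fits.)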
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - if (!TRI->needsStackRealignment(MF)) + if (!TRI->hasStackRealignment(MF)) while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign)) NewAlign = NewAlign / 2; @@ -1336,7 +1467,7 @@ bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst, } bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst, - Register Src, unsigned KnownLen, + Register Src, uint64_t KnownLen, Align DstAlign, Align SrcAlign, bool IsVolatile) { auto &MF = *MI.getParent()->getParent(); @@ -1382,7 +1513,7 @@ bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst, // Don't promote to an alignment that would require dynamic stack // realignment. const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - if (!TRI->needsStackRealignment(MF)) + if (!TRI->hasStackRealignment(MF)) while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign)) NewAlign = NewAlign / 2; @@ -1449,10 +1580,6 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) { auto MMOIt = MI.memoperands_begin(); const MachineMemOperand *MemOp = *MMOIt; - bool IsVolatile = MemOp->isVolatile(); - // Don't try to optimize volatile. - if (IsVolatile) - return false; Align DstAlign = MemOp->getBaseAlign(); Align SrcAlign; @@ -1470,18 +1597,33 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) { auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI); if (!LenVRegAndVal) return false; // Leave it to the legalizer to lower it to a libcall. - unsigned KnownLen = LenVRegAndVal->Value.getZExtValue(); + uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue(); if (KnownLen == 0) { MI.eraseFromParent(); return true; } + bool IsVolatile = MemOp->isVolatile(); + if (Opc == TargetOpcode::G_MEMCPY_INLINE) + return tryEmitMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, + IsVolatile); + + // Don't try to optimize volatile. 
+ if (IsVolatile) + return false; + if (MaxLen && KnownLen > MaxLen) return false; - if (Opc == TargetOpcode::G_MEMCPY) - return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile); + if (Opc == TargetOpcode::G_MEMCPY) { + auto &MF = *MI.getParent()->getParent(); + const auto &TLI = *MF.getSubtarget().getTargetLowering(); + bool OptSize = shouldLowerMemFuncForSize(MF); + uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize); + return optimizeMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign, + IsVolatile); + } if (Opc == TargetOpcode::G_MEMMOVE) return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile); if (Opc == TargetOpcode::G_MEMSET) @@ -1540,7 +1682,7 @@ bool CombinerHelper::matchCombineConstantFoldFpUnary(MachineInstr &MI, return Cst.hasValue(); } -bool CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI, +void CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI, Optional<APFloat> &Cst) { assert(Cst.hasValue() && "Optional is unexpectedly empty!"); Builder.setInstrAndDebugLoc(MI); @@ -1549,7 +1691,6 @@ bool CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI, Register DstReg = MI.getOperand(0).getReg(); Builder.buildFConstant(DstReg, *FPVal); MI.eraseFromParent(); - return true; } bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI, @@ -1569,6 +1710,13 @@ bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI, if (!MaybeImmVal) return false; + // Don't do this combine if there multiple uses of the first PTR_ADD, + // since we may be able to compute the second PTR_ADD as an immediate + // offset anyway. Folding the first offset into the second may cause us + // to go beyond the bounds of our legal addressing modes. + if (!MRI.hasOneNonDBGUse(Add2)) + return false; + MachineInstr *Add2Def = MRI.getUniqueVRegDef(Add2); if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD) return false; @@ -1585,7 +1733,7 @@ bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI, return true; } -bool CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI, +void CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo) { assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD"); MachineIRBuilder MIB(MI); @@ -1595,7 +1743,6 @@ bool CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI, MI.getOperand(1).setReg(MatchInfo.Base); MI.getOperand(2).setReg(NewOffset.getReg(0)); Observer.changedInstr(MI); - return true; } bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI, @@ -1643,7 +1790,7 @@ bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI, return true; } -bool CombinerHelper::applyShiftImmedChain(MachineInstr &MI, +void CombinerHelper::applyShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo) { unsigned Opcode = MI.getOpcode(); assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR || @@ -1661,7 +1808,7 @@ bool CombinerHelper::applyShiftImmedChain(MachineInstr &MI, if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) { Builder.buildConstant(MI.getOperand(0), 0); MI.eraseFromParent(); - return true; + return; } // Arithmetic shift and saturating signed left shift have no effect beyond // scalar size. 
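A sketch of the shift-chain fold matched above, with illustrative vregs: two
immediate shifts of the same kind are merged into a single shift by adding the
amounts,

    %c2:_(s32) = G_CONSTANT i32 2
    %c3:_(s32) = G_CONSTANT i32 3
    %t:_(s32) = G_SHL %x, %c2
    %r:_(s32) = G_SHL %t, %c3
  ==>
    %c5:_(s32) = G_CONSTANT i32 5
    %r:_(s32) = G_SHL %x, %c5

and once the combined amount reaches the scalar width, G_SHL and G_LSHR fold
to a constant 0, the early-out applyShiftImmedChain takes above.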
@@ -1674,7 +1821,6 @@ bool CombinerHelper::applyShiftImmedChain(MachineInstr &MI, MI.getOperand(1).setReg(MatchInfo.Reg); MI.getOperand(2).setReg(NewImm); Observer.changedInstr(MI); - return true; } bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI, @@ -1758,7 +1904,7 @@ bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI, return true; } -bool CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI, +void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) { unsigned Opcode = MI.getOpcode(); assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR || @@ -1790,7 +1936,6 @@ bool CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI, MatchInfo.Logic->eraseFromParent(); MI.eraseFromParent(); - return true; } bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI, @@ -1805,7 +1950,7 @@ bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI, return (static_cast<int32_t>(ShiftVal) != -1); } -bool CombinerHelper::applyCombineMulToShl(MachineInstr &MI, +void CombinerHelper::applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal) { assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL"); MachineIRBuilder MIB(MI); @@ -1815,7 +1960,6 @@ bool CombinerHelper::applyCombineMulToShl(MachineInstr &MI, MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL)); MI.getOperand(2).setReg(ShiftCst.getReg(0)); Observer.changedInstr(MI); - return true; } // shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source @@ -1856,7 +2000,7 @@ bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI, return MinLeadingZeros >= ShiftAmt; } -bool CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI, +void CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI, const RegisterImmPair &MatchData) { Register ExtSrcReg = MatchData.Reg; int64_t ShiftAmtVal = MatchData.Imm; @@ -1868,6 +2012,24 @@ bool CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI, Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags()); Builder.buildZExt(MI.getOperand(0), NarrowShift); MI.eraseFromParent(); +} + +bool CombinerHelper::matchCombineMergeUnmerge(MachineInstr &MI, + Register &MatchInfo) { + GMerge &Merge = cast<GMerge>(MI); + SmallVector<Register, 16> MergedValues; + for (unsigned I = 0; I < Merge.getNumSources(); ++I) + MergedValues.emplace_back(Merge.getSourceReg(I)); + + auto *Unmerge = getOpcodeDef<GUnmerge>(MergedValues[0], MRI); + if (!Unmerge || Unmerge->getNumDefs() != Merge.getNumSources()) + return false; + + for (unsigned I = 0; I < MergedValues.size(); ++I) + if (MergedValues[I] != Unmerge->getReg(I)) + return false; + + MatchInfo = Unmerge->getSourceReg(); return true; } @@ -1906,7 +2068,7 @@ bool CombinerHelper::matchCombineUnmergeMergeToPlainValues( return true; } -bool CombinerHelper::applyCombineUnmergeMergeToPlainValues( +void CombinerHelper::applyCombineUnmergeMergeToPlainValues( MachineInstr &MI, SmallVectorImpl<Register> &Operands) { assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && "Expected an unmerge"); @@ -1927,7 +2089,6 @@ bool CombinerHelper::applyCombineUnmergeMergeToPlainValues( Builder.buildCast(DstReg, SrcReg); } MI.eraseFromParent(); - return true; } bool CombinerHelper::matchCombineUnmergeConstant(MachineInstr &MI, @@ -1955,7 +2116,7 @@ bool CombinerHelper::matchCombineUnmergeConstant(MachineInstr &MI, return true; } -bool CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI, +void CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI, 
SmallVectorImpl<APInt> &Csts) { assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && "Expected an unmerge"); @@ -1969,7 +2130,6 @@ bool CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI, } MI.eraseFromParent(); - return true; } bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) { @@ -1983,7 +2143,7 @@ bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) { return true; } -bool CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) { +void CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) { Builder.setInstrAndDebugLoc(MI); Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg(); // Truncating a vector is going to truncate every single lane, @@ -2002,7 +2162,6 @@ bool CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) { } else Builder.buildTrunc(Dst0Reg, SrcReg); MI.eraseFromParent(); - return true; } bool CombinerHelper::matchCombineUnmergeZExtToZExt(MachineInstr &MI) { @@ -2031,7 +2190,7 @@ bool CombinerHelper::matchCombineUnmergeZExtToZExt(MachineInstr &MI) { return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits(); } -bool CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) { +void CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) { assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && "Expected an unmerge"); @@ -2063,7 +2222,6 @@ bool CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) { replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg); } MI.eraseFromParent(); - return true; } bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI, @@ -2091,7 +2249,7 @@ bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI, return ShiftVal >= Size / 2 && ShiftVal < Size; } -bool CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI, +void CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI, const unsigned &ShiftVal) { Register DstReg = MI.getOperand(0).getReg(); Register SrcReg = MI.getOperand(1).getReg(); @@ -2162,7 +2320,6 @@ bool CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI, } MI.eraseFromParent(); - return true; } bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI, @@ -2185,13 +2342,12 @@ bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) { m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg)))); } -bool CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) { +void CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) { assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR"); Register DstReg = MI.getOperand(0).getReg(); Builder.setInstr(MI); Builder.buildCopy(DstReg, Reg); MI.eraseFromParent(); - return true; } bool CombinerHelper::matchCombineP2IToI2P(MachineInstr &MI, Register &Reg) { @@ -2200,13 +2356,12 @@ bool CombinerHelper::matchCombineP2IToI2P(MachineInstr &MI, Register &Reg) { return mi_match(SrcReg, MRI, m_GIntToPtr(m_Reg(Reg))); } -bool CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) { +void CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) { assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT"); Register DstReg = MI.getOperand(0).getReg(); Builder.setInstr(MI); Builder.buildZExtOrTrunc(DstReg, Reg); MI.eraseFromParent(); - return true; } bool CombinerHelper::matchCombineAddP2IToPtrAdd( @@ -2234,7 +2389,7 @@ bool CombinerHelper::matchCombineAddP2IToPtrAdd( return false; } -bool 
CombinerHelper::applyCombineAddP2IToPtrAdd( +void CombinerHelper::applyCombineAddP2IToPtrAdd( MachineInstr &MI, std::pair<Register, bool> &PtrReg) { Register Dst = MI.getOperand(0).getReg(); Register LHS = MI.getOperand(1).getReg(); @@ -2251,7 +2406,6 @@ bool CombinerHelper::applyCombineAddP2IToPtrAdd( auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS); Builder.buildPtrToInt(Dst, PtrAdd); MI.eraseFromParent(); - return true; } bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI, @@ -2272,7 +2426,7 @@ bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI, return false; } -bool CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI, +void CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI, int64_t &NewCst) { assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD"); Register Dst = MI.getOperand(0).getReg(); @@ -2280,7 +2434,6 @@ bool CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI, Builder.setInstrAndDebugLoc(MI); Builder.buildConstant(Dst, NewCst); MI.eraseFromParent(); - return true; } bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) { @@ -2292,12 +2445,18 @@ bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) { m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy)))); } -bool CombinerHelper::applyCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) { - assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT"); +bool CombinerHelper::matchCombineZextTrunc(MachineInstr &MI, Register &Reg) { + assert(MI.getOpcode() == TargetOpcode::G_ZEXT && "Expected a G_ZEXT"); Register DstReg = MI.getOperand(0).getReg(); - MI.eraseFromParent(); - replaceRegWith(MRI, DstReg, Reg); - return true; + Register SrcReg = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(DstReg); + if (mi_match(SrcReg, MRI, + m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))))) { + unsigned DstSize = DstTy.getScalarSizeInBits(); + unsigned SrcSize = MRI.getType(SrcReg).getScalarSizeInBits(); + return KB->getKnownBits(Reg).countMinLeadingZeros() >= DstSize - SrcSize; + } + return false; } bool CombinerHelper::matchCombineExtOfExt( @@ -2321,7 +2480,7 @@ bool CombinerHelper::matchCombineExtOfExt( return false; } -bool CombinerHelper::applyCombineExtOfExt( +void CombinerHelper::applyCombineExtOfExt( MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) { assert((MI.getOpcode() == TargetOpcode::G_ANYEXT || MI.getOpcode() == TargetOpcode::G_SEXT || @@ -2336,7 +2495,7 @@ bool CombinerHelper::applyCombineExtOfExt( Observer.changingInstr(MI); MI.getOperand(1).setReg(Reg); Observer.changedInstr(MI); - return true; + return; } // Combine: @@ -2349,13 +2508,10 @@ bool CombinerHelper::applyCombineExtOfExt( Builder.setInstrAndDebugLoc(MI); Builder.buildInstr(SrcExtOp, {DstReg}, {Reg}); MI.eraseFromParent(); - return true; } - - return false; } -bool CombinerHelper::applyCombineMulByNegativeOne(MachineInstr &MI) { +void CombinerHelper::applyCombineMulByNegativeOne(MachineInstr &MI) { assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL"); Register DstReg = MI.getOperand(0).getReg(); Register SrcReg = MI.getOperand(1).getReg(); @@ -2365,7 +2521,6 @@ bool CombinerHelper::applyCombineMulByNegativeOne(MachineInstr &MI) { Builder.buildSub(DstReg, Builder.buildConstant(DstTy, 0), SrcReg, MI.getFlags()); MI.eraseFromParent(); - return true; } bool CombinerHelper::matchCombineFNegOfFNeg(MachineInstr &MI, Register &Reg) { @@ -2381,14 +2536,6 @@ bool 
CombinerHelper::matchCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) { return mi_match(Src, MRI, m_GFabs(m_Reg(AbsSrc))); } -bool CombinerHelper::applyCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) { - assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS"); - Register Dst = MI.getOperand(0).getReg(); - MI.eraseFromParent(); - replaceRegWith(MRI, Dst, Src); - return true; -} - bool CombinerHelper::matchCombineTruncOfExt( MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) { assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC"); @@ -2403,7 +2550,7 @@ bool CombinerHelper::matchCombineTruncOfExt( return false; } -bool CombinerHelper::applyCombineTruncOfExt( +void CombinerHelper::applyCombineTruncOfExt( MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) { assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC"); Register SrcReg = MatchInfo.first; @@ -2414,7 +2561,7 @@ bool CombinerHelper::applyCombineTruncOfExt( if (SrcTy == DstTy) { MI.eraseFromParent(); replaceRegWith(MRI, DstReg, SrcReg); - return true; + return; } Builder.setInstrAndDebugLoc(MI); if (SrcTy.getSizeInBits() < DstTy.getSizeInBits()) @@ -2422,7 +2569,6 @@ bool CombinerHelper::applyCombineTruncOfExt( else Builder.buildTrunc(DstReg, SrcReg); MI.eraseFromParent(); - return true; } bool CombinerHelper::matchCombineTruncOfShl( @@ -2449,7 +2595,7 @@ bool CombinerHelper::matchCombineTruncOfShl( return false; } -bool CombinerHelper::applyCombineTruncOfShl( +void CombinerHelper::applyCombineTruncOfShl( MachineInstr &MI, std::pair<Register, Register> &MatchInfo) { assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC"); Register DstReg = MI.getOperand(0).getReg(); @@ -2463,7 +2609,6 @@ bool CombinerHelper::applyCombineTruncOfShl( auto TruncShiftSrc = Builder.buildTrunc(DstTy, ShiftSrc); Builder.buildShl(DstReg, TruncShiftSrc, ShiftAmt, SrcMI->getFlags()); MI.eraseFromParent(); - return true; } bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) { @@ -2662,6 +2807,14 @@ bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) { return true; } +bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, APInt C) { + assert(MI.getNumDefs() == 1 && "Expected only one def?"); + Builder.setInstr(MI); + Builder.buildConstant(MI.getOperand(0), C); + MI.eraseFromParent(); + return true; +} + bool CombinerHelper::replaceInstWithUndef(MachineInstr &MI) { assert(MI.getNumDefs() == 1 && "Expected only one def?"); Builder.setInstr(MI); @@ -2731,7 +2884,7 @@ bool CombinerHelper::matchCombineInsertVecElts( return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF; } -bool CombinerHelper::applyCombineInsertVecElts( +void CombinerHelper::applyCombineInsertVecElts( MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) { Builder.setInstr(MI); Register UndefReg; @@ -2748,17 +2901,15 @@ bool CombinerHelper::applyCombineInsertVecElts( } Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo); MI.eraseFromParent(); - return true; } -bool CombinerHelper::applySimplifyAddToSub( +void CombinerHelper::applySimplifyAddToSub( MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) { Builder.setInstr(MI); Register SubLHS, SubRHS; std::tie(SubLHS, SubRHS) = MatchInfo; Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS); MI.eraseFromParent(); - return true; } bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands( @@ -2852,7 +3003,7 @@ bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands( return true; } -bool 
CombinerHelper::applyBuildInstructionSteps( +void CombinerHelper::applyBuildInstructionSteps( MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) { assert(MatchInfo.InstrsToBuild.size() && "Expected at least one instr to build?"); @@ -2865,7 +3016,6 @@ bool CombinerHelper::applyBuildInstructionSteps( OperandFn(Instr); } MI.eraseFromParent(); - return true; } bool CombinerHelper::matchAshrShlToSextInreg( @@ -2885,7 +3035,8 @@ bool CombinerHelper::matchAshrShlToSextInreg( MatchInfo = std::make_tuple(Src, ShlCst); return true; } -bool CombinerHelper::applyAshShlToSextInreg( + +void CombinerHelper::applyAshShlToSextInreg( MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) { assert(MI.getOpcode() == TargetOpcode::G_ASHR); Register Src; @@ -2895,6 +3046,32 @@ bool CombinerHelper::applyAshShlToSextInreg( Builder.setInstrAndDebugLoc(MI); Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt); MI.eraseFromParent(); +} + +/// and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0 +bool CombinerHelper::matchOverlappingAnd( + MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_AND); + + Register Dst = MI.getOperand(0).getReg(); + LLT Ty = MRI.getType(Dst); + + Register R; + int64_t C1; + int64_t C2; + if (!mi_match( + Dst, MRI, + m_GAnd(m_GAnd(m_Reg(R), m_ICst(C1)), m_ICst(C2)))) + return false; + + MatchInfo = [=](MachineIRBuilder &B) { + if (C1 & C2) { + B.buildAnd(Dst, R, B.buildConstant(Ty, C1 & C2)); + return; + } + auto Zero = B.buildConstant(Ty, 0); + replaceRegWith(MRI, Dst, Zero->getOperand(0).getReg()); + }; return true; } @@ -3091,7 +3268,7 @@ bool CombinerHelper::matchNotCmp(MachineInstr &MI, return true; } -bool CombinerHelper::applyNotCmp(MachineInstr &MI, +void CombinerHelper::applyNotCmp(MachineInstr &MI, SmallVectorImpl<Register> &RegsToNegate) { for (Register Reg : RegsToNegate) { MachineInstr *Def = MRI.getVRegDef(Reg); @@ -3121,7 +3298,6 @@ bool CombinerHelper::applyNotCmp(MachineInstr &MI, replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg()); MI.eraseFromParent(); - return true; } bool CombinerHelper::matchXorOfAndWithSameReg( @@ -3155,7 +3331,7 @@ bool CombinerHelper::matchXorOfAndWithSameReg( return Y == SharedReg; } -bool CombinerHelper::applyXorOfAndWithSameReg( +void CombinerHelper::applyXorOfAndWithSameReg( MachineInstr &MI, std::pair<Register, Register> &MatchInfo) { // Fold (xor (and x, y), y) -> (and (not x), y) Builder.setInstrAndDebugLoc(MI); @@ -3167,7 +3343,6 @@ bool CombinerHelper::applyXorOfAndWithSameReg( MI.getOperand(1).setReg(Not->getOperand(0).getReg()); MI.getOperand(2).setReg(Y); Observer.changedInstr(MI); - return true; } bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) { @@ -3188,16 +3363,15 @@ bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) { return isBuildVectorAllZeros(*VecMI, MRI); } -bool CombinerHelper::applyPtrAddZero(MachineInstr &MI) { +void CombinerHelper::applyPtrAddZero(MachineInstr &MI) { assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD); Builder.setInstrAndDebugLoc(MI); Builder.buildIntToPtr(MI.getOperand(0), MI.getOperand(2)); MI.eraseFromParent(); - return true; } /// The second source operand is known to be a power of 2. 
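The identity behind the combine that follows: for any power of two P, X urem P == X and (P - 1), so the combiner can emit an add of -1 and an and instead of a remainder. A minimal standalone check (plain C++, not LLVM code):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t X = 0; X < 4096; ++X)
        for (uint32_t P = 1; P != 0; P <<= 1) // every power of two up to 2^31
          assert(X % P == (X & (P - 1)));
      return 0;
    }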
-bool CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) { +void CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) { Register DstReg = MI.getOperand(0).getReg(); Register Src0 = MI.getOperand(1).getReg(); Register Pow2Src1 = MI.getOperand(2).getReg(); @@ -3209,7 +3383,6 @@ bool CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) { auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne); Builder.buildAnd(DstReg, Src0, Add); MI.eraseFromParent(); - return true; } Optional<SmallVector<Register, 8>> @@ -3283,7 +3456,7 @@ CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const { /// e.g. x[i] << 24 /// /// \returns The load instruction and the byte offset it is moved into. -static Optional<std::pair<MachineInstr *, int64_t>> +static Optional<std::pair<GZExtLoad *, int64_t>> matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits, const MachineRegisterInfo &MRI) { assert(MRI.hasOneNonDBGUse(Reg) && @@ -3300,18 +3473,17 @@ matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits, return None; // TODO: Handle other types of loads. - auto *Load = getOpcodeDef(TargetOpcode::G_ZEXTLOAD, MaybeLoad, MRI); + auto *Load = getOpcodeDef<GZExtLoad>(MaybeLoad, MRI); if (!Load) return None; - const auto &MMO = **Load->memoperands_begin(); - if (!MMO.isUnordered() || MMO.getSizeInBits() != MemSizeInBits) + if (!Load->isUnordered() || Load->getMemSizeInBits() != MemSizeInBits) return None; return std::make_pair(Load, Shift / MemSizeInBits); } -Optional<std::pair<MachineInstr *, int64_t>> +Optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>> CombinerHelper::findLoadOffsetsForLoadOrCombine( SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx, const SmallVector<Register, 8> &RegsToVisit, const unsigned MemSizeInBits) { @@ -3323,7 +3495,7 @@ CombinerHelper::findLoadOffsetsForLoadOrCombine( int64_t LowestIdx = INT64_MAX; // The load which uses the lowest index. - MachineInstr *LowestIdxLoad = nullptr; + GZExtLoad *LowestIdxLoad = nullptr; // Keeps track of the load indices we see. We shouldn't see any indices twice. SmallSet<int64_t, 8> SeenIdx; @@ -3334,10 +3506,10 @@ CombinerHelper::findLoadOffsetsForLoadOrCombine( const MachineMemOperand *MMO = nullptr; // Earliest instruction-order load in the pattern. - MachineInstr *EarliestLoad = nullptr; + GZExtLoad *EarliestLoad = nullptr; // Latest instruction-order load in the pattern. - MachineInstr *LatestLoad = nullptr; + GZExtLoad *LatestLoad = nullptr; // Base pointer which every load should share. Register BasePtr; @@ -3352,7 +3524,7 @@ CombinerHelper::findLoadOffsetsForLoadOrCombine( auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI); if (!LoadAndPos) return None; - MachineInstr *Load; + GZExtLoad *Load; int64_t DstPos; std::tie(Load, DstPos) = *LoadAndPos; @@ -3365,10 +3537,10 @@ CombinerHelper::findLoadOffsetsForLoadOrCombine( return None; // Make sure that the MachineMemOperands of every seen load are compatible. - const MachineMemOperand *LoadMMO = *Load->memoperands_begin(); + auto &LoadMMO = Load->getMMO(); if (!MMO) - MMO = LoadMMO; - if (MMO->getAddrSpace() != LoadMMO->getAddrSpace()) + MMO = &LoadMMO; + if (MMO->getAddrSpace() != LoadMMO.getAddrSpace()) return None; // Find out what the base pointer and index for the load is. 
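matchLoadAndBytePosition above only accepts a shift that is a multiple of the memory size, because the shifted zext-load then contributes whole bytes at index Shift / MemSizeInBits of the wide value. A standalone sketch of that bookkeeping (values are illustrative):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint8_t Byte = 0xAB;               // an 8-bit G_ZEXTLOAD
      unsigned Shift = 16;               // the G_SHL amount
      const unsigned MemSizeInBits = 8;
      assert(Shift % MemSizeInBits == 0);
      unsigned BytePos = Shift / MemSizeInBits; // == 2: third byte of the wide value
      uint32_t Wide = (uint32_t)Byte << Shift;
      assert(((Wide >> (8 * BytePos)) & 0xFF) == Byte);
      return 0;
    }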
@@ -3442,7 +3614,7 @@ CombinerHelper::findLoadOffsetsForLoadOrCombine( return None; } - return std::make_pair(LowestIdxLoad, LowestIdx); + return std::make_tuple(LowestIdxLoad, LowestIdx, LatestLoad); } bool CombinerHelper::matchLoadOrCombine( @@ -3490,13 +3662,13 @@ bool CombinerHelper::matchLoadOrCombine( // Also verify that each of these ends up putting a[i] into the same memory // offset as a load into a wide type would. SmallDenseMap<int64_t, int64_t, 8> MemOffset2Idx; - MachineInstr *LowestIdxLoad; + GZExtLoad *LowestIdxLoad, *LatestLoad; int64_t LowestIdx; auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine( MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits); if (!MaybeLoadInfo) return false; - std::tie(LowestIdxLoad, LowestIdx) = *MaybeLoadInfo; + std::tie(LowestIdxLoad, LowestIdx, LatestLoad) = *MaybeLoadInfo; // We have a bunch of loads being OR'd together. Using the addresses + offsets // we found before, check if this corresponds to a big or little endian byte @@ -3530,12 +3702,12 @@ bool CombinerHelper::matchLoadOrCombine( // We wil reuse the pointer from the load which ends up at byte offset 0. It // may not use index 0. - Register Ptr = LowestIdxLoad->getOperand(1).getReg(); - const MachineMemOperand &MMO = **LowestIdxLoad->memoperands_begin(); + Register Ptr = LowestIdxLoad->getPointerReg(); + const MachineMemOperand &MMO = LowestIdxLoad->getMMO(); LegalityQuery::MemDesc MMDesc; - MMDesc.SizeInBits = WideMemSizeInBits; + MMDesc.MemoryTy = Ty; MMDesc.AlignInBits = MMO.getAlign().value() * 8; - MMDesc.Ordering = MMO.getOrdering(); + MMDesc.Ordering = MMO.getSuccessOrdering(); if (!isLegalOrBeforeLegalizer( {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}})) return false; @@ -3551,6 +3723,7 @@ bool CombinerHelper::matchLoadOrCombine( return false; MatchInfo = [=](MachineIRBuilder &MIB) { + MIB.setInstrAndDebugLoc(*LatestLoad); Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst; MIB.buildLoad(LoadDst, Ptr, *NewMMO); if (NeedsBSwap) @@ -3559,11 +3732,535 @@ bool CombinerHelper::matchLoadOrCombine( return true; } -bool CombinerHelper::applyLoadOrCombine( +bool CombinerHelper::matchExtendThroughPhis(MachineInstr &MI, + MachineInstr *&ExtMI) { + assert(MI.getOpcode() == TargetOpcode::G_PHI); + + Register DstReg = MI.getOperand(0).getReg(); + + // TODO: Extending a vector may be expensive, don't do this until heuristics + // are better. + if (MRI.getType(DstReg).isVector()) + return false; + + // Try to match a phi, whose only use is an extend. + if (!MRI.hasOneNonDBGUse(DstReg)) + return false; + ExtMI = &*MRI.use_instr_nodbg_begin(DstReg); + switch (ExtMI->getOpcode()) { + case TargetOpcode::G_ANYEXT: + return true; // G_ANYEXT is usually free. + case TargetOpcode::G_ZEXT: + case TargetOpcode::G_SEXT: + break; + default: + return false; + } + + // If the target is likely to fold this extend away, don't propagate. + if (Builder.getTII().isExtendLikelyToBeFolded(*ExtMI, MRI)) + return false; + + // We don't want to propagate the extends unless there's a good chance that + // they'll be optimized in some way. + // Collect the unique incoming values. 
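matchLoadOrCombine, updated above to also return the latest load in instruction order (the new build point) and to validate the access with an LLT-based MemDesc, folds an OR-tree of shifted narrow loads into one wide load, plus a bswap when the byte order found is the reverse of the target's. A standalone sketch of the little-endian case (the endianness guard is the assumption being checked):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // The OR-of-shifted-byte-loads shape the combiner recognizes.
    uint32_t FromBytes(const uint8_t *P) {
      return (uint32_t)P[0] | ((uint32_t)P[1] << 8) |
             ((uint32_t)P[2] << 16) | ((uint32_t)P[3] << 24);
    }

    int main() {
      uint8_t Buf[4] = {0x78, 0x56, 0x34, 0x12};
      uint32_t Wide;
      std::memcpy(&Wide, Buf, sizeof(Wide)); // the single wide load
      if (Wide == 0x12345678u)               // little-endian host
        assert(FromBytes(Buf) == Wide);      // byte assembly == wide load
      return 0;
    }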
+ SmallPtrSet<MachineInstr *, 4> InSrcs; + for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) { + auto *DefMI = getDefIgnoringCopies(MI.getOperand(Idx).getReg(), MRI); + switch (DefMI->getOpcode()) { + case TargetOpcode::G_LOAD: + case TargetOpcode::G_TRUNC: + case TargetOpcode::G_SEXT: + case TargetOpcode::G_ZEXT: + case TargetOpcode::G_ANYEXT: + case TargetOpcode::G_CONSTANT: + InSrcs.insert(getDefIgnoringCopies(MI.getOperand(Idx).getReg(), MRI)); + // Don't try to propagate if there are too many places to create new + // extends, chances are it'll increase code size. + if (InSrcs.size() > 2) + return false; + break; + default: + return false; + } + } + return true; +} + +void CombinerHelper::applyExtendThroughPhis(MachineInstr &MI, + MachineInstr *&ExtMI) { + assert(MI.getOpcode() == TargetOpcode::G_PHI); + Register DstReg = ExtMI->getOperand(0).getReg(); + LLT ExtTy = MRI.getType(DstReg); + + // Propagate the extension into the block of each incoming reg's block. + // Use a SetVector here because PHIs can have duplicate edges, and we want + // deterministic iteration order. + SmallSetVector<MachineInstr *, 8> SrcMIs; + SmallDenseMap<MachineInstr *, MachineInstr *, 8> OldToNewSrcMap; + for (unsigned SrcIdx = 1; SrcIdx < MI.getNumOperands(); SrcIdx += 2) { + auto *SrcMI = MRI.getVRegDef(MI.getOperand(SrcIdx).getReg()); + if (!SrcMIs.insert(SrcMI)) + continue; + + // Build an extend after each src inst. + auto *MBB = SrcMI->getParent(); + MachineBasicBlock::iterator InsertPt = ++SrcMI->getIterator(); + if (InsertPt != MBB->end() && InsertPt->isPHI()) + InsertPt = MBB->getFirstNonPHI(); + + Builder.setInsertPt(*SrcMI->getParent(), InsertPt); + Builder.setDebugLoc(MI.getDebugLoc()); + auto NewExt = Builder.buildExtOrTrunc(ExtMI->getOpcode(), ExtTy, + SrcMI->getOperand(0).getReg()); + OldToNewSrcMap[SrcMI] = NewExt; + } + + // Create a new phi with the extended inputs. + Builder.setInstrAndDebugLoc(MI); + auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI); + NewPhi.addDef(DstReg); + for (unsigned SrcIdx = 1; SrcIdx < MI.getNumOperands(); ++SrcIdx) { + auto &MO = MI.getOperand(SrcIdx); + if (!MO.isReg()) { + NewPhi.addMBB(MO.getMBB()); + continue; + } + auto *NewSrc = OldToNewSrcMap[MRI.getVRegDef(MO.getReg())]; + NewPhi.addUse(NewSrc->getOperand(0).getReg()); + } + Builder.insertInstr(NewPhi); + ExtMI->eraseFromParent(); +} + +bool CombinerHelper::matchExtractVecEltBuildVec(MachineInstr &MI, + Register &Reg) { + assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT); + // If we have a constant index, look for a G_BUILD_VECTOR source + // and find the source register that the index maps to. 
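applyExtendThroughPhis above is sound because an extend distributes over the phi's incoming values, and the SetVector guards against a phi listing the same source twice while keeping iteration deterministic. The distribution property, with a select standing in for the phi (standalone C++):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (bool Cond : {false, true}) {
        int8_t A = -5, B = 100;
        assert((int64_t)(Cond ? A : B) ==         // sext(phi(a, b))
               (Cond ? (int64_t)A : (int64_t)B)); // phi(sext(a), sext(b))
      }
      return 0;
    }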
+ Register SrcVec = MI.getOperand(1).getReg(); + LLT SrcTy = MRI.getType(SrcVec); + if (!isLegalOrBeforeLegalizer( + {TargetOpcode::G_BUILD_VECTOR, {SrcTy, SrcTy.getElementType()}})) + return false; + + auto Cst = getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI); + if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements()) + return false; + + unsigned VecIdx = Cst->Value.getZExtValue(); + MachineInstr *BuildVecMI = + getOpcodeDef(TargetOpcode::G_BUILD_VECTOR, SrcVec, MRI); + if (!BuildVecMI) { + BuildVecMI = getOpcodeDef(TargetOpcode::G_BUILD_VECTOR_TRUNC, SrcVec, MRI); + if (!BuildVecMI) + return false; + LLT ScalarTy = MRI.getType(BuildVecMI->getOperand(1).getReg()); + if (!isLegalOrBeforeLegalizer( + {TargetOpcode::G_BUILD_VECTOR_TRUNC, {SrcTy, ScalarTy}})) + return false; + } + + EVT Ty(getMVTForLLT(SrcTy)); + if (!MRI.hasOneNonDBGUse(SrcVec) && + !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty)) + return false; + + Reg = BuildVecMI->getOperand(VecIdx + 1).getReg(); + return true; +} + +void CombinerHelper::applyExtractVecEltBuildVec(MachineInstr &MI, + Register &Reg) { + // Check the type of the register, since it may have come from a + // G_BUILD_VECTOR_TRUNC. + LLT ScalarTy = MRI.getType(Reg); + Register DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + + Builder.setInstrAndDebugLoc(MI); + if (ScalarTy != DstTy) { + assert(ScalarTy.getSizeInBits() > DstTy.getSizeInBits()); + Builder.buildTrunc(DstReg, Reg); + MI.eraseFromParent(); + return; + } + replaceSingleDefInstWithReg(MI, Reg); +} + +bool CombinerHelper::matchExtractAllEltsFromBuildVector( + MachineInstr &MI, + SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) { + assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR); + // This combine tries to find build_vector's which have every source element + // extracted using G_EXTRACT_VECTOR_ELT. This can happen when transforms like + // the masked load scalarization is run late in the pipeline. There's already + // a combine for a similar pattern starting from the extract, but that + // doesn't attempt to do it if there are multiple uses of the build_vector, + // which in this case is true. Starting the combine from the build_vector + // feels more natural than trying to find sibling nodes of extracts. + // E.g. + // %vec(<4 x s32>) = G_BUILD_VECTOR %s1(s32), %s2, %s3, %s4 + // %ext1 = G_EXTRACT_VECTOR_ELT %vec, 0 + // %ext2 = G_EXTRACT_VECTOR_ELT %vec, 1 + // %ext3 = G_EXTRACT_VECTOR_ELT %vec, 2 + // %ext4 = G_EXTRACT_VECTOR_ELT %vec, 3 + // ==> + // replace ext{1,2,3,4} with %s{1,2,3,4} + + Register DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + unsigned NumElts = DstTy.getNumElements(); + + SmallBitVector ExtractedElts(NumElts); + for (auto &II : make_range(MRI.use_instr_nodbg_begin(DstReg), + MRI.use_instr_nodbg_end())) { + if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT) + return false; + auto Cst = getConstantVRegVal(II.getOperand(2).getReg(), MRI); + if (!Cst) + return false; + unsigned Idx = Cst.getValue().getZExtValue(); + if (Idx >= NumElts) + return false; // Out of range. + ExtractedElts.set(Idx); + SrcDstPairs.emplace_back( + std::make_pair(MI.getOperand(Idx + 1).getReg(), &II)); + } + // Match if every element was extracted. 
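matchExtractAllEltsFromBuildVector, whose final check lands just below, pairs each constant-index extract with its build_vector source and uses a bit vector to require that every lane is read before rewriting. The same bookkeeping over plain arrays (standalone, names illustrative):

    #include <bitset>
    #include <cassert>

    int main() {
      const int Src[4] = {10, 20, 30, 40};         // scalar sources
      int Vec[4];
      for (int I = 0; I < 4; ++I) Vec[I] = Src[I]; // G_BUILD_VECTOR
      const unsigned ExtractIdx[4] = {2, 0, 3, 1}; // one extract per lane, any order
      std::bitset<4> Extracted;
      for (unsigned Idx : ExtractIdx) {
        assert(Vec[Idx] == Src[Idx]); // each extract is replaced by its source
        Extracted.set(Idx);
      }
      assert(Extracted.all());        // combine fires only when every lane is read
      return 0;
    }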
+ return ExtractedElts.all(); +} + +void CombinerHelper::applyExtractAllEltsFromBuildVector( + MachineInstr &MI, + SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) { + assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR); + for (auto &Pair : SrcDstPairs) { + auto *ExtMI = Pair.second; + replaceRegWith(MRI, ExtMI->getOperand(0).getReg(), Pair.first); + ExtMI->eraseFromParent(); + } + MI.eraseFromParent(); +} + +void CombinerHelper::applyBuildFn( MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { Builder.setInstrAndDebugLoc(MI); MatchInfo(Builder); MI.eraseFromParent(); +} + +void CombinerHelper::applyBuildFnNoErase( + MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + Builder.setInstrAndDebugLoc(MI); + MatchInfo(Builder); +} + +/// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate. +bool CombinerHelper::matchFunnelShiftToRotate(MachineInstr &MI) { + unsigned Opc = MI.getOpcode(); + assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR); + Register X = MI.getOperand(1).getReg(); + Register Y = MI.getOperand(2).getReg(); + if (X != Y) + return false; + unsigned RotateOpc = + Opc == TargetOpcode::G_FSHL ? TargetOpcode::G_ROTL : TargetOpcode::G_ROTR; + return isLegalOrBeforeLegalizer({RotateOpc, {MRI.getType(X), MRI.getType(Y)}}); +} + +void CombinerHelper::applyFunnelShiftToRotate(MachineInstr &MI) { + unsigned Opc = MI.getOpcode(); + assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR); + bool IsFSHL = Opc == TargetOpcode::G_FSHL; + Observer.changingInstr(MI); + MI.setDesc(Builder.getTII().get(IsFSHL ? TargetOpcode::G_ROTL + : TargetOpcode::G_ROTR)); + MI.RemoveOperand(2); + Observer.changedInstr(MI); +} + +// Fold (rot x, c) -> (rot x, c % BitSize) +bool CombinerHelper::matchRotateOutOfRange(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_ROTL || + MI.getOpcode() == TargetOpcode::G_ROTR); + unsigned Bitsize = + MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits(); + Register AmtReg = MI.getOperand(2).getReg(); + bool OutOfRange = false; + auto MatchOutOfRange = [Bitsize, &OutOfRange](const Constant *C) { + if (auto *CI = dyn_cast<ConstantInt>(C)) + OutOfRange |= CI->getValue().uge(Bitsize); + return true; + }; + return matchUnaryPredicate(MRI, AmtReg, MatchOutOfRange) && OutOfRange; +} + +void CombinerHelper::applyRotateOutOfRange(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_ROTL || + MI.getOpcode() == TargetOpcode::G_ROTR); + unsigned Bitsize = + MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits(); + Builder.setInstrAndDebugLoc(MI); + Register Amt = MI.getOperand(2).getReg(); + LLT AmtTy = MRI.getType(Amt); + auto Bits = Builder.buildConstant(AmtTy, Bitsize); + Amt = Builder.buildURem(AmtTy, MI.getOperand(2).getReg(), Bits).getReg(0); + Observer.changingInstr(MI); + MI.getOperand(2).setReg(Amt); + Observer.changedInstr(MI); +} + +bool CombinerHelper::matchICmpToTrueFalseKnownBits(MachineInstr &MI, + int64_t &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_ICMP); + auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()); + auto KnownLHS = KB->getKnownBits(MI.getOperand(2).getReg()); + auto KnownRHS = KB->getKnownBits(MI.getOperand(3).getReg()); + Optional<bool> KnownVal; + switch (Pred) { + default: + llvm_unreachable("Unexpected G_ICMP predicate?"); + case CmpInst::ICMP_EQ: + KnownVal = KnownBits::eq(KnownLHS, KnownRHS); + break; + case CmpInst::ICMP_NE: + KnownVal = KnownBits::ne(KnownLHS, KnownRHS); + 
break; + case CmpInst::ICMP_SGE: + KnownVal = KnownBits::sge(KnownLHS, KnownRHS); + break; + case CmpInst::ICMP_SGT: + KnownVal = KnownBits::sgt(KnownLHS, KnownRHS); + break; + case CmpInst::ICMP_SLE: + KnownVal = KnownBits::sle(KnownLHS, KnownRHS); + break; + case CmpInst::ICMP_SLT: + KnownVal = KnownBits::slt(KnownLHS, KnownRHS); + break; + case CmpInst::ICMP_UGE: + KnownVal = KnownBits::uge(KnownLHS, KnownRHS); + break; + case CmpInst::ICMP_UGT: + KnownVal = KnownBits::ugt(KnownLHS, KnownRHS); + break; + case CmpInst::ICMP_ULE: + KnownVal = KnownBits::ule(KnownLHS, KnownRHS); + break; + case CmpInst::ICMP_ULT: + KnownVal = KnownBits::ult(KnownLHS, KnownRHS); + break; + } + if (!KnownVal) + return false; + MatchInfo = + *KnownVal + ? getICmpTrueVal(getTargetLowering(), + /*IsVector = */ + MRI.getType(MI.getOperand(0).getReg()).isVector(), + /* IsFP = */ false) + : 0; + return true; +} + +/// Form a G_SBFX from a G_SEXT_INREG fed by a right shift. +bool CombinerHelper::matchBitfieldExtractFromSExtInReg( + MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + LLT Ty = MRI.getType(Src); + LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty); + if (!LI || !LI->isLegalOrCustom({TargetOpcode::G_SBFX, {Ty, ExtractTy}})) + return false; + int64_t Width = MI.getOperand(2).getImm(); + Register ShiftSrc; + int64_t ShiftImm; + if (!mi_match( + Src, MRI, + m_OneNonDBGUse(m_any_of(m_GAShr(m_Reg(ShiftSrc), m_ICst(ShiftImm)), + m_GLShr(m_Reg(ShiftSrc), m_ICst(ShiftImm)))))) + return false; + if (ShiftImm < 0 || ShiftImm + Width > Ty.getScalarSizeInBits()) + return false; + + MatchInfo = [=](MachineIRBuilder &B) { + auto Cst1 = B.buildConstant(ExtractTy, ShiftImm); + auto Cst2 = B.buildConstant(ExtractTy, Width); + B.buildSbfx(Dst, ShiftSrc, Cst1, Cst2); + }; + return true; +} + +/// Form a G_UBFX from "(a srl b) & mask", where b and mask are constants. +bool CombinerHelper::matchBitfieldExtractFromAnd( + MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_AND); + Register Dst = MI.getOperand(0).getReg(); + LLT Ty = MRI.getType(Dst); + if (!getTargetLowering().isConstantUnsignedBitfieldExtactLegal( + TargetOpcode::G_UBFX, Ty, Ty)) + return false; + + int64_t AndImm, LSBImm; + Register ShiftSrc; + const unsigned Size = Ty.getScalarSizeInBits(); + if (!mi_match(MI.getOperand(0).getReg(), MRI, + m_GAnd(m_OneNonDBGUse(m_GLShr(m_Reg(ShiftSrc), m_ICst(LSBImm))), + m_ICst(AndImm)))) + return false; + + // The mask is a mask of the low bits iff imm & (imm+1) == 0. + auto MaybeMask = static_cast<uint64_t>(AndImm); + if (MaybeMask & (MaybeMask + 1)) + return false; + + // LSB must fit within the register. 
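matchBitfieldExtractFromAnd, continued just below, accepts the AND immediate only when it is a mask of contiguous low bits, tested by imm & (imm + 1) == 0, and derives the field width from the count of trailing ones. A standalone model of the resulting G_UBFX (uses the GCC/Clang popcount builtin):

    #include <cassert>
    #include <cstdint>

    uint32_t Ubfx(uint32_t X, unsigned Lsb, unsigned Width) {
      uint32_t Mask = Width >= 32 ? ~0u : ((1u << Width) - 1);
      return (X >> Lsb) & Mask;
    }

    int main() {
      uint32_t AndImm = 0xFFu;                     // the mask from the G_AND
      assert((AndImm & (AndImm + 1)) == 0);        // contiguous low bits?
      unsigned Width = __builtin_popcount(AndImm); // == countTrailingOnes here
      assert(Ubfx(0xDEADBEEFu, 8, Width) == ((0xDEADBEEFu >> 8) & AndImm));
      return 0;
    }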
+ if (static_cast<uint64_t>(LSBImm) >= Size) + return false; + + LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty); + uint64_t Width = APInt(Size, AndImm).countTrailingOnes(); + MatchInfo = [=](MachineIRBuilder &B) { + auto WidthCst = B.buildConstant(ExtractTy, Width); + auto LSBCst = B.buildConstant(ExtractTy, LSBImm); + B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {ShiftSrc, LSBCst, WidthCst}); + }; + return true; +} + +bool CombinerHelper::reassociationCanBreakAddressingModePattern( + MachineInstr &PtrAdd) { + assert(PtrAdd.getOpcode() == TargetOpcode::G_PTR_ADD); + + Register Src1Reg = PtrAdd.getOperand(1).getReg(); + MachineInstr *Src1Def = getOpcodeDef(TargetOpcode::G_PTR_ADD, Src1Reg, MRI); + if (!Src1Def) + return false; + + Register Src2Reg = PtrAdd.getOperand(2).getReg(); + + if (MRI.hasOneNonDBGUse(Src1Reg)) + return false; + + auto C1 = getConstantVRegVal(Src1Def->getOperand(2).getReg(), MRI); + if (!C1) + return false; + auto C2 = getConstantVRegVal(Src2Reg, MRI); + if (!C2) + return false; + + const APInt &C1APIntVal = *C1; + const APInt &C2APIntVal = *C2; + const int64_t CombinedValue = (C1APIntVal + C2APIntVal).getSExtValue(); + + for (auto &UseMI : MRI.use_nodbg_instructions(Src1Reg)) { + // This combine may end up running before ptrtoint/inttoptr combines + // manage to eliminate redundant conversions, so try to look through them. + MachineInstr *ConvUseMI = &UseMI; + unsigned ConvUseOpc = ConvUseMI->getOpcode(); + while (ConvUseOpc == TargetOpcode::G_INTTOPTR || + ConvUseOpc == TargetOpcode::G_PTRTOINT) { + Register DefReg = ConvUseMI->getOperand(0).getReg(); + if (!MRI.hasOneNonDBGUse(DefReg)) + break; + ConvUseMI = &*MRI.use_instr_nodbg_begin(DefReg); + ConvUseOpc = ConvUseMI->getOpcode(); + } + auto LoadStore = ConvUseOpc == TargetOpcode::G_LOAD || + ConvUseOpc == TargetOpcode::G_STORE; + if (!LoadStore) + continue; + // Is x[offset2] already not a legal addressing mode? If so then + // reassociating the constants breaks nothing (we test offset2 because + // that's the one we hope to fold into the load or store). + TargetLoweringBase::AddrMode AM; + AM.HasBaseReg = true; + AM.BaseOffs = C2APIntVal.getSExtValue(); + unsigned AS = + MRI.getType(ConvUseMI->getOperand(1).getReg()).getAddressSpace(); + Type *AccessTy = + getTypeForLLT(MRI.getType(ConvUseMI->getOperand(0).getReg()), + PtrAdd.getMF()->getFunction().getContext()); + const auto &TLI = *PtrAdd.getMF()->getSubtarget().getTargetLowering(); + if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM, + AccessTy, AS)) + continue; + + // Would x[offset1+offset2] still be a legal addressing mode? + AM.BaseOffs = CombinedValue; + if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM, + AccessTy, AS)) + return true; + } + + return false; +} + +bool CombinerHelper::matchReassocPtrAdd( + MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD); + // We're trying to match a few pointer computation patterns here for + // re-association opportunities. + // 1) Isolating a constant operand to be on the RHS, e.g.: + // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C) + // + // 2) Folding two constants in each sub-tree as long as such folding + // doesn't break a legal addressing mode. 
+ // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2) + Register Src1Reg = MI.getOperand(1).getReg(); + Register Src2Reg = MI.getOperand(2).getReg(); + MachineInstr *LHS = MRI.getVRegDef(Src1Reg); + MachineInstr *RHS = MRI.getVRegDef(Src2Reg); + + if (LHS->getOpcode() != TargetOpcode::G_PTR_ADD) { + // Try to match example 1). + if (RHS->getOpcode() != TargetOpcode::G_ADD) + return false; + auto C2 = getConstantVRegVal(RHS->getOperand(2).getReg(), MRI); + if (!C2) + return false; + + MatchInfo = [=,&MI](MachineIRBuilder &B) { + LLT PtrTy = MRI.getType(MI.getOperand(0).getReg()); + + auto NewBase = + Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg()); + Observer.changingInstr(MI); + MI.getOperand(1).setReg(NewBase.getReg(0)); + MI.getOperand(2).setReg(RHS->getOperand(2).getReg()); + Observer.changedInstr(MI); + }; + } else { + // Try to match example 2. + Register LHSSrc1 = LHS->getOperand(1).getReg(); + Register LHSSrc2 = LHS->getOperand(2).getReg(); + auto C1 = getConstantVRegVal(LHSSrc2, MRI); + if (!C1) + return false; + auto C2 = getConstantVRegVal(Src2Reg, MRI); + if (!C2) + return false; + + MatchInfo = [=, &MI](MachineIRBuilder &B) { + auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2); + Observer.changingInstr(MI); + MI.getOperand(1).setReg(LHSSrc1); + MI.getOperand(2).setReg(NewCst.getReg(0)); + Observer.changedInstr(MI); + }; + } + return !reassociationCanBreakAddressingModePattern(MI); +} + +bool CombinerHelper::matchConstantFold(MachineInstr &MI, APInt &MatchInfo) { + Register Op1 = MI.getOperand(1).getReg(); + Register Op2 = MI.getOperand(2).getReg(); + auto MaybeCst = ConstantFoldBinOp(MI.getOpcode(), Op1, Op2, MRI); + if (!MaybeCst) + return false; + MatchInfo = *MaybeCst; return true; } diff --git a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp index 2de20489e1d1..8146a67d4dfb 100644 --- a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp +++ b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/IR/Module.h" #define DEBUG_TYPE "gisel-known-bits" @@ -87,10 +88,10 @@ LLVM_ATTRIBUTE_UNUSED static void dumpResult(const MachineInstr &MI, const KnownBits &Known, unsigned Depth) { dbgs() << "[" << Depth << "] Compute known bits: " << MI << "[" << Depth << "] Computed for: " << MI << "[" << Depth << "] Known: 0x" - << (Known.Zero | Known.One).toString(16, false) << "\n" - << "[" << Depth << "] Zero: 0x" << Known.Zero.toString(16, false) + << toString(Known.Zero | Known.One, 16, false) << "\n" + << "[" << Depth << "] Zero: 0x" << toString(Known.Zero, 16, false) << "\n" - << "[" << Depth << "] One: 0x" << Known.One.toString(16, false) + << "[" << Depth << "] One: 0x" << toString(Known.One, 16, false) << "\n"; } @@ -113,6 +114,20 @@ void GISelKnownBits::computeKnownBitsMin(Register Src0, Register Src1, Known = KnownBits::commonBits(Known, Known2); } +// Bitfield extract is computed as (Src >> Offset) & Mask, where Mask is +// created using Width. Use this function when the inputs are KnownBits +// objects. TODO: Move this KnownBits.h if this is usable in more cases. 
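matchReassocPtrAdd above turns G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) into G_PTR_ADD(BASE, C1 + C2) unless reassociationCanBreakAddressingModePattern finds a load or store that could have folded the inner offset but not the combined one; the arithmetic itself is plain associativity (standalone C++):

    #include <cassert>

    int main() {
      char Buf[64] = {};
      char *Base = Buf;
      long C1 = 8, C2 = 16;
      // The guard above is about which form the target's addressing modes
      // can absorb, not about the validity of the fold itself.
      assert((Base + C1) + C2 == Base + (C1 + C2));
      return 0;
    }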
+static KnownBits extractBits(unsigned BitWidth, const KnownBits &SrcOpKnown, + const KnownBits &OffsetKnown, + const KnownBits &WidthKnown) { + KnownBits Mask(BitWidth); + Mask.Zero = APInt::getBitsSetFrom( + BitWidth, WidthKnown.getMaxValue().getLimitedValue(BitWidth)); + Mask.One = APInt::getLowBitsSet( + BitWidth, WidthKnown.getMinValue().getLimitedValue(BitWidth)); + return KnownBits::lshr(SrcOpKnown, OffsetKnown) & Mask; +} + void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, const APInt &DemandedElts, unsigned Depth) { @@ -129,7 +144,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, return; } - unsigned BitWidth = DstTy.getSizeInBits(); + unsigned BitWidth = DstTy.getScalarSizeInBits(); auto CacheEntry = ComputeKnownBitsCache.find(R); if (CacheEntry != ComputeKnownBitsCache.end()) { Known = CacheEntry->second; @@ -140,9 +155,6 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, } Known = KnownBits(BitWidth); // Don't know anything - if (DstTy.isVector()) - return; // TODO: Handle vectors. - // Depth may get bigger than max depth if it gets passed to a different // GISelKnownBits object. // This may happen when say a generic part uses a GISelKnownBits object @@ -164,6 +176,25 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, TL.computeKnownBitsForTargetInstr(*this, R, Known, DemandedElts, MRI, Depth); break; + case TargetOpcode::G_BUILD_VECTOR: { + // Collect the known bits that are shared by every demanded vector element. + Known.Zero.setAllBits(); Known.One.setAllBits(); + for (unsigned i = 0, e = MI.getNumOperands() - 1; i < e; ++i) { + if (!DemandedElts[i]) + continue; + + computeKnownBitsImpl(MI.getOperand(i + 1).getReg(), Known2, DemandedElts, + Depth + 1); + + // Known bits are the values that are shared by every demanded element. + Known = KnownBits::commonBits(Known, Known2); + + // If we don't know any bits, early out. + if (Known.isUnknown()) + break; + } + break; + } case TargetOpcode::COPY: case TargetOpcode::G_PHI: case TargetOpcode::PHI: { @@ -244,6 +275,8 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, break; } case TargetOpcode::G_PTR_ADD: { + if (DstTy.isVector()) + break; // G_PTR_ADD is like G_ADD. FIXME: Is this true for all targets? 
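extractBits above models (Src >> Offset) & Mask over KnownBits: bits at or beyond the largest possible Width become known zero, and bits below the smallest possible Width keep what the shifted source knew. The same masks with concrete bounds (standalone; the width bounds are illustrative):

    #include <cassert>
    #include <cstdint>

    int main() {
      unsigned MaxWidth = 12, MinWidth = 8;    // bounds on the Width operand
      uint32_t KnownZero = ~0u << MaxWidth;    // cf. APInt::getBitsSetFrom
      uint32_t KeptLow = (1u << MinWidth) - 1; // cf. APInt::getLowBitsSet
      uint32_t Src = 0xABCDEF12u, Offset = 4;
      uint32_t Field = (Src >> Offset) & ((1u << MaxWidth) - 1);
      assert((Field & KnownZero) == 0);        // high bits really are zero
      assert(((Src >> Offset) & KeptLow) == (Field & KeptLow)); // low bits pass through
      return 0;
    }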
LLT Ty = MRI.getType(MI.getOperand(1).getReg()); if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace())) @@ -284,7 +317,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, Depth + 1); computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts, Depth + 1); - Known = KnownBits::computeForMul(Known, Known2); + Known = KnownBits::mul(Known, Known2); break; } case TargetOpcode::G_SELECT: { @@ -332,6 +365,8 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, } case TargetOpcode::G_FCMP: case TargetOpcode::G_ICMP: { + if (DstTy.isVector()) + break; if (TL.getBooleanContents(DstTy.isVector(), Opcode == TargetOpcode::G_FCMP) == TargetLowering::ZeroOrOneBooleanContent && @@ -347,6 +382,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, Known = Known.sext(BitWidth); break; } + case TargetOpcode::G_ASSERT_SEXT: case TargetOpcode::G_SEXT_INREG: { computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, Depth + 1); @@ -368,6 +404,8 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, break; } case TargetOpcode::G_ZEXTLOAD: { + if (DstTy.isVector()) + break; // Everything above the retrieved bits is zero Known.Zero.setBitsFrom((*MI.memoperands_begin())->getSizeInBits()); break; @@ -401,15 +439,25 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, } case TargetOpcode::G_INTTOPTR: case TargetOpcode::G_PTRTOINT: + if (DstTy.isVector()) + break; // Fall through and handle them the same as zext/trunc. LLVM_FALLTHROUGH; + case TargetOpcode::G_ASSERT_ZEXT: case TargetOpcode::G_ZEXT: case TargetOpcode::G_TRUNC: { Register SrcReg = MI.getOperand(1).getReg(); LLT SrcTy = MRI.getType(SrcReg); - unsigned SrcBitWidth = SrcTy.isPointer() - ? DL.getIndexSizeInBits(SrcTy.getAddressSpace()) - : SrcTy.getSizeInBits(); + unsigned SrcBitWidth; + + // G_ASSERT_ZEXT stores the original bitwidth in the immediate operand. + if (Opcode == TargetOpcode::G_ASSERT_ZEXT) + SrcBitWidth = MI.getOperand(2).getImm(); + else { + SrcBitWidth = SrcTy.isPointer() + ? 
DL.getIndexSizeInBits(SrcTy.getAddressSpace()) + : SrcTy.getSizeInBits(); + } assert(SrcBitWidth && "SrcBitWidth can't be zero"); Known = Known.zextOrTrunc(SrcBitWidth); computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1); @@ -431,6 +479,8 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, break; } case TargetOpcode::G_UNMERGE_VALUES: { + if (DstTy.isVector()) + break; unsigned NumOps = MI.getNumOperands(); Register SrcReg = MI.getOperand(NumOps - 1).getReg(); if (MRI.getType(SrcReg).isVector()) @@ -451,13 +501,41 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, case TargetOpcode::G_BSWAP: { Register SrcReg = MI.getOperand(1).getReg(); computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1); - Known.byteSwap(); + Known = Known.byteSwap(); break; } case TargetOpcode::G_BITREVERSE: { Register SrcReg = MI.getOperand(1).getReg(); computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1); - Known.reverseBits(); + Known = Known.reverseBits(); + break; + } + case TargetOpcode::G_UBFX: { + KnownBits SrcOpKnown, OffsetKnown, WidthKnown; + computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts, + Depth + 1); + computeKnownBitsImpl(MI.getOperand(2).getReg(), OffsetKnown, DemandedElts, + Depth + 1); + computeKnownBitsImpl(MI.getOperand(3).getReg(), WidthKnown, DemandedElts, + Depth + 1); + Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown); + break; + } + case TargetOpcode::G_SBFX: { + KnownBits SrcOpKnown, OffsetKnown, WidthKnown; + computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts, + Depth + 1); + computeKnownBitsImpl(MI.getOperand(2).getReg(), OffsetKnown, DemandedElts, + Depth + 1); + computeKnownBitsImpl(MI.getOperand(3).getReg(), WidthKnown, DemandedElts, + Depth + 1); + Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown); + // Sign extend the extracted value using shift left and arithmetic shift + // right. + KnownBits ExtKnown = KnownBits::makeConstant(APInt(BitWidth, BitWidth)); + KnownBits ShiftKnown = KnownBits::computeForAddSub( + /*Add*/ false, /*NSW*/ false, ExtKnown, WidthKnown); + Known = KnownBits::ashr(KnownBits::shl(Known, ShiftKnown), ShiftKnown); break; } } @@ -523,6 +601,7 @@ unsigned GISelKnownBits::computeNumSignBits(Register R, unsigned Tmp = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits(); return computeNumSignBits(Src, DemandedElts, Depth + 1) + Tmp; } + case TargetOpcode::G_ASSERT_SEXT: case TargetOpcode::G_SEXT_INREG: { // Max of the input and what this extends. 
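The new G_SBFX case above sign-extends the extracted field by shifting it to the top of the register and arithmetic-shifting it back, the shift amount being BitWidth - Width computed in KnownBits form. With concrete values (standalone; assumes the usual arithmetic right shift for signed types):

    #include <cassert>
    #include <cstdint>

    // Sign-extend a Width-bit field sitting in the low bits of X.
    int32_t SignExtendField(uint32_t X, unsigned Width) {
      unsigned Shift = 32 - Width;
      return (int32_t)(X << Shift) >> Shift; // shl then ashr, as in the G_SBFX case
    }

    int main() {
      assert(SignExtendField(0xA, 4) == -6); // 0b1010 is -6 as a signed 4-bit value
      assert(SignExtendField(0x5, 4) == 5);  // sign bit clear: unchanged
      return 0;
    }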
Register Src = MI.getOperand(1).getReg(); diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index b97c369b832d..73b763710fdf 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -72,6 +72,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/Utils/MemoryOpRemark.h" #include <algorithm> #include <cassert> #include <cstddef> @@ -97,6 +98,7 @@ INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass) INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(StackProtector) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI", false, false) @@ -164,6 +166,8 @@ void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<GISelCSEAnalysisWrapperPass>(); if (OptLevel != CodeGenOpt::None) AU.addRequired<BranchProbabilityInfoWrapperPass>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); + AU.addPreserved<TargetLibraryInfoWrapperPass>(); getSelectionDAGFallbackAnalysisUsage(AU); MachineFunctionPass::getAnalysisUsage(AU); } @@ -253,23 +257,13 @@ int IRTranslator::getOrCreateFrameIndex(const AllocaInst &AI) { Align IRTranslator::getMemOpAlign(const Instruction &I) { if (const StoreInst *SI = dyn_cast<StoreInst>(&I)) return SI->getAlign(); - if (const LoadInst *LI = dyn_cast<LoadInst>(&I)) { + if (const LoadInst *LI = dyn_cast<LoadInst>(&I)) return LI->getAlign(); - } - if (const AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(&I)) { - // TODO(PR27168): This instruction has no alignment attribute, but unlike - // the default alignment for load/store, the default here is to assume - // it has NATURAL alignment, not DataLayout-specified alignment. - const DataLayout &DL = AI->getModule()->getDataLayout(); - return Align(DL.getTypeStoreSize(AI->getCompareOperand()->getType())); - } - if (const AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(&I)) { - // TODO(PR27168): This instruction has no alignment attribute, but unlike - // the default alignment for load/store, the default here is to assume - // it has NATURAL alignment, not DataLayout-specified alignment. - const DataLayout &DL = AI->getModule()->getDataLayout(); - return Align(DL.getTypeStoreSize(AI->getValOperand()->getType())); - } + if (const AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(&I)) + return AI->getAlign(); + if (const AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(&I)) + return AI->getAlign(); + OptimizationRemarkMissed R("gisel-irtranslator", "", &I); R << "unable to translate memop: " << ore::NV("Opcode", &I); reportTranslationError(*MF, *TPC, *ORE, R); @@ -840,9 +834,8 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB, // For conditional branch lowering, we might try to do something silly like // emit an G_ICMP to compare an existing G_ICMP i1 result with true. If so, // just re-use the existing condition vreg. 
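getMemOpAlign above drops the hand-computed "natural" alignment for cmpxchg and atomicrmw and reads the alignment stored on the instruction, which both atomic instruction classes now expose via getAlign(). A condensed restatement (the helper name is mine; in-tree LLVM code, not standalone):

    // Alignment now comes straight from the IR instruction.
    Align getAtomicAlign(const Instruction &I) {
      if (const auto *AI = dyn_cast<AtomicCmpXchgInst>(&I))
        return AI->getAlign();
      if (const auto *AI = dyn_cast<AtomicRMWInst>(&I))
        return AI->getAlign();
      llvm_unreachable("not an atomic memory instruction");
    }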
- if (CI && CI->getZExtValue() == 1 && - MRI->getType(CondLHS).getSizeInBits() == 1 && - CB.PredInfo.Pred == CmpInst::ICMP_EQ) { + if (MRI->getType(CondLHS).getSizeInBits() == 1 && CI && + CI->getZExtValue() == 1 && CB.PredInfo.Pred == CmpInst::ICMP_EQ) { Cond = CondLHS; } else { Register CondRHS = getOrCreateVReg(*CB.CmpRHS); @@ -1307,7 +1300,7 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) { AAMDNodes AAMetadata; LI.getAAMetadata(AAMetadata); auto MMO = MF->getMachineMemOperand( - Ptr, Flags, MRI->getType(Regs[i]).getSizeInBytes(), + Ptr, Flags, MRI->getType(Regs[i]), commonAlignment(BaseAlign, Offsets[i] / 8), AAMetadata, Ranges, LI.getSyncScopeID(), LI.getOrdering()); MIRBuilder.buildLoad(Regs[i], Addr, *MMO); @@ -1349,7 +1342,7 @@ bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) { AAMDNodes AAMetadata; SI.getAAMetadata(AAMetadata); auto MMO = MF->getMachineMemOperand( - Ptr, Flags, MRI->getType(Vals[i]).getSizeInBytes(), + Ptr, Flags, MRI->getType(Vals[i]), commonAlignment(BaseAlign, Offsets[i] / 8), AAMetadata, nullptr, SI.getSyncScopeID(), SI.getOrdering()); MIRBuilder.buildStore(Vals[i], Addr, *MMO); @@ -1479,14 +1472,22 @@ bool IRTranslator::translateGetElementPtr(const User &U, // Normalize Vector GEP - all scalar operands should be converted to the // splat vector. unsigned VectorWidth = 0; - if (auto *VT = dyn_cast<VectorType>(U.getType())) + + // True if we should use a splat vector; using VectorWidth alone is not + // sufficient. + bool WantSplatVector = false; + if (auto *VT = dyn_cast<VectorType>(U.getType())) { VectorWidth = cast<FixedVectorType>(VT)->getNumElements(); + // We don't produce 1 x N vectors; those are treated as scalars. + WantSplatVector = VectorWidth > 1; + } // We might need to splat the base pointer into a vector if the offsets // are vectors. 
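translateLoad and translateStore above now hand getMachineMemOperand the LLT of the accessed value instead of a byte count, so the memory operand carries a type rather than a bare size. The changed call shape, following the patch (placeholder names; in-tree code):

    // Before: size in bytes. After: the LLT, which also implies the size.
    auto *MMO = MF->getMachineMemOperand(
        Ptr, Flags, MRI->getType(ValReg), // LLT instead of getSizeInBytes()
        commonAlignment(BaseAlign, Offset / 8), AAInfo, Ranges,
        SSID, Ordering);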
- if (VectorWidth && !PtrTy.isVector()) { + if (WantSplatVector && !PtrTy.isVector()) { BaseReg = - MIRBuilder.buildSplatVector(LLT::vector(VectorWidth, PtrTy), BaseReg) + MIRBuilder + .buildSplatVector(LLT::fixed_vector(VectorWidth, PtrTy), BaseReg) .getReg(0); PtrIRTy = FixedVectorType::get(PtrIRTy, VectorWidth); PtrTy = getLLTForType(*PtrIRTy, *DL); @@ -1522,7 +1523,7 @@ bool IRTranslator::translateGetElementPtr(const User &U, Register IdxReg = getOrCreateVReg(*Idx); LLT IdxTy = MRI->getType(IdxReg); if (IdxTy != OffsetTy) { - if (!IdxTy.isVector() && VectorWidth) { + if (!IdxTy.isVector() && WantSplatVector) { IdxReg = MIRBuilder.buildSplatVector( OffsetTy.changeElementType(IdxTy), IdxReg).getReg(0); } @@ -1571,7 +1572,7 @@ bool IRTranslator::translateMemFunc(const CallInst &CI, Register SrcReg = getOrCreateVReg(**AI); LLT SrcTy = MRI->getType(SrcReg); if (SrcTy.isPointer()) - MinPtrSize = std::min(SrcTy.getSizeInBits(), MinPtrSize); + MinPtrSize = std::min<unsigned>(SrcTy.getSizeInBits(), MinPtrSize); SrcRegs.push_back(SrcReg); } @@ -1595,6 +1596,9 @@ bool IRTranslator::translateMemFunc(const CallInst &CI, if (auto *MCI = dyn_cast<MemCpyInst>(&CI)) { DstAlign = MCI->getDestAlign().valueOrOne(); SrcAlign = MCI->getSourceAlign().valueOrOne(); + } else if (auto *MCI = dyn_cast<MemCpyInlineInst>(&CI)) { + DstAlign = MCI->getDestAlign().valueOrOne(); + SrcAlign = MCI->getSourceAlign().valueOrOne(); } else if (auto *MMI = dyn_cast<MemMoveInst>(&CI)) { DstAlign = MMI->getDestAlign().valueOrOne(); SrcAlign = MMI->getSourceAlign().valueOrOne(); @@ -1603,10 +1607,12 @@ bool IRTranslator::translateMemFunc(const CallInst &CI, DstAlign = MSI->getDestAlign().valueOrOne(); } - // We need to propagate the tail call flag from the IR inst as an argument. - // Otherwise, we have to pessimize and assume later that we cannot tail call - // any memory intrinsics. - ICall.addImm(CI.isTailCall() ? 1 : 0); + if (Opcode != TargetOpcode::G_MEMCPY_INLINE) { + // We need to propagate the tail call flag from the IR inst as an argument. + // Otherwise, we have to pessimize and assume later that we cannot tail call + // any memory intrinsics. + ICall.addImm(CI.isTailCall() ? 1 : 0); + } // Create mem operands to store the alignment and volatile info. auto VolFlag = IsVol ? 
MachineMemOperand::MOVolatile : MachineMemOperand::MONone; @@ -1633,12 +1639,14 @@ void IRTranslator::getStackGuard(Register DstReg, if (!Global) return; + unsigned AddrSpace = Global->getType()->getPointerAddressSpace(); + LLT PtrTy = LLT::pointer(AddrSpace, DL->getPointerSizeInBits(AddrSpace)); + MachinePointerInfo MPInfo(Global); auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable; - MachineMemOperand *MemRef = - MF->getMachineMemOperand(MPInfo, Flags, DL->getPointerSizeInBits() / 8, - DL->getPointerABIAlignment(0)); + MachineMemOperand *MemRef = MF->getMachineMemOperand( + MPInfo, Flags, PtrTy, DL->getPointerABIAlignment(AddrSpace)); MIB.setMemRefs({MemRef}); } @@ -1826,6 +1834,16 @@ bool IRTranslator::translateConstrainedFPIntrinsic( bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, MachineIRBuilder &MIRBuilder) { + if (auto *MI = dyn_cast<AnyMemIntrinsic>(&CI)) { + if (ORE->enabled()) { + const Function &F = *MI->getParent()->getParent(); + auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); + if (MemoryOpRemark::canHandle(MI, TLI)) { + MemoryOpRemark R(*ORE, "gisel-irtranslator-memsize", *DL, TLI); + R.visit(MI); + } + } + } // If this is a simple intrinsic (that is, we just need to add a def of // a vreg, and uses for each arg operand, then translate it. @@ -1924,9 +1942,9 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, assert(DI.getVariable()->isValidLocationForIntrinsic( MIRBuilder.getDebugLoc()) && "Expected inlined-at fields to agree"); - if (!V) { - // Currently the optimizer can produce this; insert an undef to - // help debugging. Probably the optimizer should not do this. + if (!V || DI.hasArgList()) { + // DI cannot produce a valid DBG_VALUE, so produce an undef DBG_VALUE to + // terminate any prior location. MIRBuilder.buildIndirectDbgValue(0, DI.getVariable(), DI.getExpression()); } else if (const auto *CI = dyn_cast<Constant>(V)) { MIRBuilder.buildConstDbgValue(*CI, DI.getVariable(), DI.getExpression()); @@ -2027,6 +2045,8 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, getOrCreateVReg(*CI.getArgOperand(0)), MachineInstr::copyFlagsFromInstruction(CI)); return true; + case Intrinsic::memcpy_inline: + return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY_INLINE); case Intrinsic::memcpy: return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY); case Intrinsic::memmove: @@ -2063,7 +2083,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, *MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore | MachineMemOperand::MOVolatile, - PtrTy.getSizeInBits() / 8, Align(8))); + PtrTy, Align(8))); return true; } case Intrinsic::stacksave: { @@ -2255,6 +2275,17 @@ bool IRTranslator::translateCallBase(const CallBase &CB, Args.push_back(getOrCreateVRegs(*Arg)); } + if (auto *CI = dyn_cast<CallInst>(&CB)) { + if (ORE->enabled()) { + const Function &F = *CI->getParent()->getParent(); + auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); + if (MemoryOpRemark::canHandle(CI, TLI)) { + MemoryOpRemark R(*ORE, "gisel-irtranslator-memsize", *DL, TLI); + R.visit(CI); + } + } + } + // We don't set HasCalls on MFI here yet because call lowering may decide to // optimize into tail calls. Instead, we defer that to selection where a final // scan is done to check if any instructions are calls. 
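translateKnownIntrinsic and translateCallBase above gain memory-op size remarks, gated so the TargetLibraryInfo lookup and remark construction only happen when the remark emitter is enabled; the pass dependency added at the top of the file feeds this. The guard, as the patch writes it:

    if (ORE->enabled()) {
      const Function &F = *CI->getParent()->getParent();
      auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
      if (MemoryOpRemark::canHandle(CI, TLI)) {
        MemoryOpRemark R(*ORE, "gisel-irtranslator-memsize", *DL, TLI);
        R.visit(CI);
      }
    }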
@@ -2349,10 +2380,11 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { if (TLI.getTgtMemIntrinsic(Info, CI, *MF, ID)) { Align Alignment = Info.align.getValueOr( DL->getABITypeAlign(Info.memVT.getTypeForEVT(F->getContext()))); - - uint64_t Size = Info.memVT.getStoreSize(); + LLT MemTy = Info.memVT.isSimple() + ? getLLTForMVT(Info.memVT.getSimpleVT()) + : LLT::scalar(Info.memVT.getStoreSizeInBits()); MIB.addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Info.ptrVal), - Info.flags, Size, Alignment)); + Info.flags, MemTy, Alignment)); } return true; @@ -2423,8 +2455,6 @@ bool IRTranslator::translateInvoke(const User &U, const BasicBlock *EHPadBB = I.getSuccessor(1); const Function *Fn = I.getCalledFunction(); - if (I.isInlineAsm()) - return false; // FIXME: support invoking patchpoint and statepoint intrinsics. if (Fn && Fn->isIntrinsic()) @@ -2442,12 +2472,37 @@ bool IRTranslator::translateInvoke(const User &U, if (!isa<LandingPadInst>(EHPadBB->getFirstNonPHI())) return false; + bool LowerInlineAsm = false; + if (I.isInlineAsm()) { + const InlineAsm *IA = cast<InlineAsm>(I.getCalledOperand()); + if (!IA->canThrow()) { + // Fast path without emitting EH_LABELs. + + if (!translateInlineAsm(I, MIRBuilder)) + return false; + + MachineBasicBlock *InvokeMBB = &MIRBuilder.getMBB(), + *ReturnMBB = &getMBB(*ReturnBB); + + // Update successor info. + addSuccessorWithProb(InvokeMBB, ReturnMBB, BranchProbability::getOne()); + + MIRBuilder.buildBr(*ReturnMBB); + return true; + } else { + LowerInlineAsm = true; + } + } + // Emit the actual call, bracketed by EH_LABELs so that the MF knows about // the region covered by the try. MCSymbol *BeginSymbol = Context.createTempSymbol(); MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol); - if (!translateCallBase(I, MIRBuilder)) + if (LowerInlineAsm) { + if (!translateInlineAsm(I, MIRBuilder)) + return false; + } else if (!translateCallBase(I, MIRBuilder)) return false; MCSymbol *EndSymbol = Context.createTempSymbol(); @@ -2695,9 +2750,6 @@ bool IRTranslator::translateAtomicCmpXchg(const User &U, auto &TLI = *MF->getSubtarget().getTargetLowering(); auto Flags = TLI.getAtomicMemOperandFlags(I, *DL); - Type *ResType = I.getType(); - Type *ValType = ResType->Type::getStructElementType(0); - auto Res = getOrCreateVRegs(I); Register OldValRes = Res[0]; Register SuccessRes = Res[1]; @@ -2711,9 +2763,9 @@ bool IRTranslator::translateAtomicCmpXchg(const User &U, MIRBuilder.buildAtomicCmpXchgWithSuccess( OldValRes, SuccessRes, Addr, Cmp, NewVal, *MF->getMachineMemOperand( - MachinePointerInfo(I.getPointerOperand()), Flags, - DL->getTypeStoreSize(ValType), getMemOpAlign(I), AAMetadata, nullptr, - I.getSyncScopeID(), I.getSuccessOrdering(), I.getFailureOrdering())); + MachinePointerInfo(I.getPointerOperand()), Flags, MRI->getType(Cmp), + getMemOpAlign(I), AAMetadata, nullptr, I.getSyncScopeID(), + I.getSuccessOrdering(), I.getFailureOrdering())); return true; } @@ -2723,8 +2775,6 @@ bool IRTranslator::translateAtomicRMW(const User &U, auto &TLI = *MF->getSubtarget().getTargetLowering(); auto Flags = TLI.getAtomicMemOperandFlags(I, *DL); - Type *ResType = I.getType(); - Register Res = getOrCreateVReg(I); Register Addr = getOrCreateVReg(*I.getPointerOperand()); Register Val = getOrCreateVReg(*I.getValOperand()); @@ -2780,9 +2830,9 @@ bool IRTranslator::translateAtomicRMW(const User &U, MIRBuilder.buildAtomicRMW( Opcode, Res, Addr, Val, *MF->getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), - Flags, 
DL->getTypeStoreSize(ResType), - getMemOpAlign(I), AAMetadata, nullptr, - I.getSyncScopeID(), I.getOrdering())); + Flags, MRI->getType(Val), getMemOpAlign(I), + AAMetadata, nullptr, I.getSyncScopeID(), + I.getOrdering())); return true; } @@ -2853,13 +2903,6 @@ bool IRTranslator::valueIsSplit(const Value &V, bool IRTranslator::translate(const Instruction &Inst) { CurBuilder->setDebugLoc(Inst.getDebugLoc()); - // We only emit constants into the entry block from here. To prevent jumpy - // debug behaviour set the line to 0. - if (const DebugLoc &DL = Inst.getDebugLoc()) - EntryBuilder->setDebugLoc(DILocation::get( - Inst.getContext(), 0, 0, DL.getScope(), DL.getInlinedAt())); - else - EntryBuilder->setDebugLoc(DebugLoc()); auto &TLI = *MF->getSubtarget().getTargetLowering(); if (TLI.fallBackToDAGISel(Inst)) @@ -2876,6 +2919,13 @@ bool IRTranslator::translate(const Instruction &Inst) { } bool IRTranslator::translate(const Constant &C, Register Reg) { + // We only emit constants into the entry block from here. To prevent jumpy + // debug behaviour set the line to 0. + if (auto CurrInstDL = CurBuilder->getDL()) + EntryBuilder->setDebugLoc(DILocation::get(C.getContext(), 0, 0, + CurrInstDL.getScope(), + CurrInstDL.getInlinedAt())); + if (auto CI = dyn_cast<ConstantInt>(&C)) EntryBuilder->buildConstant(Reg, *CI); else if (auto CF = dyn_cast<ConstantFP>(&C)) @@ -2887,14 +2937,15 @@ bool IRTranslator::translate(const Constant &C, Register Reg) { else if (auto GV = dyn_cast<GlobalValue>(&C)) EntryBuilder->buildGlobalValue(Reg, GV); else if (auto CAZ = dyn_cast<ConstantAggregateZero>(&C)) { - if (!CAZ->getType()->isVectorTy()) + if (!isa<FixedVectorType>(CAZ->getType())) return false; // Return the scalar if it is a <1 x Ty> vector. - if (CAZ->getNumElements() == 1) + unsigned NumElts = CAZ->getElementCount().getFixedValue(); + if (NumElts == 1) return translateCopy(C, *CAZ->getElementValue(0u), *EntryBuilder.get()); SmallVector<Register, 4> Ops; - for (unsigned i = 0; i < CAZ->getNumElements(); ++i) { - Constant &Elt = *CAZ->getElementValue(i); + for (unsigned I = 0; I < NumElts; ++I) { + Constant &Elt = *CAZ->getElementValue(I); Ops.push_back(getOrCreateVReg(Elt)); } EntryBuilder->buildBuildVector(Reg, Ops); @@ -2968,8 +3019,13 @@ void IRTranslator::finalizeBasicBlock() { emitBitTestCase(BTB, NextMBB, UnhandledProb, BTB.Reg, BTB.Cases[j], MBB); - // FIXME delete this block below? if (BTB.ContiguousRange && j + 2 == ej) { + // We need to record the replacement phi edge here that normally + // happens in emitBitTestCase before we delete the case, otherwise the + // phi edge will be lost. + addMachineCFGPred({BTB.Parent->getBasicBlock(), + BTB.Cases[ej - 1].TargetBB->getBasicBlock()}, + MBB); // Since we're not going to use the final bit test, remove it. BTB.Cases.pop_back(); break; @@ -3079,7 +3135,9 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { assert(PendingPHIs.empty() && "stale PHIs"); - if (!DL->isLittleEndian()) { + // Targets which want to use big endian can enable it using + // enableBigEndian() + if (!DL->isLittleEndian() && !CLI->enableBigEndian()) { // Currently we don't properly handle big endian code. OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure", F.getSubprogram(), &F.getEntryBlock()); @@ -3121,7 +3179,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { // Make our arguments/constants entry block fallthrough to the IR entry block. 
EntryBB->addSuccessor(&getMBB(F.front())); - if (CLI->fallBackToDAGISel(F)) { + if (CLI->fallBackToDAGISel(*MF)) { OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure", F.getSubprogram(), &F.getEntryBlock()); R << "unable to lower function: " << ore::NV("Prototype", F.getType()); diff --git a/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp index 25fae5487187..75a8f03fcb3f 100644 --- a/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp +++ b/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp @@ -11,7 +11,11 @@ #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/Twine.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/LazyBlockFrequencyInfo.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" @@ -50,16 +54,29 @@ INITIALIZE_PASS_BEGIN(InstructionSelect, DEBUG_TYPE, false, false) INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis) +INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass) INITIALIZE_PASS_END(InstructionSelect, DEBUG_TYPE, "Select target instructions out of generic instructions", false, false) -InstructionSelect::InstructionSelect() : MachineFunctionPass(ID) { } +InstructionSelect::InstructionSelect(CodeGenOpt::Level OL) + : MachineFunctionPass(ID), OptLevel(OL) {} + +// In order not to crash when calling getAnalysis during testing with -run-pass +// we use the default opt level here instead of None, so that the addRequired() +// calls are made in getAnalysisUsage(). +InstructionSelect::InstructionSelect() + : MachineFunctionPass(ID), OptLevel(CodeGenOpt::Default) {} void InstructionSelect::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<TargetPassConfig>(); - AU.addRequired<GISelKnownBitsAnalysis>(); - AU.addPreserved<GISelKnownBitsAnalysis>(); + if (OptLevel != CodeGenOpt::None) { + AU.addRequired<GISelKnownBitsAnalysis>(); + AU.addPreserved<GISelKnownBitsAnalysis>(); + AU.addRequired<ProfileSummaryInfoWrapperPass>(); + LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU); + } getSelectionDAGFallbackAnalysisUsage(AU); MachineFunctionPass::getAnalysisUsage(AU); } @@ -71,13 +88,26 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { return false; LLVM_DEBUG(dbgs() << "Selecting function: " << MF.getName() << '\n'); - GISelKnownBits &KB = getAnalysis<GISelKnownBitsAnalysis>().get(MF); const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>(); InstructionSelector *ISel = MF.getSubtarget().getInstructionSelector(); + + CodeGenOpt::Level OldOptLevel = OptLevel; + auto RestoreOptLevel = make_scope_exit([=]() { OptLevel = OldOptLevel; }); + OptLevel = MF.getFunction().hasOptNone() ? 
CodeGenOpt::None + : MF.getTarget().getOptLevel(); + + GISelKnownBits *KB = nullptr; + if (OptLevel != CodeGenOpt::None) { + KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF); + PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); + if (PSI && PSI->hasProfileSummary()) + BFI = &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI(); + } + CodeGenCoverage CoverageInfo; assert(ISel && "Cannot work without InstructionSelector"); - ISel->setupMF(MF, KB, CoverageInfo); + ISel->setupMF(MF, KB, CoverageInfo, PSI, BFI); // An optimization remark emitter. Used to report failures. MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr); @@ -102,6 +132,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { #endif for (MachineBasicBlock *MBB : post_order(&MF)) { + ISel->CurMBB = MBB; if (MBB->empty()) continue; @@ -133,6 +164,25 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { continue; } + // Eliminate hints. + if (isPreISelGenericOptimizationHint(MI.getOpcode())) { + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + + // At this point, the destination register class of the hint may have + // been decided. + // + // Propagate that through to the source register. + const TargetRegisterClass *DstRC = MRI.getRegClassOrNull(DstReg); + if (DstRC) + MRI.setRegClass(SrcReg, DstRC); + assert(canReplaceReg(DstReg, SrcReg, MRI) && + "Must be able to replace dst with src!"); + MI.eraseFromParent(); + MRI.replaceRegWith(DstReg, SrcReg); + continue; + } + if (!ISel->select(MI)) { // FIXME: It would be nice to dump all inserted instructions. It's // not obvious how, esp. considering select() can insert after MI. diff --git a/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp b/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp new file mode 100644 index 000000000000..727d33fe4a40 --- /dev/null +++ b/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp @@ -0,0 +1,383 @@ +//===- lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp - Legalizer ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implement an interface to specify and query how an illegal operation on a +// given type should be expanded. +// +// Issues to be resolved: +// + Make it fast. +// + Support weird types like i3, <7 x i3>, ... +// + Operations with more than one type (ICMP, CMPXCHG, intrinsics, ...) 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h" +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" +#include <map> + +using namespace llvm; +using namespace LegacyLegalizeActions; + +#define DEBUG_TYPE "legalizer-info" + +raw_ostream &llvm::operator<<(raw_ostream &OS, LegacyLegalizeAction Action) { + switch (Action) { + case Legal: + OS << "Legal"; + break; + case NarrowScalar: + OS << "NarrowScalar"; + break; + case WidenScalar: + OS << "WidenScalar"; + break; + case FewerElements: + OS << "FewerElements"; + break; + case MoreElements: + OS << "MoreElements"; + break; + case Bitcast: + OS << "Bitcast"; + break; + case Lower: + OS << "Lower"; + break; + case Libcall: + OS << "Libcall"; + break; + case Custom: + OS << "Custom"; + break; + case Unsupported: + OS << "Unsupported"; + break; + case NotFound: + OS << "NotFound"; + break; + } + return OS; +} + +LegacyLegalizerInfo::LegacyLegalizerInfo() : TablesInitialized(false) { + // Set defaults. + // FIXME: these two (G_ANYEXT and G_TRUNC?) can be legalized to the + // fundamental load/store Jakob proposed. Once loads & stores are supported. + setScalarAction(TargetOpcode::G_ANYEXT, 1, {{1, Legal}}); + setScalarAction(TargetOpcode::G_ZEXT, 1, {{1, Legal}}); + setScalarAction(TargetOpcode::G_SEXT, 1, {{1, Legal}}); + setScalarAction(TargetOpcode::G_TRUNC, 0, {{1, Legal}}); + setScalarAction(TargetOpcode::G_TRUNC, 1, {{1, Legal}}); + + setScalarAction(TargetOpcode::G_INTRINSIC, 0, {{1, Legal}}); + setScalarAction(TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS, 0, {{1, Legal}}); + + setLegalizeScalarToDifferentSizeStrategy( + TargetOpcode::G_IMPLICIT_DEF, 0, narrowToSmallerAndUnsupportedIfTooSmall); + setLegalizeScalarToDifferentSizeStrategy( + TargetOpcode::G_ADD, 0, widenToLargerTypesAndNarrowToLargest); + setLegalizeScalarToDifferentSizeStrategy( + TargetOpcode::G_OR, 0, widenToLargerTypesAndNarrowToLargest); + setLegalizeScalarToDifferentSizeStrategy( + TargetOpcode::G_LOAD, 0, narrowToSmallerAndUnsupportedIfTooSmall); + setLegalizeScalarToDifferentSizeStrategy( + TargetOpcode::G_STORE, 0, narrowToSmallerAndUnsupportedIfTooSmall); + + setLegalizeScalarToDifferentSizeStrategy( + TargetOpcode::G_BRCOND, 0, widenToLargerTypesUnsupportedOtherwise); + setLegalizeScalarToDifferentSizeStrategy( + TargetOpcode::G_INSERT, 0, narrowToSmallerAndUnsupportedIfTooSmall); + setLegalizeScalarToDifferentSizeStrategy( + TargetOpcode::G_EXTRACT, 0, narrowToSmallerAndUnsupportedIfTooSmall); + setLegalizeScalarToDifferentSizeStrategy( + TargetOpcode::G_EXTRACT, 1, narrowToSmallerAndUnsupportedIfTooSmall); + setScalarAction(TargetOpcode::G_FNEG, 0, {{1, Lower}}); +} + +void LegacyLegalizerInfo::computeTables() { + assert(TablesInitialized == false); + + for (unsigned OpcodeIdx = 0; OpcodeIdx <= LastOp - FirstOp; ++OpcodeIdx) { + const unsigned Opcode = FirstOp + OpcodeIdx; + for (unsigned TypeIdx = 0; TypeIdx != SpecifiedActions[OpcodeIdx].size(); + ++TypeIdx) { + // 0. Collect information specified through the setAction API, i.e. + // for specific bit sizes. 
+      // For scalar types:
+      SizeAndActionsVec ScalarSpecifiedActions;
+      // For pointer types:
+      std::map<uint16_t, SizeAndActionsVec> AddressSpace2SpecifiedActions;
+      // For vector types:
+      std::map<uint16_t, SizeAndActionsVec> ElemSize2SpecifiedActions;
+      for (auto LLT2Action : SpecifiedActions[OpcodeIdx][TypeIdx]) {
+        const LLT Type = LLT2Action.first;
+        const LegacyLegalizeAction Action = LLT2Action.second;
+
+        auto SizeAction = std::make_pair(Type.getSizeInBits(), Action);
+        if (Type.isPointer())
+          AddressSpace2SpecifiedActions[Type.getAddressSpace()].push_back(
+              SizeAction);
+        else if (Type.isVector())
+          ElemSize2SpecifiedActions[Type.getElementType().getSizeInBits()]
+              .push_back(SizeAction);
+        else
+          ScalarSpecifiedActions.push_back(SizeAction);
+      }
+
+      // 1. Handle scalar types
+      {
+        // Decide how to handle bit sizes for which no explicit specification
+        // was given.
+        SizeChangeStrategy S = &unsupportedForDifferentSizes;
+        if (TypeIdx < ScalarSizeChangeStrategies[OpcodeIdx].size() &&
+            ScalarSizeChangeStrategies[OpcodeIdx][TypeIdx] != nullptr)
+          S = ScalarSizeChangeStrategies[OpcodeIdx][TypeIdx];
+        llvm::sort(ScalarSpecifiedActions);
+        checkPartialSizeAndActionsVector(ScalarSpecifiedActions);
+        setScalarAction(Opcode, TypeIdx, S(ScalarSpecifiedActions));
+      }
+
+      // 2. Handle pointer types
+      for (auto PointerSpecifiedActions : AddressSpace2SpecifiedActions) {
+        llvm::sort(PointerSpecifiedActions.second);
+        checkPartialSizeAndActionsVector(PointerSpecifiedActions.second);
+        // For pointer types, we assume that there isn't a meaningful way
+        // to change the number of bits used in the pointer.
+        setPointerAction(
+            Opcode, TypeIdx, PointerSpecifiedActions.first,
+            unsupportedForDifferentSizes(PointerSpecifiedActions.second));
+      }
+
+      // 3. Handle vector types
+      SizeAndActionsVec ElementSizesSeen;
+      for (auto VectorSpecifiedActions : ElemSize2SpecifiedActions) {
+        llvm::sort(VectorSpecifiedActions.second);
+        const uint16_t ElementSize = VectorSpecifiedActions.first;
+        ElementSizesSeen.push_back({ElementSize, Legal});
+        checkPartialSizeAndActionsVector(VectorSpecifiedActions.second);
+        // For vector types, we assume that the best way to adapt the number
+        // of elements is to widen to the next larger element count for which
+        // the vector type is legal, unless there is no such type. In that
+        // case, legalize towards a vector type with a smaller number of
+        // elements.
+        SizeAndActionsVec NumElementsActions;
+        for (SizeAndAction BitsizeAndAction : VectorSpecifiedActions.second) {
+          assert(BitsizeAndAction.first % ElementSize == 0);
+          const uint16_t NumElements = BitsizeAndAction.first / ElementSize;
+          NumElementsActions.push_back({NumElements, BitsizeAndAction.second});
+        }
+        setVectorNumElementAction(
+            Opcode, TypeIdx, ElementSize,
+            moreToWiderTypesAndLessToWidest(NumElementsActions));
+      }
+      llvm::sort(ElementSizesSeen);
+      SizeChangeStrategy VectorElementSizeChangeStrategy =
+          &unsupportedForDifferentSizes;
+      if (TypeIdx < VectorElementSizeChangeStrategies[OpcodeIdx].size() &&
+          VectorElementSizeChangeStrategies[OpcodeIdx][TypeIdx] != nullptr)
+        VectorElementSizeChangeStrategy =
+            VectorElementSizeChangeStrategies[OpcodeIdx][TypeIdx];
+      setScalarInVectorAction(
+          Opcode, TypeIdx, VectorElementSizeChangeStrategy(ElementSizesSeen));
+    }
+  }
+
+  TablesInitialized = true;
+}
+
+// FIXME: inefficient implementation for now.
Without ComputeValueVTs we're +// probably going to need specialized lookup structures for various types before +// we have any hope of doing well with something like <13 x i3>. Even the common +// cases should do better than what we have now. +std::pair<LegacyLegalizeAction, LLT> +LegacyLegalizerInfo::getAspectAction(const InstrAspect &Aspect) const { + assert(TablesInitialized && "backend forgot to call computeTables"); + // These *have* to be implemented for now, they're the fundamental basis of + // how everything else is transformed. + if (Aspect.Type.isScalar() || Aspect.Type.isPointer()) + return findScalarLegalAction(Aspect); + assert(Aspect.Type.isVector()); + return findVectorLegalAction(Aspect); +} + +LegacyLegalizerInfo::SizeAndActionsVec +LegacyLegalizerInfo::increaseToLargerTypesAndDecreaseToLargest( + const SizeAndActionsVec &v, LegacyLegalizeAction IncreaseAction, + LegacyLegalizeAction DecreaseAction) { + SizeAndActionsVec result; + unsigned LargestSizeSoFar = 0; + if (v.size() >= 1 && v[0].first != 1) + result.push_back({1, IncreaseAction}); + for (size_t i = 0; i < v.size(); ++i) { + result.push_back(v[i]); + LargestSizeSoFar = v[i].first; + if (i + 1 < v.size() && v[i + 1].first != v[i].first + 1) { + result.push_back({LargestSizeSoFar + 1, IncreaseAction}); + LargestSizeSoFar = v[i].first + 1; + } + } + result.push_back({LargestSizeSoFar + 1, DecreaseAction}); + return result; +} + +LegacyLegalizerInfo::SizeAndActionsVec +LegacyLegalizerInfo::decreaseToSmallerTypesAndIncreaseToSmallest( + const SizeAndActionsVec &v, LegacyLegalizeAction DecreaseAction, + LegacyLegalizeAction IncreaseAction) { + SizeAndActionsVec result; + if (v.size() == 0 || v[0].first != 1) + result.push_back({1, IncreaseAction}); + for (size_t i = 0; i < v.size(); ++i) { + result.push_back(v[i]); + if (i + 1 == v.size() || v[i + 1].first != v[i].first + 1) { + result.push_back({v[i].first + 1, DecreaseAction}); + } + } + return result; +} + +LegacyLegalizerInfo::SizeAndAction +LegacyLegalizerInfo::findAction(const SizeAndActionsVec &Vec, const uint32_t Size) { + assert(Size >= 1); + // Find the last element in Vec that has a bitsize equal to or smaller than + // the requested bit size. + // That is the element just before the first element that is bigger than Size. + auto It = partition_point( + Vec, [=](const SizeAndAction &A) { return A.first <= Size; }); + assert(It != Vec.begin() && "Does Vec not start with size 1?"); + int VecIdx = It - Vec.begin() - 1; + + LegacyLegalizeAction Action = Vec[VecIdx].second; + switch (Action) { + case Legal: + case Bitcast: + case Lower: + case Libcall: + case Custom: + return {Size, Action}; + case FewerElements: + // FIXME: is this special case still needed and correct? + // Special case for scalarization: + if (Vec == SizeAndActionsVec({{1, FewerElements}})) + return {1, FewerElements}; + LLVM_FALLTHROUGH; + case NarrowScalar: { + // The following needs to be a loop, as for now, we do allow needing to + // go over "Unsupported" bit sizes before finding a legalizable bit size. + // e.g. (s8, WidenScalar), (s9, Unsupported), (s32, Legal). if Size==8, + // we need to iterate over s9, and then to s32 to return (s32, Legal). + // If we want to get rid of the below loop, we should have stronger asserts + // when building the SizeAndActionsVecs, probably not allowing + // "Unsupported" unless at the ends of the vector. 
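// A standalone model of the lookup above: a sorted (bit size, action) step
// vector queried with partition_point for the last step at or below the
// requested size, then scanned upward past "Unsupported" entries when the
// chosen action widens; this is the (s8, WidenScalar), (s9, Unsupported),
// (s32, Legal) case from the comment. The action names and table are
// reduced and invented for the sketch, and the error handling simplified.

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

enum Action { Legal, WidenScalar, Unsupported };
using Step = std::pair<uint32_t, Action>;

// Returns the (size, action) pair governing a queried bit width.
Step findAction(const std::vector<Step> &Vec, uint32_t Size) {
  // Last element with first <= Size: one before the first bigger element.
  auto It = std::partition_point(
      Vec.begin(), Vec.end(), [=](const Step &S) { return S.first <= Size; });
  assert(It != Vec.begin() && "no entry at or below the queried size");
  size_t Idx = size_t(It - Vec.begin()) - 1;
  Action A = Vec[Idx].second;
  switch (A) {
  case Legal:
  case Unsupported:
    return {Size, A};
  case WidenScalar:
    // Walk upward, skipping sizes that are not directly usable.
    for (size_t I = Idx + 1; I < Vec.size(); ++I)
      if (Vec[I].second == Legal)
        return {Vec[I].first, WidenScalar};
    break;
  }
  return {Size, Unsupported};
}

int main() {
  std::vector<Step> Vec = {{8, WidenScalar}, {9, Unsupported}, {32, Legal}};
  assert(findAction(Vec, 8) == Step(32, WidenScalar)); // widen s8 to s32
  assert(findAction(Vec, 32) == Step(32, Legal));
}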
+ for (int i = VecIdx - 1; i >= 0; --i) + if (!needsLegalizingToDifferentSize(Vec[i].second) && + Vec[i].second != Unsupported) + return {Vec[i].first, Action}; + llvm_unreachable(""); + } + case WidenScalar: + case MoreElements: { + // See above, the following needs to be a loop, at least for now. + for (std::size_t i = VecIdx + 1; i < Vec.size(); ++i) + if (!needsLegalizingToDifferentSize(Vec[i].second) && + Vec[i].second != Unsupported) + return {Vec[i].first, Action}; + llvm_unreachable(""); + } + case Unsupported: + return {Size, Unsupported}; + case NotFound: + llvm_unreachable("NotFound"); + } + llvm_unreachable("Action has an unknown enum value"); +} + +std::pair<LegacyLegalizeAction, LLT> +LegacyLegalizerInfo::findScalarLegalAction(const InstrAspect &Aspect) const { + assert(Aspect.Type.isScalar() || Aspect.Type.isPointer()); + if (Aspect.Opcode < FirstOp || Aspect.Opcode > LastOp) + return {NotFound, LLT()}; + const unsigned OpcodeIdx = getOpcodeIdxForOpcode(Aspect.Opcode); + if (Aspect.Type.isPointer() && + AddrSpace2PointerActions[OpcodeIdx].find(Aspect.Type.getAddressSpace()) == + AddrSpace2PointerActions[OpcodeIdx].end()) { + return {NotFound, LLT()}; + } + const SmallVector<SizeAndActionsVec, 1> &Actions = + Aspect.Type.isPointer() + ? AddrSpace2PointerActions[OpcodeIdx] + .find(Aspect.Type.getAddressSpace()) + ->second + : ScalarActions[OpcodeIdx]; + if (Aspect.Idx >= Actions.size()) + return {NotFound, LLT()}; + const SizeAndActionsVec &Vec = Actions[Aspect.Idx]; + // FIXME: speed up this search, e.g. by using a results cache for repeated + // queries? + auto SizeAndAction = findAction(Vec, Aspect.Type.getSizeInBits()); + return {SizeAndAction.second, + Aspect.Type.isScalar() ? LLT::scalar(SizeAndAction.first) + : LLT::pointer(Aspect.Type.getAddressSpace(), + SizeAndAction.first)}; +} + +std::pair<LegacyLegalizeAction, LLT> +LegacyLegalizerInfo::findVectorLegalAction(const InstrAspect &Aspect) const { + assert(Aspect.Type.isVector()); + // First legalize the vector element size, then legalize the number of + // lanes in the vector. 
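// A toy version of the two-step scheme described above, with invented
// legalization tables: first map the element width to a legal element
// width, then map the lane count to a legal lane count. (In the real code
// the second table is also keyed by the already-legalized element width.)

#include <cassert>
#include <map>
#include <utility>

std::pair<unsigned, unsigned> legalizeVector(unsigned NumElts,
                                             unsigned EltBits) {
  // Invented tables: element width -> legal width, lane count -> legal count.
  static const std::map<unsigned, unsigned> LegalEltBits = {
      {8, 8}, {16, 16}, {24, 32}, {32, 32}};
  static const std::map<unsigned, unsigned> LegalNumElts = {
      {3, 4}, {4, 4}, {8, 8}};

  // Step 1: legalize the element type, keeping the lane count fixed.
  unsigned NewEltBits = LegalEltBits.at(EltBits);
  // Step 2: legalize the number of lanes for the new element type.
  unsigned NewNumElts = LegalNumElts.at(NumElts);
  return {NewNumElts, NewEltBits};
}

int main() {
  // <3 x s24>: widen elements to s32, then pad to 4 lanes -> <4 x s32>.
  assert(legalizeVector(3, 24) == std::make_pair(4u, 32u));
}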
+ if (Aspect.Opcode < FirstOp || Aspect.Opcode > LastOp) + return {NotFound, Aspect.Type}; + const unsigned OpcodeIdx = getOpcodeIdxForOpcode(Aspect.Opcode); + const unsigned TypeIdx = Aspect.Idx; + if (TypeIdx >= ScalarInVectorActions[OpcodeIdx].size()) + return {NotFound, Aspect.Type}; + const SizeAndActionsVec &ElemSizeVec = + ScalarInVectorActions[OpcodeIdx][TypeIdx]; + + LLT IntermediateType; + auto ElementSizeAndAction = + findAction(ElemSizeVec, Aspect.Type.getScalarSizeInBits()); + IntermediateType = LLT::fixed_vector(Aspect.Type.getNumElements(), + ElementSizeAndAction.first); + if (ElementSizeAndAction.second != Legal) + return {ElementSizeAndAction.second, IntermediateType}; + + auto i = NumElements2Actions[OpcodeIdx].find( + IntermediateType.getScalarSizeInBits()); + if (i == NumElements2Actions[OpcodeIdx].end()) { + return {NotFound, IntermediateType}; + } + const SizeAndActionsVec &NumElementsVec = (*i).second[TypeIdx]; + auto NumElementsAndAction = + findAction(NumElementsVec, IntermediateType.getNumElements()); + return {NumElementsAndAction.second, + LLT::fixed_vector(NumElementsAndAction.first, + IntermediateType.getScalarSizeInBits())}; +} + +unsigned LegacyLegalizerInfo::getOpcodeIdxForOpcode(unsigned Opcode) const { + assert(Opcode >= FirstOp && Opcode <= LastOp && "Unsupported opcode"); + return Opcode - FirstOp; +} + + +LegacyLegalizeActionStep +LegacyLegalizerInfo::getAction(const LegalityQuery &Query) const { + for (unsigned i = 0; i < Query.Types.size(); ++i) { + auto Action = getAspectAction({Query.Opcode, i, Query.Types[i]}); + if (Action.first != Legal) { + LLVM_DEBUG(dbgs() << ".. (legacy) Type " << i << " Action=" + << Action.first << ", " << Action.second << "\n"); + return {Action.first, i, Action.second}; + } else + LLVM_DEBUG(dbgs() << ".. (legacy) Type " << i << " Legal\n"); + } + LLVM_DEBUG(dbgs() << ".. 
(legacy) Legal\n"); + return {Legal, 0, LLT{}}; +} + diff --git a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp index 1993f6033291..7c5e4e52ca3e 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp @@ -55,7 +55,7 @@ LegalityPredicate LegalityPredicates::typePairAndMemDescInSet( SmallVector<TypePairAndMemDesc, 4> TypesAndMemDesc = TypesAndMemDescInit; return [=](const LegalityQuery &Query) { TypePairAndMemDesc Match = {Query.Types[TypeIdx0], Query.Types[TypeIdx1], - Query.MMODescrs[MMOIdx].SizeInBits, + Query.MMODescrs[MMOIdx].MemoryTy, Query.MMODescrs[MMOIdx].AlignInBits}; return llvm::any_of(TypesAndMemDesc, [=](const TypePairAndMemDesc &Entry) -> bool { @@ -176,7 +176,7 @@ LegalityPredicate LegalityPredicates::sameSize(unsigned TypeIdx0, LegalityPredicate LegalityPredicates::memSizeInBytesNotPow2(unsigned MMOIdx) { return [=](const LegalityQuery &Query) { - return !isPowerOf2_32(Query.MMODescrs[MMOIdx].SizeInBits / 8); + return !isPowerOf2_32(Query.MMODescrs[MMOIdx].MemoryTy.getSizeInBytes()); }; } diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp index f3ba3f080198..fc2570ae4b8e 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp @@ -69,8 +69,8 @@ LegalizeMutation LegalizeMutations::moreElementsToNextPow2(unsigned TypeIdx, const LLT VecTy = Query.Types[TypeIdx]; unsigned NewNumElements = std::max(1u << Log2_32_Ceil(VecTy.getNumElements()), Min); - return std::make_pair(TypeIdx, - LLT::vector(NewNumElements, VecTy.getElementType())); + return std::make_pair( + TypeIdx, LLT::fixed_vector(NewNumElements, VecTy.getElementType())); }; } diff --git a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp index 5ba9367cac8a..635b1445ee07 100644 --- a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -43,6 +43,13 @@ static cl::opt<bool> cl::desc("Should enable CSE in Legalizer"), cl::Optional, cl::init(false)); +// This is a temporary hack, should be removed soon. +static cl::opt<bool> AllowGInsertAsArtifact( + "allow-ginsert-as-artifact", + cl::desc("Allow G_INSERT to be considered an artifact. Hack around AMDGPU " + "test infinite loops."), + cl::Optional, cl::init(true)); + enum class DebugLocVerifyLevel { None, Legalizations, @@ -103,6 +110,8 @@ static bool isArtifact(const MachineInstr &MI) { case TargetOpcode::G_BUILD_VECTOR: case TargetOpcode::G_EXTRACT: return true; + case TargetOpcode::G_INSERT: + return AllowGInsertAsArtifact; } } using InstListTy = GISelWorkList<256>; @@ -230,7 +239,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI, } // Do the legalization for this instruction. - auto Res = Helper.legalizeInstrStep(MI); + auto Res = Helper.legalizeInstrStep(MI, LocObserver); // Error out if we couldn't legalize this instruction. We may want to // fall back to DAG ISel instead in the future. 
if (Res == LegalizerHelper::UnableToLegalize) { diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index e7f40523efaf..c1e0d2549c42 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -16,12 +16,16 @@ #include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" +#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h" #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/Instructions.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" @@ -56,7 +60,8 @@ getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) { unsigned EltSize = OrigTy.getScalarSizeInBits(); if (LeftoverSize % EltSize != 0) return {-1, -1}; - LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize); + LeftoverTy = LLT::scalarOrVector( + ElementCount::getFixed(LeftoverSize / EltSize), EltSize); } else { LeftoverTy = LLT::scalar(LeftoverSize); } @@ -100,7 +105,8 @@ LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI, TLI(*MF.getSubtarget().getTargetLowering()) { } LegalizerHelper::LegalizeResult -LegalizerHelper::legalizeInstrStep(MachineInstr &MI) { +LegalizerHelper::legalizeInstrStep(MachineInstr &MI, + LostDebugLocObserver &LocObserver) { LLVM_DEBUG(dbgs() << "Legalizing: " << MI); MIRBuilder.setInstrAndDebugLoc(MI); @@ -115,7 +121,7 @@ LegalizerHelper::legalizeInstrStep(MachineInstr &MI) { return AlreadyLegal; case Libcall: LLVM_DEBUG(dbgs() << ".. Convert to libcall\n"); - return libcall(MI); + return libcall(MI, LocObserver); case NarrowScalar: LLVM_DEBUG(dbgs() << ".. Narrow scalar\n"); return narrowScalar(MI, Step.TypeIdx, Step.NewType); @@ -173,7 +179,8 @@ bool LegalizerHelper::extractParts(Register Reg, LLT RegTy, unsigned EltSize = MainTy.getScalarSizeInBits(); if (LeftoverSize % EltSize != 0) return false; - LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize); + LeftoverTy = LLT::scalarOrVector( + ElementCount::getFixed(LeftoverSize / EltSize), EltSize); } else { LeftoverTy = LLT::scalar(LeftoverSize); } @@ -215,29 +222,12 @@ void LegalizerHelper::insertParts(Register DstReg, return; } - unsigned PartSize = PartTy.getSizeInBits(); - unsigned LeftoverPartSize = LeftoverTy.getSizeInBits(); - - Register CurResultReg = MRI.createGenericVirtualRegister(ResultTy); - MIRBuilder.buildUndef(CurResultReg); - - unsigned Offset = 0; - for (Register PartReg : PartRegs) { - Register NewResultReg = MRI.createGenericVirtualRegister(ResultTy); - MIRBuilder.buildInsert(NewResultReg, CurResultReg, PartReg, Offset); - CurResultReg = NewResultReg; - Offset += PartSize; - } - - for (unsigned I = 0, E = LeftoverRegs.size(); I != E; ++I) { - // Use the original output register for the final insert to avoid a copy. - Register NewResultReg = (I + 1 == E) ? 
- DstReg : MRI.createGenericVirtualRegister(ResultTy); - - MIRBuilder.buildInsert(NewResultReg, CurResultReg, LeftoverRegs[I], Offset); - CurResultReg = NewResultReg; - Offset += LeftoverPartSize; - } + SmallVector<Register> GCDRegs; + LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy); + for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs)) + extractGCDType(GCDRegs, GCDTy, PartReg); + LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs); + buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs); } /// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs. @@ -490,8 +480,9 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { /// True if an instruction is in tail position in its caller. Intended for /// legalizing libcalls as tail calls when possible. -static bool isLibCallInTailPosition(const TargetInstrInfo &TII, - MachineInstr &MI) { +static bool isLibCallInTailPosition(MachineInstr &MI, + const TargetInstrInfo &TII, + MachineRegisterInfo &MRI) { MachineBasicBlock &MBB = *MI.getParent(); const Function &F = MBB.getParent()->getFunction(); @@ -510,8 +501,47 @@ static bool isLibCallInTailPosition(const TargetInstrInfo &TII, CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt)) return false; - // Only tail call if the following instruction is a standard return. + // Only tail call if the following instruction is a standard return or if we + // have a `thisreturn` callee, and a sequence like: + // + // G_MEMCPY %0, %1, %2 + // $x0 = COPY %0 + // RET_ReallyLR implicit $x0 auto Next = next_nodbg(MI.getIterator(), MBB.instr_end()); + if (Next != MBB.instr_end() && Next->isCopy()) { + switch (MI.getOpcode()) { + default: + llvm_unreachable("unsupported opcode"); + case TargetOpcode::G_BZERO: + return false; + case TargetOpcode::G_MEMCPY: + case TargetOpcode::G_MEMMOVE: + case TargetOpcode::G_MEMSET: + break; + } + + Register VReg = MI.getOperand(0).getReg(); + if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg()) + return false; + + Register PReg = Next->getOperand(0).getReg(); + if (!PReg.isPhysical()) + return false; + + auto Ret = next_nodbg(Next, MBB.instr_end()); + if (Ret == MBB.instr_end() || !Ret->isReturn()) + return false; + + if (Ret->getNumImplicitOperands() != 1) + return false; + + if (PReg != Ret->getOperand(0).getReg()) + return false; + + // Skip over the COPY that we just validated. + Next = Ret; + } + if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn()) return false; @@ -552,16 +582,17 @@ simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType) { auto Libcall = getRTLibDesc(MI.getOpcode(), Size); + // FIXME: What does the original arg index mean here? 
SmallVector<CallLowering::ArgInfo, 3> Args; for (unsigned i = 1; i < MI.getNumOperands(); i++) - Args.push_back({MI.getOperand(i).getReg(), OpType}); - return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType}, - Args); + Args.push_back({MI.getOperand(i).getReg(), OpType, 0}); + return createLibcall(MIRBuilder, Libcall, + {MI.getOperand(0).getReg(), OpType, 0}, Args); } LegalizerHelper::LegalizeResult llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, - MachineInstr &MI) { + MachineInstr &MI, LostDebugLocObserver &LocObserver) { auto &Ctx = MIRBuilder.getMF().getFunction().getContext(); SmallVector<CallLowering::ArgInfo, 3> Args; @@ -576,33 +607,47 @@ llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace()); else OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits()); - Args.push_back({Reg, OpTy}); + Args.push_back({Reg, OpTy, 0}); } auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering(); auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering(); RTLIB::Libcall RTLibcall; - switch (MI.getOpcode()) { + unsigned Opc = MI.getOpcode(); + switch (Opc) { + case TargetOpcode::G_BZERO: + RTLibcall = RTLIB::BZERO; + break; case TargetOpcode::G_MEMCPY: RTLibcall = RTLIB::MEMCPY; + Args[0].Flags[0].setReturned(); break; case TargetOpcode::G_MEMMOVE: RTLibcall = RTLIB::MEMMOVE; + Args[0].Flags[0].setReturned(); break; case TargetOpcode::G_MEMSET: RTLibcall = RTLIB::MEMSET; + Args[0].Flags[0].setReturned(); break; default: - return LegalizerHelper::UnableToLegalize; + llvm_unreachable("unsupported opcode"); } const char *Name = TLI.getLibcallName(RTLibcall); + // Unsupported libcall on the target. + if (!Name) { + LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for " + << MIRBuilder.getTII().getName(Opc) << "\n"); + return LegalizerHelper::UnableToLegalize; + } + CallLowering::CallLoweringInfo Info; Info.CallConv = TLI.getLibcallCallingConv(RTLibcall); Info.Callee = MachineOperand::CreateES(Name); - Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx)); + Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0); Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() && - isLibCallInTailPosition(MIRBuilder.getTII(), MI); + isLibCallInTailPosition(MI, MIRBuilder.getTII(), MRI); std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs)); if (!CLI.lowerCall(MIRBuilder, Info)) @@ -610,16 +655,24 @@ llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, if (Info.LoweredTailCall) { assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?"); + + // Check debug locations before removing the return. + LocObserver.checkpoint(true); + // We must have a return following the call (or debug insts) to get past // isLibCallInTailPosition. do { MachineInstr *Next = MI.getNextNode(); - assert(Next && (Next->isReturn() || Next->isDebugInstr()) && + assert(Next && + (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) && "Expected instr following MI to be return or debug inst?"); // We lowered a tail call, so the call is now the return from the block. // Delete the old return. Next->eraseFromParent(); } while (MI.getNextNode()); + + // We expect to lose the debug location from the return. 
+ LocObserver.checkpoint(false); } return LegalizerHelper::Legalized; @@ -651,12 +704,13 @@ static LegalizerHelper::LegalizeResult conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType, Type *FromType) { RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType); - return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType}, - {{MI.getOperand(1).getReg(), FromType}}); + return createLibcall(MIRBuilder, Libcall, + {MI.getOperand(0).getReg(), ToType, 0}, + {{MI.getOperand(1).getReg(), FromType, 0}}); } LegalizerHelper::LegalizeResult -LegalizerHelper::libcall(MachineInstr &MI) { +LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) { LLT LLTy = MRI.getType(MI.getOperand(0).getReg()); unsigned Size = LLTy.getSizeInBits(); auto &Ctx = MIRBuilder.getMF().getFunction().getContext(); @@ -748,10 +802,14 @@ LegalizerHelper::libcall(MachineInstr &MI) { return Status; break; } + case TargetOpcode::G_BZERO: case TargetOpcode::G_MEMCPY: case TargetOpcode::G_MEMMOVE: case TargetOpcode::G_MEMSET: { - LegalizeResult Result = createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI); + LegalizeResult Result = + createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver); + if (Result != Legalized) + return Result; MI.eraseFromParent(); return Result; } @@ -783,7 +841,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, if (SizeOp0 % NarrowSize != 0) { LLT ImplicitTy = NarrowTy; if (DstTy.isVector()) - ImplicitTy = LLT::vector(DstTy.getNumElements(), ImplicitTy); + ImplicitTy = LLT::vector(DstTy.getElementCount(), ImplicitTy); Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0); MIRBuilder.buildAnyExt(DstReg, ImplicitReg); @@ -859,74 +917,17 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, case TargetOpcode::G_FREEZE: return reduceOperationWidth(MI, TypeIdx, NarrowTy); - - case TargetOpcode::G_ADD: { - // FIXME: add support for when SizeOp0 isn't an exact multiple of - // NarrowSize. - if (SizeOp0 % NarrowSize != 0) - return UnableToLegalize; - // Expand in terms of carry-setting/consuming G_ADDE instructions. - int NumParts = SizeOp0 / NarrowTy.getSizeInBits(); - - SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs; - extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs); - extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs); - - Register CarryIn; - for (int i = 0; i < NumParts; ++i) { - Register DstReg = MRI.createGenericVirtualRegister(NarrowTy); - Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1)); - - if (i == 0) - MIRBuilder.buildUAddo(DstReg, CarryOut, Src1Regs[i], Src2Regs[i]); - else { - MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i], - Src2Regs[i], CarryIn); - } - - DstRegs.push_back(DstReg); - CarryIn = CarryOut; - } - Register DstReg = MI.getOperand(0).getReg(); - if(MRI.getType(DstReg).isVector()) - MIRBuilder.buildBuildVector(DstReg, DstRegs); - else - MIRBuilder.buildMerge(DstReg, DstRegs); - MI.eraseFromParent(); - return Legalized; - } - case TargetOpcode::G_SUB: { - // FIXME: add support for when SizeOp0 isn't an exact multiple of - // NarrowSize. 
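// The G_ADD expansion deleted in this hunk (now funneled through
// narrowScalarAddSub together with G_SUB and the carry variants) is the
// classic carry chain: G_UADDO on the lowest parts, then G_UADDE feeding
// each part's carry-out into the next. The same computation on plain
// 32-bit limbs, as a self-contained sketch:

#include <cassert>
#include <cstdint>
#include <vector>

// Adds two equal-length little-endian limb vectors, propagating the carry.
std::vector<uint32_t> addParts(const std::vector<uint32_t> &A,
                               const std::vector<uint32_t> &B) {
  assert(A.size() == B.size());
  std::vector<uint32_t> Sum(A.size());
  uint32_t Carry = 0; // carry-in for part i is the carry-out of part i - 1
  for (size_t I = 0; I < A.size(); ++I) {
    uint64_t Wide = uint64_t(A[I]) + B[I] + Carry;
    Sum[I] = uint32_t(Wide);
    Carry = uint32_t(Wide >> 32);
  }
  return Sum;
}

int main() {
  // 0x1FFFFFFFF + 1 = 0x200000000 across two 32-bit parts.
  std::vector<uint32_t> R = addParts({0xFFFFFFFFu, 1u}, {1u, 0u});
  assert(R[0] == 0u && R[1] == 2u);
}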
- if (SizeOp0 % NarrowSize != 0) - return UnableToLegalize; - - int NumParts = SizeOp0 / NarrowTy.getSizeInBits(); - - SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs; - extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs); - extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs); - - Register DstReg = MRI.createGenericVirtualRegister(NarrowTy); - Register BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1)); - MIRBuilder.buildInstr(TargetOpcode::G_USUBO, {DstReg, BorrowOut}, - {Src1Regs[0], Src2Regs[0]}); - DstRegs.push_back(DstReg); - Register BorrowIn = BorrowOut; - for (int i = 1; i < NumParts; ++i) { - DstReg = MRI.createGenericVirtualRegister(NarrowTy); - BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1)); - - MIRBuilder.buildInstr(TargetOpcode::G_USUBE, {DstReg, BorrowOut}, - {Src1Regs[i], Src2Regs[i], BorrowIn}); - - DstRegs.push_back(DstReg); - BorrowIn = BorrowOut; - } - MIRBuilder.buildMerge(MI.getOperand(0), DstRegs); - MI.eraseFromParent(); - return Legalized; - } + case TargetOpcode::G_ADD: + case TargetOpcode::G_SUB: + case TargetOpcode::G_SADDO: + case TargetOpcode::G_SSUBO: + case TargetOpcode::G_SADDE: + case TargetOpcode::G_SSUBE: + case TargetOpcode::G_UADDO: + case TargetOpcode::G_USUBO: + case TargetOpcode::G_UADDE: + case TargetOpcode::G_USUBE: + return narrowScalarAddSub(MI, TypeIdx, NarrowTy); case TargetOpcode::G_MUL: case TargetOpcode::G_UMULH: return narrowScalarMul(MI, NarrowTy); @@ -935,53 +936,53 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, case TargetOpcode::G_INSERT: return narrowScalarInsert(MI, TypeIdx, NarrowTy); case TargetOpcode::G_LOAD: { - auto &MMO = **MI.memoperands_begin(); - Register DstReg = MI.getOperand(0).getReg(); + auto &LoadMI = cast<GLoad>(MI); + Register DstReg = LoadMI.getDstReg(); LLT DstTy = MRI.getType(DstReg); if (DstTy.isVector()) return UnableToLegalize; - if (8 * MMO.getSize() != DstTy.getSizeInBits()) { + if (8 * LoadMI.getMemSize() != DstTy.getSizeInBits()) { Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy); - MIRBuilder.buildLoad(TmpReg, MI.getOperand(1), MMO); + MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO()); MIRBuilder.buildAnyExt(DstReg, TmpReg); - MI.eraseFromParent(); + LoadMI.eraseFromParent(); return Legalized; } - return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy); + return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy); } case TargetOpcode::G_ZEXTLOAD: case TargetOpcode::G_SEXTLOAD: { - bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD; - Register DstReg = MI.getOperand(0).getReg(); - Register PtrReg = MI.getOperand(1).getReg(); + auto &LoadMI = cast<GExtLoad>(MI); + Register DstReg = LoadMI.getDstReg(); + Register PtrReg = LoadMI.getPointerReg(); Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy); - auto &MMO = **MI.memoperands_begin(); + auto &MMO = LoadMI.getMMO(); unsigned MemSize = MMO.getSizeInBits(); if (MemSize == NarrowSize) { MIRBuilder.buildLoad(TmpReg, PtrReg, MMO); } else if (MemSize < NarrowSize) { - MIRBuilder.buildLoadInstr(MI.getOpcode(), TmpReg, PtrReg, MMO); + MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO); } else if (MemSize > NarrowSize) { // FIXME: Need to split the load. 
return UnableToLegalize; } - if (ZExt) + if (isa<GZExtLoad>(LoadMI)) MIRBuilder.buildZExt(DstReg, TmpReg); else MIRBuilder.buildSExt(DstReg, TmpReg); - MI.eraseFromParent(); + LoadMI.eraseFromParent(); return Legalized; } case TargetOpcode::G_STORE: { - const auto &MMO = **MI.memoperands_begin(); + auto &StoreMI = cast<GStore>(MI); - Register SrcReg = MI.getOperand(0).getReg(); + Register SrcReg = StoreMI.getValueReg(); LLT SrcTy = MRI.getType(SrcReg); if (SrcTy.isVector()) return UnableToLegalize; @@ -992,16 +993,15 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, if (SrcTy.isVector() && LeftoverBits != 0) return UnableToLegalize; - if (8 * MMO.getSize() != SrcTy.getSizeInBits()) { + if (8 * StoreMI.getMemSize() != SrcTy.getSizeInBits()) { Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy); - auto &MMO = **MI.memoperands_begin(); MIRBuilder.buildTrunc(TmpReg, SrcReg); - MIRBuilder.buildStore(TmpReg, MI.getOperand(1), MMO); - MI.eraseFromParent(); + MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO()); + StoreMI.eraseFromParent(); return Legalized; } - return reduceLoadStoreWidth(MI, 0, NarrowTy); + return reduceLoadStoreWidth(StoreMI, 0, NarrowTy); } case TargetOpcode::G_SELECT: return narrowScalarSelect(MI, TypeIdx, NarrowTy); @@ -1063,6 +1063,11 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, Observer.changedInstr(MI); return Legalized; case TargetOpcode::G_PHI: { + // FIXME: add support for when SizeOp0 isn't an exact multiple of + // NarrowSize. + if (SizeOp0 % NarrowSize != 0) + return UnableToLegalize; + unsigned NumParts = SizeOp0 / NarrowSize; SmallVector<Register, 2> DstRegs(NumParts); SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2); @@ -1100,38 +1105,81 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, return Legalized; } case TargetOpcode::G_ICMP: { - uint64_t SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits(); - if (NarrowSize * 2 != SrcSize) + Register LHS = MI.getOperand(2).getReg(); + LLT SrcTy = MRI.getType(LHS); + uint64_t SrcSize = SrcTy.getSizeInBits(); + CmpInst::Predicate Pred = + static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()); + + // TODO: Handle the non-equality case for weird sizes. + if (NarrowSize * 2 != SrcSize && !ICmpInst::isEquality(Pred)) return UnableToLegalize; - Observer.changingInstr(MI); - Register LHSL = MRI.createGenericVirtualRegister(NarrowTy); - Register LHSH = MRI.createGenericVirtualRegister(NarrowTy); - MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2)); + LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover) + SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs; + if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs, + LHSLeftoverRegs)) + return UnableToLegalize; - Register RHSL = MRI.createGenericVirtualRegister(NarrowTy); - Register RHSH = MRI.createGenericVirtualRegister(NarrowTy); - MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3)); + LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type. 
+ SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs; + if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused, + RHSPartRegs, RHSLeftoverRegs)) + return UnableToLegalize; - CmpInst::Predicate Pred = - static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()); - LLT ResTy = MRI.getType(MI.getOperand(0).getReg()); - - if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) { - MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL); - MachineInstrBuilder XorH = MIRBuilder.buildXor(NarrowTy, LHSH, RHSH); - MachineInstrBuilder Or = MIRBuilder.buildOr(NarrowTy, XorL, XorH); - MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0); - MIRBuilder.buildICmp(Pred, MI.getOperand(0), Or, Zero); + // We now have the LHS and RHS of the compare split into narrow-type + // registers, plus potentially some leftover type. + Register Dst = MI.getOperand(0).getReg(); + LLT ResTy = MRI.getType(Dst); + if (ICmpInst::isEquality(Pred)) { + // For each part on the LHS and RHS, keep track of the result of XOR-ing + // them together. For each equal part, the result should be all 0s. For + // each non-equal part, we'll get at least one 1. + auto Zero = MIRBuilder.buildConstant(NarrowTy, 0); + SmallVector<Register, 4> Xors; + for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) { + auto LHS = std::get<0>(LHSAndRHS); + auto RHS = std::get<1>(LHSAndRHS); + auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0); + Xors.push_back(Xor); + } + + // Build a G_XOR for each leftover register. Each G_XOR must be widened + // to the desired narrow type so that we can OR them together later. + SmallVector<Register, 4> WidenedXors; + for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) { + auto LHS = std::get<0>(LHSAndRHS); + auto RHS = std::get<1>(LHSAndRHS); + auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0); + LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor); + buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors, + /* PadStrategy = */ TargetOpcode::G_ZEXT); + Xors.insert(Xors.end(), WidenedXors.begin(), WidenedXors.end()); + } + + // Now, for each part we broke up, we know if they are equal/not equal + // based off the G_XOR. We can OR these all together and compare against + // 0 to get the result. + assert(Xors.size() >= 2 && "Should have gotten at least two Xors?"); + auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]); + for (unsigned I = 2, E = Xors.size(); I < E; ++I) + Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]); + MIRBuilder.buildICmp(Pred, Dst, Or, Zero); } else { + // TODO: Handle non-power-of-two types. 
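// The equality path built above, modeled on plain integers: XOR each pair
// of parts, zero-extend the leftover XOR so it can be ORed with the rest,
// then compare the accumulated bits against zero. A standalone s88 check,
// using one s64 part plus an s24 leftover as in the example comment
// earlier in this function:

#include <cassert>
#include <cstdint>

// Compares two 88-bit values, each held as a 64-bit low part plus a 24-bit
// leftover stored in the low bits of a uint32_t.
bool equal88(uint64_t ALo, uint32_t AHi24, uint64_t BLo, uint32_t BHi24) {
  const uint32_t Mask24 = (1u << 24) - 1;
  // One XOR per part; equal parts XOR to zero.
  uint64_t XorLo = ALo ^ BLo;
  // The leftover XOR is widened (zero-extended) before being ORed in.
  uint64_t XorHi = uint64_t((AHi24 ^ BHi24) & Mask24);
  // Any set bit anywhere means the values differ.
  return (XorLo | XorHi) == 0;
}

int main() {
  assert(equal88(42, 7, 42, 7));
  assert(!equal88(42, 7, 42, 6));
  assert(!equal88(43, 7, 42, 7));
}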
+ assert(LHSPartRegs.size() == 2 && "Expected exactly 2 LHS part regs?"); + assert(RHSPartRegs.size() == 2 && "Expected exactly 2 RHS part regs?"); + Register LHSL = LHSPartRegs[0]; + Register LHSH = LHSPartRegs[1]; + Register RHSL = RHSPartRegs[0]; + Register RHSH = RHSPartRegs[1]; MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH); MachineInstrBuilder CmpHEQ = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH); MachineInstrBuilder CmpLU = MIRBuilder.buildICmp( ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL); - MIRBuilder.buildSelect(MI.getOperand(0), CmpHEQ, CmpLU, CmpH); + MIRBuilder.buildSelect(Dst, CmpHEQ, CmpLU, CmpH); } - Observer.changedInstr(MI); MI.eraseFromParent(); return Legalized; } @@ -1252,22 +1300,9 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, Observer.changedInstr(MI); return Legalized; } - case TargetOpcode::G_FPTOUI: { - if (TypeIdx != 0) - return UnableToLegalize; - Observer.changingInstr(MI); - narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT); - Observer.changedInstr(MI); - return Legalized; - } - case TargetOpcode::G_FPTOSI: { - if (TypeIdx != 0) - return UnableToLegalize; - Observer.changingInstr(MI); - narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_SEXT); - Observer.changedInstr(MI); - return Legalized; - } + case TargetOpcode::G_FPTOUI: + case TargetOpcode::G_FPTOSI: + return narrowScalarFPTOI(MI, TypeIdx, NarrowTy); case TargetOpcode::G_FPEXT: if (TypeIdx != 0) return UnableToLegalize; @@ -1758,22 +1793,68 @@ LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx, } LegalizerHelper::LegalizeResult -LegalizerHelper::widenScalarAddoSubo(MachineInstr &MI, unsigned TypeIdx, - LLT WideTy) { +LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx, + LLT WideTy) { if (TypeIdx == 1) return UnableToLegalize; // TODO - unsigned Op = MI.getOpcode(); - unsigned Opcode = Op == TargetOpcode::G_UADDO || Op == TargetOpcode::G_SADDO - ? TargetOpcode::G_ADD - : TargetOpcode::G_SUB; - unsigned ExtOpcode = - Op == TargetOpcode::G_UADDO || Op == TargetOpcode::G_USUBO - ? 
TargetOpcode::G_ZEXT - : TargetOpcode::G_SEXT; + + unsigned Opcode; + unsigned ExtOpcode; + Optional<Register> CarryIn = None; + switch (MI.getOpcode()) { + default: + llvm_unreachable("Unexpected opcode!"); + case TargetOpcode::G_SADDO: + Opcode = TargetOpcode::G_ADD; + ExtOpcode = TargetOpcode::G_SEXT; + break; + case TargetOpcode::G_SSUBO: + Opcode = TargetOpcode::G_SUB; + ExtOpcode = TargetOpcode::G_SEXT; + break; + case TargetOpcode::G_UADDO: + Opcode = TargetOpcode::G_ADD; + ExtOpcode = TargetOpcode::G_ZEXT; + break; + case TargetOpcode::G_USUBO: + Opcode = TargetOpcode::G_SUB; + ExtOpcode = TargetOpcode::G_ZEXT; + break; + case TargetOpcode::G_SADDE: + Opcode = TargetOpcode::G_UADDE; + ExtOpcode = TargetOpcode::G_SEXT; + CarryIn = MI.getOperand(4).getReg(); + break; + case TargetOpcode::G_SSUBE: + Opcode = TargetOpcode::G_USUBE; + ExtOpcode = TargetOpcode::G_SEXT; + CarryIn = MI.getOperand(4).getReg(); + break; + case TargetOpcode::G_UADDE: + Opcode = TargetOpcode::G_UADDE; + ExtOpcode = TargetOpcode::G_ZEXT; + CarryIn = MI.getOperand(4).getReg(); + break; + case TargetOpcode::G_USUBE: + Opcode = TargetOpcode::G_USUBE; + ExtOpcode = TargetOpcode::G_ZEXT; + CarryIn = MI.getOperand(4).getReg(); + break; + } + auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)}); auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)}); // Do the arithmetic in the larger type. - auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}); + Register NewOp; + if (CarryIn) { + LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg()); + NewOp = MIRBuilder + .buildInstr(Opcode, {WideTy, CarryOutTy}, + {LHSExt, RHSExt, *CarryIn}) + .getReg(0); + } else { + NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0); + } LLT OrigTy = MRI.getType(MI.getOperand(0).getReg()); auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp); auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp}); @@ -1830,10 +1911,105 @@ LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx, } LegalizerHelper::LegalizeResult +LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx, + LLT WideTy) { + if (TypeIdx == 1) + return UnableToLegalize; + + bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO; + Register Result = MI.getOperand(0).getReg(); + Register OriginalOverflow = MI.getOperand(1).getReg(); + Register LHS = MI.getOperand(2).getReg(); + Register RHS = MI.getOperand(3).getReg(); + LLT SrcTy = MRI.getType(LHS); + LLT OverflowTy = MRI.getType(OriginalOverflow); + unsigned SrcBitWidth = SrcTy.getScalarSizeInBits(); + + // To determine if the result overflowed in the larger type, we extend the + // input to the larger type, do the multiply (checking if it overflows), + // then also check the high bits of the result to see if overflow happened + // there. + unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT; + auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS}); + auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS}); + + auto Mulo = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy, OverflowTy}, + {LeftOperand, RightOperand}); + auto Mul = Mulo->getOperand(0); + MIRBuilder.buildTrunc(Result, Mul); + + MachineInstrBuilder ExtResult; + // Overflow occurred if it occurred in the larger type, or if the high part + // of the result does not zero/sign-extend the low part. Check this second + // possibility first. 
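// The same check on concrete types, as a standalone sketch: an s8 signed
// multiply performed in s16. Because 16 >= 2 * 8, the wide multiply itself
// cannot overflow, so only the sign-extension test is needed; this is the
// shortcut the code below takes when WideTy is wide enough.

#include <cassert>
#include <cstdint>

// Returns true if A * B overflows int8_t, via the widened-multiply check.
bool smulOverflows(int8_t A, int8_t B) {
  int16_t Mul = int16_t(A) * int16_t(B); // cannot overflow: 16 >= 2 * 8
  int8_t Low = int8_t(Mul); // truncate back to the original width
                            // (modular wrap, guaranteed since C++20)
  return int16_t(Low) != Mul; // overflow iff the high part is not the
                              // sign-extension of the low part
}

int main() {
  assert(!smulOverflows(10, 12));  // 120 fits in s8
  assert(smulOverflows(16, 8));    // 128 does not
  assert(smulOverflows(-128, -1)); // 128 again
}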
+ if (IsSigned) { + // For signed, overflow occurred when the high part does not sign-extend + // the low part. + ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth); + } else { + // Unsigned overflow occurred when the high part does not zero-extend the + // low part. + ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth); + } + + // Multiplication cannot overflow if the WideTy is >= 2 * original width, + // so we don't need to check the overflow result of larger type Mulo. + if (WideTy.getScalarSizeInBits() < 2 * SrcBitWidth) { + auto Overflow = + MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult); + // Finally check if the multiplication in the larger type itself overflowed. + MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow); + } else { + MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult); + } + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { switch (MI.getOpcode()) { default: return UnableToLegalize; + case TargetOpcode::G_ATOMICRMW_XCHG: + case TargetOpcode::G_ATOMICRMW_ADD: + case TargetOpcode::G_ATOMICRMW_SUB: + case TargetOpcode::G_ATOMICRMW_AND: + case TargetOpcode::G_ATOMICRMW_OR: + case TargetOpcode::G_ATOMICRMW_XOR: + case TargetOpcode::G_ATOMICRMW_MIN: + case TargetOpcode::G_ATOMICRMW_MAX: + case TargetOpcode::G_ATOMICRMW_UMIN: + case TargetOpcode::G_ATOMICRMW_UMAX: + assert(TypeIdx == 0 && "atomicrmw with second scalar type"); + Observer.changingInstr(MI); + widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT); + widenScalarDst(MI, WideTy, 0); + Observer.changedInstr(MI); + return Legalized; + case TargetOpcode::G_ATOMIC_CMPXCHG: + assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type"); + Observer.changingInstr(MI); + widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT); + widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT); + widenScalarDst(MI, WideTy, 0); + Observer.changedInstr(MI); + return Legalized; + case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: + if (TypeIdx == 0) { + Observer.changingInstr(MI); + widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT); + widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT); + widenScalarDst(MI, WideTy, 0); + Observer.changedInstr(MI); + return Legalized; + } + assert(TypeIdx == 1 && + "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type"); + Observer.changingInstr(MI); + widenScalarDst(MI, WideTy, 1); + Observer.changedInstr(MI); + return Legalized; case TargetOpcode::G_EXTRACT: return widenScalarExtract(MI, TypeIdx, WideTy); case TargetOpcode::G_INSERT: @@ -1846,7 +2022,14 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { case TargetOpcode::G_SSUBO: case TargetOpcode::G_UADDO: case TargetOpcode::G_USUBO: - return widenScalarAddoSubo(MI, TypeIdx, WideTy); + case TargetOpcode::G_SADDE: + case TargetOpcode::G_SSUBE: + case TargetOpcode::G_UADDE: + case TargetOpcode::G_USUBE: + return widenScalarAddSubOverflow(MI, TypeIdx, WideTy); + case TargetOpcode::G_UMULO: + case TargetOpcode::G_SMULO: + return widenScalarMulo(MI, TypeIdx, WideTy); case TargetOpcode::G_SADDSAT: case TargetOpcode::G_SSUBSAT: case TargetOpcode::G_SSHLSAT: @@ -1943,6 +2126,13 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { Observer.changedInstr(MI); return Legalized; + case TargetOpcode::G_ABS: + Observer.changingInstr(MI); + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT); + 
widenScalarDst(MI, WideTy); + Observer.changedInstr(MI); + return Legalized; + case TargetOpcode::G_ADD: case TargetOpcode::G_AND: case TargetOpcode::G_MUL: @@ -1959,6 +2149,21 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { Observer.changedInstr(MI); return Legalized; + case TargetOpcode::G_SBFX: + case TargetOpcode::G_UBFX: + Observer.changingInstr(MI); + + if (TypeIdx == 0) { + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); + widenScalarDst(MI, WideTy); + } else { + widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT); + widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT); + } + + Observer.changedInstr(MI); + return Legalized; + case TargetOpcode::G_SHL: Observer.changingInstr(MI); @@ -1986,6 +2191,15 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { Observer.changedInstr(MI); return Legalized; + case TargetOpcode::G_SDIVREM: + Observer.changingInstr(MI); + widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT); + widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT); + widenScalarDst(MI, WideTy); + widenScalarDst(MI, WideTy, 1); + Observer.changedInstr(MI); + return Legalized; + case TargetOpcode::G_ASHR: case TargetOpcode::G_LSHR: Observer.changingInstr(MI); @@ -2016,6 +2230,15 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { Observer.changedInstr(MI); return Legalized; + case TargetOpcode::G_UDIVREM: + Observer.changingInstr(MI); + widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT); + widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT); + widenScalarDst(MI, WideTy); + widenScalarDst(MI, WideTy, 1); + Observer.changedInstr(MI); + return Legalized; + case TargetOpcode::G_SELECT: Observer.changingInstr(MI); if (TypeIdx == 0) { @@ -2202,9 +2425,9 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { LLT VecTy = MRI.getType(VecReg); Observer.changingInstr(MI); - widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(), - WideTy.getSizeInBits()), - 1, TargetOpcode::G_SEXT); + widenScalarSrc( + MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1, + TargetOpcode::G_SEXT); widenScalarDst(MI, WideTy, 0); Observer.changedInstr(MI); @@ -2225,7 +2448,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { Register VecReg = MI.getOperand(1).getReg(); LLT VecTy = MRI.getType(VecReg); - LLT WideVecTy = LLT::vector(VecTy.getNumElements(), WideTy); + LLT WideVecTy = LLT::vector(VecTy.getElementCount(), WideTy); widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT); widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT); @@ -2385,7 +2608,7 @@ LegalizerHelper::lowerBitcast(MachineInstr &MI) { // %3:_(<2 x s8>) = G_BITCAST %2 // %4:_(<2 x s8>) = G_BITCAST %3 // %1:_(<4 x s16>) = G_CONCAT_VECTORS %3, %4 - DstCastTy = LLT::vector(NumDstElt / NumSrcElt, DstEltTy); + DstCastTy = LLT::fixed_vector(NumDstElt / NumSrcElt, DstEltTy); SrcPartTy = SrcEltTy; } else if (NumSrcElt > NumDstElt) { // Source element type is smaller. // @@ -2397,7 +2620,7 @@ LegalizerHelper::lowerBitcast(MachineInstr &MI) { // %3:_(s16) = G_BITCAST %2 // %4:_(s16) = G_BITCAST %3 // %1:_(<2 x s16>) = G_BUILD_VECTOR %3, %4 - SrcPartTy = LLT::vector(NumSrcElt / NumDstElt, SrcEltTy); + SrcPartTy = LLT::fixed_vector(NumSrcElt / NumDstElt, SrcEltTy); DstCastTy = DstEltTy; } @@ -2488,7 +2711,8 @@ LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, // Type of the intermediate result vector. 
const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts; - LLT MidTy = LLT::scalarOrVector(NewEltsPerOldElt, NewEltTy); + LLT MidTy = + LLT::scalarOrVector(ElementCount::getFixed(NewEltsPerOldElt), NewEltTy); auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt); @@ -2654,125 +2878,168 @@ LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, return UnableToLegalize; } -LegalizerHelper::LegalizeResult -LegalizerHelper::lowerLoad(MachineInstr &MI) { +LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) { // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT - Register DstReg = MI.getOperand(0).getReg(); - Register PtrReg = MI.getOperand(1).getReg(); + Register DstReg = LoadMI.getDstReg(); + Register PtrReg = LoadMI.getPointerReg(); LLT DstTy = MRI.getType(DstReg); - auto &MMO = **MI.memoperands_begin(); + MachineMemOperand &MMO = LoadMI.getMMO(); + LLT MemTy = MMO.getMemoryType(); + MachineFunction &MF = MIRBuilder.getMF(); + if (MemTy.isVector()) + return UnableToLegalize; - if (DstTy.getSizeInBits() == MMO.getSizeInBits()) { - if (MI.getOpcode() == TargetOpcode::G_LOAD) { - // This load needs splitting into power of 2 sized loads. - if (DstTy.isVector()) - return UnableToLegalize; - if (isPowerOf2_32(DstTy.getSizeInBits())) - return UnableToLegalize; // Don't know what we're being asked to do. - - // Our strategy here is to generate anyextending loads for the smaller - // types up to next power-2 result type, and then combine the two larger - // result values together, before truncating back down to the non-pow-2 - // type. - // E.g. v1 = i24 load => - // v2 = i32 zextload (2 byte) - // v3 = i32 load (1 byte) - // v4 = i32 shl v3, 16 - // v5 = i32 or v4, v2 - // v1 = i24 trunc v5 - // By doing this we generate the correct truncate which should get - // combined away as an artifact with a matching extend. - uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits()); - uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize; - - MachineFunction &MF = MIRBuilder.getMF(); - MachineMemOperand *LargeMMO = - MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); - MachineMemOperand *SmallMMO = MF.getMachineMemOperand( - &MMO, LargeSplitSize / 8, SmallSplitSize / 8); - - LLT PtrTy = MRI.getType(PtrReg); - unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits()); - LLT AnyExtTy = LLT::scalar(AnyExtSize); - Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy); - Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy); - auto LargeLoad = MIRBuilder.buildLoadInstr( - TargetOpcode::G_ZEXTLOAD, LargeLdReg, PtrReg, *LargeMMO); - - auto OffsetCst = MIRBuilder.buildConstant( - LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8); - Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy); - auto SmallPtr = - MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0)); - auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0), - *SmallMMO); - - auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize); - auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt); - auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad); - MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)}); - MI.eraseFromParent(); - return Legalized; + unsigned MemSizeInBits = MemTy.getSizeInBits(); + unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes(); + + if (MemSizeInBits != MemStoreSizeInBits) { + // Promote to a byte-sized load if not loading an integral number of + // bytes. 
For example, promote EXTLOAD:i20 -> EXTLOAD:i24. + LLT WideMemTy = LLT::scalar(MemStoreSizeInBits); + MachineMemOperand *NewMMO = + MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy); + + Register LoadReg = DstReg; + LLT LoadTy = DstTy; + + // If this wasn't already an extending load, we need to widen the result + // register to avoid creating a load with a narrower result than the source. + if (MemStoreSizeInBits > DstTy.getSizeInBits()) { + LoadTy = WideMemTy; + LoadReg = MRI.createGenericVirtualRegister(WideMemTy); } - MIRBuilder.buildLoad(DstReg, PtrReg, MMO); - MI.eraseFromParent(); + if (isa<GSExtLoad>(LoadMI)) { + auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO); + MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits); + } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == DstTy) { + auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO); + // The extra bits are guaranteed to be zero, since we stored them that + // way. A zext load from Wide thus automatically gives zext from MemVT. + MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits); + } else { + MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO); + } + + if (DstTy != LoadTy) + MIRBuilder.buildTrunc(DstReg, LoadReg); + + LoadMI.eraseFromParent(); return Legalized; } - if (DstTy.isScalar()) { - Register TmpReg = - MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits())); - MIRBuilder.buildLoad(TmpReg, PtrReg, MMO); - switch (MI.getOpcode()) { - default: - llvm_unreachable("Unexpected opcode"); - case TargetOpcode::G_LOAD: - MIRBuilder.buildAnyExtOrTrunc(DstReg, TmpReg); - break; - case TargetOpcode::G_SEXTLOAD: - MIRBuilder.buildSExt(DstReg, TmpReg); - break; - case TargetOpcode::G_ZEXTLOAD: - MIRBuilder.buildZExt(DstReg, TmpReg); - break; - } + // This load needs splitting into power of 2 sized loads. + if (DstTy.isVector()) + return UnableToLegalize; + if (isPowerOf2_32(MemSizeInBits)) + return UnableToLegalize; // Don't know what we're being asked to do. - MI.eraseFromParent(); - return Legalized; + // Big endian lowering not implemented. + if (MIRBuilder.getDataLayout().isBigEndian()) + return UnableToLegalize; + + // Our strategy here is to generate anyextending loads for the smaller + // types up to next power-2 result type, and then combine the two larger + // result values together, before truncating back down to the non-pow-2 + // type. + // E.g. v1 = i24 load => + // v2 = i32 zextload (2 byte) + // v3 = i32 load (1 byte) + // v4 = i32 shl v3, 16 + // v5 = i32 or v4, v2 + // v1 = i24 trunc v5 + // By doing this we generate the correct truncate which should get + // combined away as an artifact with a matching extend. 
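+ // For the i24 example above, MemSizeInBits is 24, so LargeSplitSize = 16 and + // SmallSplitSize = 8: the two MMOs cover bytes [0,2) and [2,3) of the original + // little-endian access.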
+ uint64_t LargeSplitSize = PowerOf2Floor(MemSizeInBits); + uint64_t SmallSplitSize = MemSizeInBits - LargeSplitSize; + + MachineMemOperand *LargeMMO = + MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); + MachineMemOperand *SmallMMO = + MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8); + + LLT PtrTy = MRI.getType(PtrReg); + unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits()); + LLT AnyExtTy = LLT::scalar(AnyExtSize); + auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy, + PtrReg, *LargeMMO); + + auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), + LargeSplitSize / 8); + Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy); + auto SmallPtr = MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst); + auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy, + SmallPtr, *SmallMMO); + + auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize); + auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt); + + if (AnyExtTy == DstTy) + MIRBuilder.buildOr(DstReg, Shift, LargeLoad); + else { + auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad); + MIRBuilder.buildTrunc(DstReg, {Or}); } - return UnableToLegalize; + LoadMI.eraseFromParent(); + return Legalized; } -LegalizerHelper::LegalizeResult -LegalizerHelper::lowerStore(MachineInstr &MI) { +LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) { // Lower a non-power of 2 store into multiple pow-2 stores. // E.g. split an i24 store into an i16 store + i8 store. // We do this by first extending the stored value to the next largest power // of 2 type, and then using truncating stores to store the components. // By doing this, likewise with G_LOAD, generate an extend that can be // artifact-combined away instead of leaving behind extracts. - Register SrcReg = MI.getOperand(0).getReg(); - Register PtrReg = MI.getOperand(1).getReg(); + Register SrcReg = StoreMI.getValueReg(); + Register PtrReg = StoreMI.getPointerReg(); LLT SrcTy = MRI.getType(SrcReg); - MachineMemOperand &MMO = **MI.memoperands_begin(); - if (SrcTy.getSizeInBits() != MMO.getSizeInBits()) - return UnableToLegalize; + MachineFunction &MF = MIRBuilder.getMF(); + MachineMemOperand &MMO = **StoreMI.memoperands_begin(); + LLT MemTy = MMO.getMemoryType(); + if (SrcTy.isVector()) return UnableToLegalize; - if (isPowerOf2_32(SrcTy.getSizeInBits())) + + unsigned StoreWidth = MemTy.getSizeInBits(); + unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes(); + + if (StoreWidth != StoreSizeInBits) { + // Promote to a byte-sized store with upper bits zero if not + // storing an integral number of bytes. For example, promote + // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1) + LLT WideTy = LLT::scalar(StoreSizeInBits); + + if (StoreSizeInBits > SrcTy.getSizeInBits()) { + // Avoid creating a store with a narrower source than result. + SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0); + SrcTy = WideTy; + } + + auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth); + + MachineMemOperand *NewMMO = + MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy); + MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO); + StoreMI.eraseFromParent(); + return Legalized; + } + + if (isPowerOf2_32(MemTy.getSizeInBits())) return UnableToLegalize; // Don't know what we're being asked to do. - // Extend to the next pow-2. 
- const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits())); - auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg); + // Extend to the next pow-2. If this store was itself the result of lowering, + // e.g. an s56 store being broken into s32 + s24, we might have a stored type + // that's wider than the stored size. + const LLT NewSrcTy = LLT::scalar(NextPowerOf2(MemTy.getSizeInBits())); + auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg); // Obtain the smaller value by shifting away the larger value. - uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits()); - uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize; - auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize); - auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt); + uint64_t LargeSplitSize = PowerOf2Floor(MemTy.getSizeInBits()); + uint64_t SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize; + auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize); + auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt); // Generate the PtrAdd and truncating stores. LLT PtrTy = MRI.getType(PtrReg); @@ -2780,16 +3047,15 @@ LegalizerHelper::lowerStore(MachineInstr &MI) { LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8); Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy); auto SmallPtr = - MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0)); + MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst); - MachineFunction &MF = MIRBuilder.getMF(); MachineMemOperand *LargeMMO = MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); MachineMemOperand *SmallMMO = MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8); - MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO); - MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO); - MI.eraseFromParent(); + MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO); + MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO); + StoreMI.eraseFromParent(); return Legalized; } @@ -2799,9 +3065,15 @@ LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) { case TargetOpcode::G_LOAD: { if (TypeIdx != 0) return UnableToLegalize; + MachineMemOperand &MMO = **MI.memoperands_begin(); + + // Not sure how to interpret a bitcast of an extending load. + if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits()) + return UnableToLegalize; Observer.changingInstr(MI); bitcastDst(MI, CastTy, 0); + MMO.setType(CastTy); Observer.changedInstr(MI); return Legalized; } @@ -2809,8 +3081,15 @@ LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) { if (TypeIdx != 0) return UnableToLegalize; + MachineMemOperand &MMO = **MI.memoperands_begin(); + + // Not sure how to interpret a bitcast of a truncating store.
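+ // (E.g. an s32 value stored into 16 bits of memory: a 32-bit CastTy such as + // <4 x s8> would no longer describe the 16-bit access, so bail out.)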
+ if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits()) + return UnableToLegalize; + Observer.changingInstr(MI); bitcastSrc(MI, CastTy, 0); + MMO.setType(CastTy); Observer.changedInstr(MI); return Legalized; } @@ -2980,9 +3259,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { case TargetOpcode::G_LOAD: case TargetOpcode::G_SEXTLOAD: case TargetOpcode::G_ZEXTLOAD: - return lowerLoad(MI); + return lowerLoad(cast<GAnyLoad>(MI)); case TargetOpcode::G_STORE: - return lowerStore(MI); + return lowerStore(cast<GStore>(MI)); case TargetOpcode::G_CTLZ_ZERO_UNDEF: case TargetOpcode::G_CTTZ_ZERO_UNDEF: case TargetOpcode::G_CTLZ: @@ -3132,24 +3411,19 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { case G_SSHLSAT: case G_USHLSAT: return lowerShlSat(MI); - case G_ABS: { - // Expand %res = G_ABS %a into: - // %v1 = G_ASHR %a, scalar_size-1 - // %v2 = G_ADD %a, %v1 - // %res = G_XOR %v2, %v1 - LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); - Register OpReg = MI.getOperand(1).getReg(); - auto ShiftAmt = - MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1); - auto Shift = - MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt); - auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift); - MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift); - MI.eraseFromParent(); - return Legalized; - } + case G_ABS: + return lowerAbsToAddXor(MI); case G_SELECT: return lowerSelect(MI); + case G_SDIVREM: + case G_UDIVREM: + return lowerDIVREM(MI); + case G_FSHL: + case G_FSHR: + return lowerFunnelShift(MI); + case G_ROTL: + case G_ROTR: + return lowerRotate(MI); } } @@ -3248,9 +3522,6 @@ LegalizerHelper::fewerElementsVectorMultiEltType( return UnableToLegalize; const LLT NarrowTy0 = NarrowTyArg; - const unsigned NewNumElts = - NarrowTy0.isVector() ? NarrowTy0.getNumElements() : 1; - const Register DstReg = MI.getOperand(0).getReg(); LLT DstTy = MRI.getType(DstReg); LLT LeftoverTy0; @@ -3270,7 +3541,9 @@ LegalizerHelper::fewerElementsVectorMultiEltType( for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) { Register SrcReg = MI.getOperand(I).getReg(); LLT SrcTyI = MRI.getType(SrcReg); - LLT NarrowTyI = LLT::scalarOrVector(NewNumElts, SrcTyI.getScalarType()); + const auto NewEC = NarrowTy0.isVector() ? NarrowTy0.getElementCount() + : ElementCount::getFixed(1); + LLT NarrowTyI = LLT::scalarOrVector(NewEC, SrcTyI.getScalarType()); LLT LeftoverTyI; // Split this operand into the requested typed registers, and any leftover @@ -3345,7 +3618,7 @@ LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx, if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements()) return UnableToLegalize; - NarrowTy1 = LLT::vector(NarrowTy.getNumElements(), SrcTy.getElementType()); + NarrowTy1 = LLT::vector(NarrowTy.getElementCount(), SrcTy.getElementType()); } else { NumParts = DstTy.getNumElements(); NarrowTy1 = SrcTy.getElementType(); @@ -3389,9 +3662,9 @@ LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx, NarrowTy0 = NarrowTy; NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements(); - NarrowTy1 = NarrowTy.isVector() ? - LLT::vector(NarrowTy.getNumElements(), SrcTy.getScalarSizeInBits()) : - SrcTy.getElementType(); + NarrowTy1 = NarrowTy.isVector() ? LLT::vector(NarrowTy.getElementCount(), + SrcTy.getScalarSizeInBits()) + : SrcTy.getElementType(); } else { unsigned NewElts = NarrowTy.isVector() ? 
NarrowTy.getNumElements() : 1; @@ -3399,8 +3672,8 @@ LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx, NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : NarrowTy.getNumElements(); - NarrowTy0 = LLT::vector(NarrowTy.getNumElements(), - DstTy.getScalarSizeInBits()); + NarrowTy0 = + LLT::vector(NarrowTy.getElementCount(), DstTy.getScalarSizeInBits()); NarrowTy1 = NarrowTy; } @@ -3471,8 +3744,9 @@ LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx, if (CondTy.getNumElements() == NumParts) NarrowTy1 = CondTy.getElementType(); else - NarrowTy1 = LLT::vector(CondTy.getNumElements() / NumParts, - CondTy.getScalarSizeInBits()); + NarrowTy1 = + LLT::vector(CondTy.getElementCount().divideCoefficientBy(NumParts), + CondTy.getScalarSizeInBits()); } } else { NumParts = CondTy.getNumElements(); @@ -3618,6 +3892,55 @@ LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI, return Legalized; } +LegalizerHelper::LegalizeResult +LegalizerHelper::fewerElementsVectorMulo(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { + Register Result = MI.getOperand(0).getReg(); + Register Overflow = MI.getOperand(1).getReg(); + Register LHS = MI.getOperand(2).getReg(); + Register RHS = MI.getOperand(3).getReg(); + + LLT SrcTy = MRI.getType(LHS); + if (!SrcTy.isVector()) + return UnableToLegalize; + + LLT ElementType = SrcTy.getElementType(); + LLT OverflowElementTy = MRI.getType(Overflow).getElementType(); + const ElementCount NumResult = SrcTy.getElementCount(); + LLT GCDTy = getGCDType(SrcTy, NarrowTy); + + // Unmerge the operands to smaller parts of GCD type. + auto UnmergeLHS = MIRBuilder.buildUnmerge(GCDTy, LHS); + auto UnmergeRHS = MIRBuilder.buildUnmerge(GCDTy, RHS); + + const int NumOps = UnmergeLHS->getNumOperands() - 1; + const ElementCount PartsPerUnmerge = NumResult.divideCoefficientBy(NumOps); + LLT OverflowTy = LLT::scalarOrVector(PartsPerUnmerge, OverflowElementTy); + LLT ResultTy = LLT::scalarOrVector(PartsPerUnmerge, ElementType); + + // Perform the operation over unmerged parts. + SmallVector<Register, 8> ResultParts; + SmallVector<Register, 8> OverflowParts; + for (int I = 0; I != NumOps; ++I) { + Register Operand1 = UnmergeLHS->getOperand(I).getReg(); + Register Operand2 = UnmergeRHS->getOperand(I).getReg(); + auto PartMul = MIRBuilder.buildInstr(MI.getOpcode(), {ResultTy, OverflowTy}, + {Operand1, Operand2}); + ResultParts.push_back(PartMul->getOperand(0).getReg()); + OverflowParts.push_back(PartMul->getOperand(1).getReg()); + } + + LLT ResultLCMTy = buildLCMMergePieces(SrcTy, NarrowTy, GCDTy, ResultParts); + LLT OverflowLCMTy = + LLT::scalarOrVector(ResultLCMTy.getElementCount(), OverflowElementTy); + + // Recombine the pieces to the original result and overflow registers. + buildWidenedRemergeToDst(Result, ResultLCMTy, ResultParts); + buildWidenedRemergeToDst(Overflow, OverflowLCMTy, OverflowParts); + MI.eraseFromParent(); + return Legalized; +} + // Handle FewerElementsVector a G_BUILD_VECTOR or G_CONCAT_VECTORS that produces // a vector // @@ -3681,7 +4004,11 @@ LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, // If the index is a constant, we can really break this down as you would // expect, and index into the target size pieces. 
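// E.g. extracting element 5 of <8 x s16> with <4 x s16> pieces reads // element 5 % 4 == 1 of piece 5 / 4 == 1.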
int64_t IdxVal; - if (mi_match(Idx, MRI, m_ICst(IdxVal))) { + auto MaybeCst = + getConstantVRegValWithLookThrough(Idx, MRI, /*LookThroughInstrs*/ true, + /*HandleFConstants*/ false); + if (MaybeCst) { + IdxVal = MaybeCst->Value.getSExtValue(); // Avoid out of bounds indexing the pieces. if (IdxVal >= VecTy.getNumElements()) { MIRBuilder.buildUndef(DstReg); @@ -3731,27 +4058,24 @@ LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, } LegalizerHelper::LegalizeResult -LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx, +LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx, LLT NarrowTy) { // FIXME: Don't know how to handle secondary types yet. if (TypeIdx != 0) return UnableToLegalize; - MachineMemOperand *MMO = *MI.memoperands_begin(); - // This implementation doesn't work for atomics. Give up instead of doing // something invalid. - if (MMO->getOrdering() != AtomicOrdering::NotAtomic || - MMO->getFailureOrdering() != AtomicOrdering::NotAtomic) + if (LdStMI.isAtomic()) return UnableToLegalize; - bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD; - Register ValReg = MI.getOperand(0).getReg(); - Register AddrReg = MI.getOperand(1).getReg(); + bool IsLoad = isa<GLoad>(LdStMI); + Register ValReg = LdStMI.getReg(0); + Register AddrReg = LdStMI.getPointerReg(); LLT ValTy = MRI.getType(ValReg); // FIXME: Do we need a distinct NarrowMemory legalize action? + if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize()) { LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n"); return UnableToLegalize; } @@ -3782,20 +4106,20 @@ LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx, // is a load, return the new registers in ValRegs. For a store, each element // of ValRegs should be PartTy. Returns the next offset that needs to be // handled.
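// E.g. narrowing an s96 load with NarrowTy s32 emits three s32 loads at // byte offsets 0, 4 and 8, and returns 96 once the whole width is handled.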
+ auto MMO = LdStMI.getMMO(); auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs, unsigned Offset) -> unsigned { MachineFunction &MF = MIRBuilder.getMF(); unsigned PartSize = PartTy.getSizeInBits(); for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize; Offset += PartSize, ++Idx) { - unsigned ByteSize = PartSize / 8; unsigned ByteOffset = Offset / 8; Register NewAddrReg; MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset); MachineMemOperand *NewMMO = - MF.getMachineMemOperand(MMO, ByteOffset, ByteSize); + MF.getMachineMemOperand(&MMO, ByteOffset, PartTy); if (IsLoad) { Register Dst = MRI.createGenericVirtualRegister(PartTy); @@ -3820,7 +4144,7 @@ LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx, LeftoverTy, NarrowLeftoverRegs); } - MI.eraseFromParent(); + LdStMI.eraseFromParent(); return Legalized; } @@ -3830,28 +4154,32 @@ LegalizerHelper::reduceOperationWidth(MachineInstr &MI, unsigned int TypeIdx, assert(TypeIdx == 0 && "only one type index expected"); const unsigned Opc = MI.getOpcode(); - const int NumOps = MI.getNumOperands() - 1; - const Register DstReg = MI.getOperand(0).getReg(); + const int NumDefOps = MI.getNumExplicitDefs(); + const int NumSrcOps = MI.getNumOperands() - NumDefOps; const unsigned Flags = MI.getFlags(); const unsigned NarrowSize = NarrowTy.getSizeInBits(); const LLT NarrowScalarTy = LLT::scalar(NarrowSize); - assert(NumOps <= 3 && "expected instruction with 1 result and 1-3 sources"); + assert(MI.getNumOperands() <= 4 && "expected instruction with either 1 " + "result and 1-3 sources or 2 results and " + "1-2 sources"); + + SmallVector<Register, 2> DstRegs; + for (int I = 0; I < NumDefOps; ++I) + DstRegs.push_back(MI.getOperand(I).getReg()); // First of all check whether we are narrowing (changing the element type) // or reducing the vector elements - const LLT DstTy = MRI.getType(DstReg); + const LLT DstTy = MRI.getType(DstRegs[0]); const bool IsNarrow = NarrowTy.getScalarType() != DstTy.getScalarType(); SmallVector<Register, 8> ExtractedRegs[3]; SmallVector<Register, 8> Parts; - unsigned NarrowElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1; - // Break down all the sources into NarrowTy pieces we can operate on. This may // involve creating merges to a wider type, padded with undef. - for (int I = 0; I != NumOps; ++I) { - Register SrcReg = MI.getOperand(I + 1).getReg(); + for (int I = 0; I != NumSrcOps; ++I) { + Register SrcReg = MI.getOperand(I + NumDefOps).getReg(); LLT SrcTy = MRI.getType(SrcReg); // The type to narrow SrcReg to. For narrowing, this is a smaller scalar. @@ -3868,7 +4196,9 @@ LegalizerHelper::reduceOperationWidth(MachineInstr &MI, unsigned int TypeIdx, SrcReg = MIRBuilder.buildBitcast(SrcTy, SrcReg).getReg(0); } } else { - OpNarrowTy = LLT::scalarOrVector(NarrowElts, SrcTy.getScalarType()); + auto NarrowEC = NarrowTy.isVector() ? NarrowTy.getElementCount() + : ElementCount::getFixed(1); + OpNarrowTy = LLT::scalarOrVector(NarrowEC, SrcTy.getScalarType()); } LLT GCDTy = extractGCDType(ExtractedRegs[I], SrcTy, OpNarrowTy, SrcReg); @@ -3878,10 +4208,10 @@ LegalizerHelper::reduceOperationWidth(MachineInstr &MI, unsigned int TypeIdx, TargetOpcode::G_ANYEXT); } - SmallVector<Register, 8> ResultRegs; + SmallVector<Register, 8> ResultRegs[2]; // Input operands for each sub-instruction. 
- SmallVector<SrcOp, 4> InputRegs(NumOps, Register()); + SmallVector<SrcOp, 4> InputRegs(NumSrcOps, Register()); int NumParts = ExtractedRegs[0].size(); const unsigned DstSize = DstTy.getSizeInBits(); @@ -3903,33 +4233,44 @@ LegalizerHelper::reduceOperationWidth(MachineInstr &MI, unsigned int TypeIdx, for (int I = 0; I != NumRealParts; ++I) { // Emit this instruction on each of the split pieces. - for (int J = 0; J != NumOps; ++J) + for (int J = 0; J != NumSrcOps; ++J) InputRegs[J] = ExtractedRegs[J][I]; - auto Inst = MIRBuilder.buildInstr(Opc, {NarrowDstTy}, InputRegs, Flags); - ResultRegs.push_back(Inst.getReg(0)); + MachineInstrBuilder Inst; + if (NumDefOps == 1) + Inst = MIRBuilder.buildInstr(Opc, {NarrowDstTy}, InputRegs, Flags); + else + Inst = MIRBuilder.buildInstr(Opc, {NarrowDstTy, NarrowDstTy}, InputRegs, + Flags); + + for (int J = 0; J != NumDefOps; ++J) + ResultRegs[J].push_back(Inst.getReg(J)); } // Fill out the widened result with undef instead of creating instructions // with undef inputs. int NumUndefParts = NumParts - NumRealParts; - if (NumUndefParts != 0) - ResultRegs.append(NumUndefParts, - MIRBuilder.buildUndef(NarrowDstTy).getReg(0)); + if (NumUndefParts != 0) { + Register Undef = MIRBuilder.buildUndef(NarrowDstTy).getReg(0); + for (int I = 0; I != NumDefOps; ++I) + ResultRegs[I].append(NumUndefParts, Undef); + } // Extract the possibly padded result. Use a scratch register if we need to do // a final bitcast, otherwise use the original result register. Register MergeDstReg; - if (IsNarrow && DstTy.isVector()) - MergeDstReg = MRI.createGenericVirtualRegister(DstScalarTy); - else - MergeDstReg = DstReg; + for (int I = 0; I != NumDefOps; ++I) { + if (IsNarrow && DstTy.isVector()) + MergeDstReg = MRI.createGenericVirtualRegister(DstScalarTy); + else + MergeDstReg = DstRegs[I]; - buildWidenedRemergeToDst(MergeDstReg, DstLCMTy, ResultRegs); + buildWidenedRemergeToDst(MergeDstReg, DstLCMTy, ResultRegs[I]); - // Recast to vector if we narrowed a vector - if (IsNarrow && DstTy.isVector()) - MIRBuilder.buildBitcast(DstReg, MergeDstReg); + // Recast to vector if we narrowed a vector + if (IsNarrow && DstTy.isVector()) + MIRBuilder.buildBitcast(DstRegs[I], MergeDstReg); + } MI.eraseFromParent(); return Legalized; @@ -4007,10 +4348,13 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_UDIV: case G_SREM: case G_UREM: + case G_SDIVREM: + case G_UDIVREM: case G_SMIN: case G_SMAX: case G_UMIN: case G_UMAX: + case G_ABS: case G_FMINNUM: case G_FMAXNUM: case G_FMINNUM_IEEE: @@ -4025,6 +4369,9 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_UADDSAT: case G_USUBSAT: return reduceOperationWidth(MI, TypeIdx, NarrowTy); + case G_UMULO: + case G_SMULO: + return fewerElementsVectorMulo(MI, TypeIdx, NarrowTy); case G_SHL: case G_LSHR: case G_ASHR: @@ -4071,14 +4418,286 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy); case G_LOAD: case G_STORE: - return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy); + return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy); case G_SEXT_INREG: return fewerElementsVectorSextInReg(MI, TypeIdx, NarrowTy); + GISEL_VECREDUCE_CASES_NONSEQ + return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy); + case G_SHUFFLE_VECTOR: + return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy); default: return UnableToLegalize; } } +LegalizerHelper::LegalizeResult 
LegalizerHelper::fewerElementsVectorShuffle( + MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) { + assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); + if (TypeIdx != 0) + return UnableToLegalize; + + Register DstReg = MI.getOperand(0).getReg(); + Register Src1Reg = MI.getOperand(1).getReg(); + Register Src2Reg = MI.getOperand(2).getReg(); + ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask(); + LLT DstTy = MRI.getType(DstReg); + LLT Src1Ty = MRI.getType(Src1Reg); + LLT Src2Ty = MRI.getType(Src2Reg); + // The shuffle should be canonicalized by now. + if (DstTy != Src1Ty) + return UnableToLegalize; + if (DstTy != Src2Ty) + return UnableToLegalize; + + if (!isPowerOf2_32(DstTy.getNumElements())) + return UnableToLegalize; + + // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly. + // Further legalization attempts will be needed to split it further. + NarrowTy = + DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2)); + unsigned NewElts = NarrowTy.getNumElements(); + + SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs; + extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs); + extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs); + Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0], + SplitSrc2Regs[1]}; + + Register Hi, Lo; + + // If Lo or Hi uses elements from at most two of the four input vectors, then + // express it as a vector shuffle of those two inputs. Otherwise extract the + // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR. + SmallVector<int, 16> Ops; + for (unsigned High = 0; High < 2; ++High) { + Register &Output = High ? Hi : Lo; + + // Build a shuffle mask for the output, discovering on the fly which + // input vectors to use as shuffle operands (recorded in InputUsed). + // If building a suitable shuffle vector proves too hard, then bail + // out with UseBuildVector set. + unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered. + unsigned FirstMaskIdx = High * NewElts; + bool UseBuildVector = false; + for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) { + // The mask element. This indexes into the input. + int Idx = Mask[FirstMaskIdx + MaskOffset]; + + // The input vector this mask element indexes into. + unsigned Input = (unsigned)Idx / NewElts; + + if (Input >= array_lengthof(Inputs)) { + // The mask element does not index into any input vector. + Ops.push_back(-1); + continue; + } + + // Turn the index into an offset from the start of the input vector. + Idx -= Input * NewElts; + + // Find or create a shuffle vector operand to hold this input. + unsigned OpNo; + for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) { + if (InputUsed[OpNo] == Input) { + // This input vector is already an operand. + break; + } else if (InputUsed[OpNo] == -1U) { + // Create a new operand for this input vector. + InputUsed[OpNo] = Input; + break; + } + } + + if (OpNo >= array_lengthof(InputUsed)) { + // More than two input vectors used! Give up on trying to create a + // shuffle vector. Insert all elements into a BUILD_VECTOR instead. + UseBuildVector = true; + break; + } + + // Add the mask index for the new shuffle vector. + Ops.push_back(Idx + OpNo * NewElts); + } + + if (UseBuildVector) { + LLT EltTy = NarrowTy.getElementType(); + SmallVector<Register, 16> SVOps; + + // Extract the input elements by hand. + for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) { + // The mask element. This indexes into the input.
+ int Idx = Mask[FirstMaskIdx + MaskOffset]; + + // The input vector this mask element indexes into. + unsigned Input = (unsigned)Idx / NewElts; + + if (Input >= array_lengthof(Inputs)) { + // The mask element is "undef" or indexes off the end of the input. + SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0)); + continue; + } + + // Turn the index into an offset from the start of the input vector. + Idx -= Input * NewElts; + + // Extract the vector element by hand. + SVOps.push_back(MIRBuilder + .buildExtractVectorElement( + EltTy, Inputs[Input], + MIRBuilder.buildConstant(LLT::scalar(32), Idx)) + .getReg(0)); + } + + // Construct the Lo/Hi output using a G_BUILD_VECTOR. + Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0); + } else if (InputUsed[0] == -1U) { + // No input vectors were used! The result is undefined. + Output = MIRBuilder.buildUndef(NarrowTy).getReg(0); + } else { + Register Op0 = Inputs[InputUsed[0]]; + // If only one input was used, use an undefined vector for the other. + Register Op1 = InputUsed[1] == -1U + ? MIRBuilder.buildUndef(NarrowTy).getReg(0) + : Inputs[InputUsed[1]]; + // At least one input vector was used. Create a new shuffle vector. + Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0); + } + + Ops.clear(); + } + + MIRBuilder.buildConcatVectors(DstReg, {Lo, Hi}); + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions( + MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) { + unsigned Opc = MI.getOpcode(); + assert(Opc != TargetOpcode::G_VECREDUCE_SEQ_FADD && + Opc != TargetOpcode::G_VECREDUCE_SEQ_FMUL && + "Sequential reductions not expected"); + + if (TypeIdx != 1) + return UnableToLegalize; + + // The semantics of the normal non-sequential reductions allow us to freely + // re-associate the operation. 
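+ // E.g. a G_VECREDUCE_ADD of <6 x s32> with NarrowTy <2 x s32> becomes three + // partial <2 x s32> reductions combined by two scalar G_ADDs.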
+ Register SrcReg = MI.getOperand(1).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + Register DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + + if (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0) + return UnableToLegalize; + + SmallVector<Register> SplitSrcs; + const unsigned NumParts = SrcTy.getNumElements() / NarrowTy.getNumElements(); + extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs); + SmallVector<Register> PartialReductions; + for (unsigned Part = 0; Part < NumParts; ++Part) { + PartialReductions.push_back( + MIRBuilder.buildInstr(Opc, {DstTy}, {SplitSrcs[Part]}).getReg(0)); + } + + unsigned ScalarOpc; + switch (Opc) { + case TargetOpcode::G_VECREDUCE_FADD: + ScalarOpc = TargetOpcode::G_FADD; + break; + case TargetOpcode::G_VECREDUCE_FMUL: + ScalarOpc = TargetOpcode::G_FMUL; + break; + case TargetOpcode::G_VECREDUCE_FMAX: + ScalarOpc = TargetOpcode::G_FMAXNUM; + break; + case TargetOpcode::G_VECREDUCE_FMIN: + ScalarOpc = TargetOpcode::G_FMINNUM; + break; + case TargetOpcode::G_VECREDUCE_ADD: + ScalarOpc = TargetOpcode::G_ADD; + break; + case TargetOpcode::G_VECREDUCE_MUL: + ScalarOpc = TargetOpcode::G_MUL; + break; + case TargetOpcode::G_VECREDUCE_AND: + ScalarOpc = TargetOpcode::G_AND; + break; + case TargetOpcode::G_VECREDUCE_OR: + ScalarOpc = TargetOpcode::G_OR; + break; + case TargetOpcode::G_VECREDUCE_XOR: + ScalarOpc = TargetOpcode::G_XOR; + break; + case TargetOpcode::G_VECREDUCE_SMAX: + ScalarOpc = TargetOpcode::G_SMAX; + break; + case TargetOpcode::G_VECREDUCE_SMIN: + ScalarOpc = TargetOpcode::G_SMIN; + break; + case TargetOpcode::G_VECREDUCE_UMAX: + ScalarOpc = TargetOpcode::G_UMAX; + break; + case TargetOpcode::G_VECREDUCE_UMIN: + ScalarOpc = TargetOpcode::G_UMIN; + break; + default: + LLVM_DEBUG(dbgs() << "Can't legalize: unknown reduction kind.\n"); + return UnableToLegalize; + } + + // If the types involved are powers of 2, we can generate intermediate vector + // ops, before generating a final reduction operation. + if (isPowerOf2_32(SrcTy.getNumElements()) && + isPowerOf2_32(NarrowTy.getNumElements())) { + return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc); + } + + Register Acc = PartialReductions[0]; + for (unsigned Part = 1; Part < NumParts; ++Part) { + if (Part == NumParts - 1) { + MIRBuilder.buildInstr(ScalarOpc, {DstReg}, + {Acc, PartialReductions[Part]}); + } else { + Acc = MIRBuilder + .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]}) + .getReg(0); + } + } + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg, + LLT SrcTy, LLT NarrowTy, + unsigned ScalarOpc) { + SmallVector<Register> SplitSrcs; + // Split the sources into NarrowTy size pieces. + extractParts(SrcReg, NarrowTy, + SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs); + // We're going to do a tree reduction using vector operations until we have + // one NarrowTy size value left. + while (SplitSrcs.size() > 1) { + SmallVector<Register> PartialRdxs; + for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) { + Register LHS = SplitSrcs[Idx]; + Register RHS = SplitSrcs[Idx + 1]; + // Create the intermediate vector op. + Register Res = + MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0); + PartialRdxs.push_back(Res); + } + SplitSrcs = std::move(PartialRdxs); + } + // Finally generate the requested NarrowTy based reduction. 
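+ // E.g. for <8 x s32> and NarrowTy <2 x s32>, three vector G_ADDs fold the + // four pieces into a single <2 x s32> for the reduction below to consume.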
+ Observer.changingInstr(MI); + MI.getOperand(1).setReg(SplitSrcs[0]); + Observer.changedInstr(MI); + return Legalized; +} + LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, const LLT HalfTy, const LLT AmtTy) { @@ -4388,11 +5007,56 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, } case TargetOpcode::G_PHI: return moreElementsVectorPhi(MI, TypeIdx, MoreTy); + case TargetOpcode::G_SHUFFLE_VECTOR: + return moreElementsVectorShuffle(MI, TypeIdx, MoreTy); default: return UnableToLegalize; } } +LegalizerHelper::LegalizeResult +LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI, + unsigned int TypeIdx, LLT MoreTy) { + if (TypeIdx != 0) + return UnableToLegalize; + + Register DstReg = MI.getOperand(0).getReg(); + Register Src1Reg = MI.getOperand(1).getReg(); + Register Src2Reg = MI.getOperand(2).getReg(); + ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask(); + LLT DstTy = MRI.getType(DstReg); + LLT Src1Ty = MRI.getType(Src1Reg); + LLT Src2Ty = MRI.getType(Src2Reg); + unsigned NumElts = DstTy.getNumElements(); + unsigned WidenNumElts = MoreTy.getNumElements(); + + // Expect a canonicalized shuffle. + if (DstTy != Src1Ty || DstTy != Src2Ty) + return UnableToLegalize; + + moreElementsVectorSrc(MI, MoreTy, 1); + moreElementsVectorSrc(MI, MoreTy, 2); + + // Adjust mask based on new input vector length. + SmallVector<int, 16> NewMask; + for (unsigned I = 0; I != NumElts; ++I) { + int Idx = Mask[I]; + if (Idx < static_cast<int>(NumElts)) + NewMask.push_back(Idx); + else + NewMask.push_back(Idx - NumElts + WidenNumElts); + } + for (unsigned I = NumElts; I != WidenNumElts; ++I) + NewMask.push_back(-1); + moreElementsVectorDst(MI, MoreTy, 0); + MIRBuilder.setInstrAndDebugLoc(MI); + MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(), + MI.getOperand(1).getReg(), + MI.getOperand(2).getReg(), NewMask); + MI.eraseFromParent(); + return Legalized; +} + void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs, ArrayRef<Register> Src1Regs, ArrayRef<Register> Src2Regs, @@ -4457,6 +5121,100 @@ void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs, } LegalizerHelper::LegalizeResult +LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { + if (TypeIdx != 0) + return UnableToLegalize; + + Register DstReg = MI.getOperand(0).getReg(); + LLT DstType = MRI.getType(DstReg); + // FIXME: add support for vector types + if (DstType.isVector()) + return UnableToLegalize; + + unsigned Opcode = MI.getOpcode(); + unsigned OpO, OpE, OpF; + switch (Opcode) { + case TargetOpcode::G_SADDO: + case TargetOpcode::G_SADDE: + case TargetOpcode::G_UADDO: + case TargetOpcode::G_UADDE: + case TargetOpcode::G_ADD: + OpO = TargetOpcode::G_UADDO; + OpE = TargetOpcode::G_UADDE; + OpF = TargetOpcode::G_UADDE; + if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE) + OpF = TargetOpcode::G_SADDE; + break; + case TargetOpcode::G_SSUBO: + case TargetOpcode::G_SSUBE: + case TargetOpcode::G_USUBO: + case TargetOpcode::G_USUBE: + case TargetOpcode::G_SUB: + OpO = TargetOpcode::G_USUBO; + OpE = TargetOpcode::G_USUBE; + OpF = TargetOpcode::G_USUBE; + if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE) + OpF = TargetOpcode::G_SSUBE; + break; + default: + llvm_unreachable("Unexpected add/sub opcode!"); + } + + // 1 for a plain add/sub, 2 if this is an operation with a carry-out. 
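+ // E.g. an s64 G_ADD narrowed to s32 becomes G_UADDO on the low halves + // followed by G_UADDE on the high halves, threading the carry through.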
+ unsigned NumDefs = MI.getNumExplicitDefs(); + Register Src1 = MI.getOperand(NumDefs).getReg(); + Register Src2 = MI.getOperand(NumDefs + 1).getReg(); + Register CarryDst, CarryIn; + if (NumDefs == 2) + CarryDst = MI.getOperand(1).getReg(); + if (MI.getNumOperands() == NumDefs + 3) + CarryIn = MI.getOperand(NumDefs + 2).getReg(); + + LLT RegTy = MRI.getType(MI.getOperand(0).getReg()); + LLT LeftoverTy, DummyTy; + SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs; + extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left); + extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left); + + int NarrowParts = Src1Regs.size(); + for (int I = 0, E = Src1Left.size(); I != E; ++I) { + Src1Regs.push_back(Src1Left[I]); + Src2Regs.push_back(Src2Left[I]); + } + DstRegs.reserve(Src1Regs.size()); + + for (int i = 0, e = Src1Regs.size(); i != e; ++i) { + Register DstReg = + MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i])); + Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1)); + // Forward the final carry-out to the destination register + if (i == e - 1 && CarryDst) + CarryOut = CarryDst; + + if (!CarryIn) { + MIRBuilder.buildInstr(OpO, {DstReg, CarryOut}, + {Src1Regs[i], Src2Regs[i]}); + } else if (i == e - 1) { + MIRBuilder.buildInstr(OpF, {DstReg, CarryOut}, + {Src1Regs[i], Src2Regs[i], CarryIn}); + } else { + MIRBuilder.buildInstr(OpE, {DstReg, CarryOut}, + {Src1Regs[i], Src2Regs[i], CarryIn}); + } + + DstRegs.push_back(DstReg); + CarryIn = CarryOut; + } + insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy, + makeArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy, + makeArrayRef(DstRegs).drop_front(NarrowParts)); + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) { Register DstReg = MI.getOperand(0).getReg(); Register Src1 = MI.getOperand(1).getReg(); @@ -4492,6 +5250,31 @@ LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) { } LegalizerHelper::LegalizeResult +LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { + if (TypeIdx != 0) + return UnableToLegalize; + + bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI; + + Register Src = MI.getOperand(1).getReg(); + LLT SrcTy = MRI.getType(Src); + + // If all finite floats fit into the narrowed integer type, we can just swap + // out the result type. This is practically only useful for conversions from + // half to at least 16-bits, so just handle the one case. + if (SrcTy.getScalarType() != LLT::scalar(16) || + NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u)) + return UnableToLegalize; + + Observer.changingInstr(MI); + narrowScalarDst(MI, NarrowTy, 0, + IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT); + Observer.changedInstr(MI); + return Legalized; +} + +LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { if (TypeIdx != 1) @@ -4565,37 +5348,43 @@ LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, if (TypeIdx != 0) return UnableToLegalize; - uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); - uint64_t NarrowSize = NarrowTy.getSizeInBits(); - - // FIXME: add support for when SizeOp0 isn't an exact multiple of - // NarrowSize. 
- if (SizeOp0 % NarrowSize != 0) - return UnableToLegalize; - - int NumParts = SizeOp0 / NarrowSize; - - SmallVector<Register, 2> SrcRegs, DstRegs; + SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs; SmallVector<uint64_t, 2> Indexes; - extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs); + LLT RegTy = MRI.getType(MI.getOperand(0).getReg()); + LLT LeftoverTy; + extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs, + LeftoverRegs); + + for (Register Reg : LeftoverRegs) + SrcRegs.push_back(Reg); + uint64_t NarrowSize = NarrowTy.getSizeInBits(); Register OpReg = MI.getOperand(2).getReg(); uint64_t OpStart = MI.getOperand(3).getImm(); uint64_t OpSize = MRI.getType(OpReg).getSizeInBits(); - for (int i = 0; i < NumParts; ++i) { - unsigned DstStart = i * NarrowSize; + for (int I = 0, E = SrcRegs.size(); I != E; ++I) { + unsigned DstStart = I * NarrowSize; - if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) { - // No part of the insert affects this subregister, forward the original. - DstRegs.push_back(SrcRegs[i]); - continue; - } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) { + if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) { // The entire subregister is defined by this insert, forward the new // value. DstRegs.push_back(OpReg); continue; } + Register SrcReg = SrcRegs[I]; + if (MRI.getType(SrcRegs[I]) == LeftoverTy) { + // The leftover reg is smaller than NarrowTy, so we need to extend it. + SrcReg = MRI.createGenericVirtualRegister(NarrowTy); + MIRBuilder.buildAnyExt(SrcReg, SrcRegs[I]); + } + + if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) { + // No part of the insert affects this subregister, forward the original. + DstRegs.push_back(SrcReg); + continue; + } + // OpSegStart is where this destination segment would start in OpReg if it // extended infinitely in both directions. int64_t ExtractOffset, InsertOffset; @@ -4619,16 +5408,19 @@ LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, } Register DstReg = MRI.createGenericVirtualRegister(NarrowTy); - MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset); + MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset); DstRegs.push_back(DstReg); } - assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered"); + uint64_t WideSize = DstRegs.size() * NarrowSize; Register DstReg = MI.getOperand(0).getReg(); - if(MRI.getType(DstReg).isVector()) - MIRBuilder.buildBuildVector(DstReg, DstRegs); - else + if (WideSize > RegTy.getSizeInBits()) { + Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize)); + MIRBuilder.buildMerge(MergeReg, DstRegs); + MIRBuilder.buildTrunc(DstReg, MergeReg); + } else MIRBuilder.buildMerge(DstReg, DstRegs); + MI.eraseFromParent(); return Legalized; } @@ -5002,6 +5794,209 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) { } } +// Check that (every element of) Reg is undef or not an exact multiple of BW. +static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, + Register Reg, unsigned BW) { + return matchUnaryPredicate( + MRI, Reg, + [=](const Constant *C) { + // Null constant here means an undef. 
+ const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C); + return !CI || CI->getValue().urem(BW) != 0; + }, + /*AllowUndefs*/ true); +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) { + Register Dst = MI.getOperand(0).getReg(); + Register X = MI.getOperand(1).getReg(); + Register Y = MI.getOperand(2).getReg(); + Register Z = MI.getOperand(3).getReg(); + LLT Ty = MRI.getType(Dst); + LLT ShTy = MRI.getType(Z); + + unsigned BW = Ty.getScalarSizeInBits(); + + if (!isPowerOf2_32(BW)) + return UnableToLegalize; + + const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL; + unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL; + + if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) { + // fshl X, Y, Z -> fshr X, Y, -Z + // fshr X, Y, Z -> fshl X, Y, -Z + auto Zero = MIRBuilder.buildConstant(ShTy, 0); + Z = MIRBuilder.buildSub(Ty, Zero, Z).getReg(0); + } else { + // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z + // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z + auto One = MIRBuilder.buildConstant(ShTy, 1); + if (IsFSHL) { + Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0); + X = MIRBuilder.buildLShr(Ty, X, One).getReg(0); + } else { + X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0); + Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0); + } + + Z = MIRBuilder.buildNot(ShTy, Z).getReg(0); + } + + MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z}); + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) { + Register Dst = MI.getOperand(0).getReg(); + Register X = MI.getOperand(1).getReg(); + Register Y = MI.getOperand(2).getReg(); + Register Z = MI.getOperand(3).getReg(); + LLT Ty = MRI.getType(Dst); + LLT ShTy = MRI.getType(Z); + + const unsigned BW = Ty.getScalarSizeInBits(); + const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL; + + Register ShX, ShY; + Register ShAmt, InvShAmt; + + // FIXME: Emit optimized urem by constant instead of letting it expand later. + if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) { + // fshl: X << C | Y >> (BW - C) + // fshr: X << (BW - C) | Y >> C + // where C = Z % BW is not zero + auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW); + ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0); + InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0); + ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0); + ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? 
InvShAmt : ShAmt).getReg(0); + } else { + // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW)) + // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW) + auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1); + if (isPowerOf2_32(BW)) { + // Z % BW -> Z & (BW - 1) + ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0); + // (BW - 1) - (Z % BW) -> ~Z & (BW - 1) + auto NotZ = MIRBuilder.buildNot(ShTy, Z); + InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0); + } else { + auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW); + ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0); + InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0); + } + + auto One = MIRBuilder.buildConstant(ShTy, 1); + if (IsFSHL) { + ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0); + auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One); + ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0); + } else { + auto ShX1 = MIRBuilder.buildShl(Ty, X, One); + ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0); + ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0); + } + } + + MIRBuilder.buildOr(Dst, ShX, ShY); + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerFunnelShift(MachineInstr &MI) { + // These operations approximately do the following (while avoiding undefined + // shifts by BW): + // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW))) + // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW)) + Register Dst = MI.getOperand(0).getReg(); + LLT Ty = MRI.getType(Dst); + LLT ShTy = MRI.getType(MI.getOperand(3).getReg()); + + bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL; + unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL; + + // TODO: Use smarter heuristic that accounts for vector legalization. + if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower) + return lowerFunnelShiftAsShifts(MI); + + // This only works for powers of 2, fallback to shifts if it fails. + LegalizerHelper::LegalizeResult Result = lowerFunnelShiftWithInverse(MI); + if (Result == UnableToLegalize) + return lowerFunnelShiftAsShifts(MI); + return Result; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) { + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + Register Amt = MI.getOperand(2).getReg(); + LLT AmtTy = MRI.getType(Amt); + auto Zero = MIRBuilder.buildConstant(AmtTy, 0); + bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL; + unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL; + auto Neg = MIRBuilder.buildSub(AmtTy, Zero, Amt); + MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg}); + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) { + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + Register Amt = MI.getOperand(2).getReg(); + LLT DstTy = MRI.getType(Dst); + LLT SrcTy = MRI.getType(Dst); + LLT AmtTy = MRI.getType(Amt); + + unsigned EltSizeInBits = DstTy.getScalarSizeInBits(); + bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL; + + MIRBuilder.setInstrAndDebugLoc(MI); + + // If a rotate in the other direction is supported, use it. + unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL; + if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) && + isPowerOf2_32(EltSizeInBits)) + return lowerRotateWithReverseRotate(MI); + + auto Zero = MIRBuilder.buildConstant(AmtTy, 0); + unsigned ShOpc = IsLeft ? 
TargetOpcode::G_SHL : TargetOpcode::G_LSHR; + unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL; + auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1); + Register ShVal; + Register RevShiftVal; + if (isPowerOf2_32(EltSizeInBits)) { + // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1)) + // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1)) + auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt); + auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC); + ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0); + auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC); + RevShiftVal = + MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, RevAmt}).getReg(0); + } else { + // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w)) + // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w)) + auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits); + auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC); + ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0); + auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt); + auto One = MIRBuilder.buildConstant(AmtTy, 1); + auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One}); + RevShiftVal = + MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0); + } + MIRBuilder.buildOr(Dst, ShVal, RevShiftVal); + MI.eraseFromParent(); + return Legalized; +} + // Expand s32 = G_UITOFP s64 using bit operations to an IEEE float // representation. LegalizerHelper::LegalizeResult @@ -5192,7 +6187,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) { // Expand f32 -> i64 conversion // This algorithm comes from compiler-rt's implementation of fixsfdi: - // https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/builtins/fixsfdi.c + // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c unsigned SrcEltBits = SrcTy.getScalarSizeInBits(); @@ -5429,31 +6424,27 @@ LegalizerHelper::lowerFCopySign(MachineInstr &MI) { auto NotSignBitMask = MIRBuilder.buildConstant( Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1)); - auto And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask); - MachineInstr *Or; - + Register And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask).getReg(0); + Register And1; if (Src0Ty == Src1Ty) { - auto And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask); - Or = MIRBuilder.buildOr(Dst, And0, And1); + And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0); } else if (Src0Size > Src1Size) { auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size); auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1); auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt); - auto And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask); - Or = MIRBuilder.buildOr(Dst, And0, And1); + And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0); } else { auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size); auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt); auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift); - auto And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask); - Or = MIRBuilder.buildOr(Dst, And0, And1); + And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0); } // Be careful about setting nsz/nnan/ninf on every instruction, since the // constants are a nan and -0.0, but the final result should preserve // everything. 
- if (unsigned Flags = MI.getFlags()) - Or->setFlags(Flags); + unsigned Flags = MI.getFlags(); + MIRBuilder.buildOr(Dst, And0, And1, Flags); MI.eraseFromParent(); return Legalized; @@ -6254,3 +7245,51 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) { MI.eraseFromParent(); return Legalized; } + +LegalizerHelper::LegalizeResult LegalizerHelper::lowerDIVREM(MachineInstr &MI) { + // Split DIVREM into individual instructions. + unsigned Opcode = MI.getOpcode(); + + MIRBuilder.buildInstr( + Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV + : TargetOpcode::G_UDIV, + {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)}); + MIRBuilder.buildInstr( + Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM + : TargetOpcode::G_UREM, + {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)}); + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerAbsToAddXor(MachineInstr &MI) { + // Expand %res = G_ABS %a into: + // %v1 = G_ASHR %a, scalar_size-1 + // %v2 = G_ADD %a, %v1 + // %res = G_XOR %v2, %v1 + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + Register OpReg = MI.getOperand(1).getReg(); + auto ShiftAmt = + MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1); + auto Shift = MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt); + auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift); + MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift); + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerAbsToMaxNeg(MachineInstr &MI) { + // Expand %res = G_ABS %a into: + // %v1 = G_CONSTANT 0 + // %v2 = G_SUB %v1, %a + // %res = G_SMAX %a, %v2 + Register SrcReg = MI.getOperand(1).getReg(); + LLT Ty = MRI.getType(SrcReg); + auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0); + auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0); + MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub); + MI.eraseFromParent(); + return Legalized; +} diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp index 30acac14bc5f..3e3141657e87 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp @@ -9,11 +9,6 @@ // Implement an interface to specify and query how an illegal operation on a // given type should be expanded. // -// Issues to be resolved: -// + Make it fast. -// + Support weird types like i3, <7 x i3>, ... -// + Operations with more than one type (ICMP, CMPXCHG, intrinsics, ...) -// //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" @@ -93,7 +88,7 @@ raw_ostream &LegalityQuery::print(raw_ostream &OS) const { OS << Opcode << ", MMOs={"; for (const auto &MMODescr : MMODescrs) { - OS << MMODescr.SizeInBits << ", "; + OS << MMODescr.MemoryTy << ", "; } OS << "}"; @@ -256,146 +251,6 @@ bool LegalizeRuleSet::verifyImmIdxsCoverage(unsigned NumImmIdxs) const { #endif } -LegalizerInfo::LegalizerInfo() : TablesInitialized(false) { - // Set defaults. - // FIXME: these two (G_ANYEXT and G_TRUNC?) can be legalized to the - // fundamental load/store Jakob proposed. Once loads & stores are supported. 
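// ---------------------------------------------------------------------------
// Editorial aside, not part of the patch: the two G_ABS expansions above,
// restated on a plain 32-bit integer. A sketch that, like the MIR sequence,
// assumes two's-complement wraparound (INT32_MIN maps to itself) and an
// arithmetic right shift for signed values; helper names are illustrative.
#include <algorithm>
#include <cstdint>
static int32_t AbsAddXor(int32_t A) {
  uint32_t U = static_cast<uint32_t>(A);
  uint32_t Sign = static_cast<uint32_t>(A >> 31); // G_ASHR: 0 or 0xFFFFFFFF
  return static_cast<int32_t>((U + Sign) ^ Sign); // G_ADD, then G_XOR
}
static int32_t AbsMaxNeg(int32_t A) {
  int32_t Neg = static_cast<int32_t>(0u - static_cast<uint32_t>(A)); // G_SUB 0, %a
  return std::max(A, Neg);                                           // G_SMAX
}
// ---------------------------------------------------------------------------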
- setScalarAction(TargetOpcode::G_ANYEXT, 1, {{1, Legal}});
- setScalarAction(TargetOpcode::G_ZEXT, 1, {{1, Legal}});
- setScalarAction(TargetOpcode::G_SEXT, 1, {{1, Legal}});
- setScalarAction(TargetOpcode::G_TRUNC, 0, {{1, Legal}});
- setScalarAction(TargetOpcode::G_TRUNC, 1, {{1, Legal}});
-
- setScalarAction(TargetOpcode::G_INTRINSIC, 0, {{1, Legal}});
- setScalarAction(TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS, 0, {{1, Legal}});
-
- setLegalizeScalarToDifferentSizeStrategy(
- TargetOpcode::G_IMPLICIT_DEF, 0, narrowToSmallerAndUnsupportedIfTooSmall);
- setLegalizeScalarToDifferentSizeStrategy(
- TargetOpcode::G_ADD, 0, widenToLargerTypesAndNarrowToLargest);
- setLegalizeScalarToDifferentSizeStrategy(
- TargetOpcode::G_OR, 0, widenToLargerTypesAndNarrowToLargest);
- setLegalizeScalarToDifferentSizeStrategy(
- TargetOpcode::G_LOAD, 0, narrowToSmallerAndUnsupportedIfTooSmall);
- setLegalizeScalarToDifferentSizeStrategy(
- TargetOpcode::G_STORE, 0, narrowToSmallerAndUnsupportedIfTooSmall);
-
- setLegalizeScalarToDifferentSizeStrategy(
- TargetOpcode::G_BRCOND, 0, widenToLargerTypesUnsupportedOtherwise);
- setLegalizeScalarToDifferentSizeStrategy(
- TargetOpcode::G_INSERT, 0, narrowToSmallerAndUnsupportedIfTooSmall);
- setLegalizeScalarToDifferentSizeStrategy(
- TargetOpcode::G_EXTRACT, 0, narrowToSmallerAndUnsupportedIfTooSmall);
- setLegalizeScalarToDifferentSizeStrategy(
- TargetOpcode::G_EXTRACT, 1, narrowToSmallerAndUnsupportedIfTooSmall);
- setScalarAction(TargetOpcode::G_FNEG, 0, {{1, Lower}});
-}
-
-void LegalizerInfo::computeTables() {
- assert(TablesInitialized == false);
-
- for (unsigned OpcodeIdx = 0; OpcodeIdx <= LastOp - FirstOp; ++OpcodeIdx) {
- const unsigned Opcode = FirstOp + OpcodeIdx;
- for (unsigned TypeIdx = 0; TypeIdx != SpecifiedActions[OpcodeIdx].size();
- ++TypeIdx) {
- // 0. Collect information specified through the setAction API, i.e.
- // for specific bit sizes.
- // For scalar types:
- SizeAndActionsVec ScalarSpecifiedActions;
- // For pointer types:
- std::map<uint16_t, SizeAndActionsVec> AddressSpace2SpecifiedActions;
- // For vector types:
- std::map<uint16_t, SizeAndActionsVec> ElemSize2SpecifiedActions;
- for (auto LLT2Action : SpecifiedActions[OpcodeIdx][TypeIdx]) {
- const LLT Type = LLT2Action.first;
- const LegalizeAction Action = LLT2Action.second;
-
- auto SizeAction = std::make_pair(Type.getSizeInBits(), Action);
- if (Type.isPointer())
- AddressSpace2SpecifiedActions[Type.getAddressSpace()].push_back(
- SizeAction);
- else if (Type.isVector())
- ElemSize2SpecifiedActions[Type.getElementType().getSizeInBits()]
- .push_back(SizeAction);
- else
- ScalarSpecifiedActions.push_back(SizeAction);
- }
-
- // 1. Handle scalar types
- {
- // Decide how to handle bit sizes for which no explicit specification
- // was given.
- SizeChangeStrategy S = &unsupportedForDifferentSizes;
- if (TypeIdx < ScalarSizeChangeStrategies[OpcodeIdx].size() &&
- ScalarSizeChangeStrategies[OpcodeIdx][TypeIdx] != nullptr)
- S = ScalarSizeChangeStrategies[OpcodeIdx][TypeIdx];
- llvm::sort(ScalarSpecifiedActions);
- checkPartialSizeAndActionsVector(ScalarSpecifiedActions);
- setScalarAction(Opcode, TypeIdx, S(ScalarSpecifiedActions));
- }
-
- // 2. Handle pointer types
- for (auto PointerSpecifiedActions : AddressSpace2SpecifiedActions) {
- llvm::sort(PointerSpecifiedActions.second);
- checkPartialSizeAndActionsVector(PointerSpecifiedActions.second);
- // For pointer types, we assume that there isn't a meaningful way
- // to change the number of bits used in the pointer.
- setPointerAction(
- Opcode, TypeIdx, PointerSpecifiedActions.first,
- unsupportedForDifferentSizes(PointerSpecifiedActions.second));
- }
-
- // 3. Handle vector types
- SizeAndActionsVec ElementSizesSeen;
- for (auto VectorSpecifiedActions : ElemSize2SpecifiedActions) {
- llvm::sort(VectorSpecifiedActions.second);
- const uint16_t ElementSize = VectorSpecifiedActions.first;
- ElementSizesSeen.push_back({ElementSize, Legal});
- checkPartialSizeAndActionsVector(VectorSpecifiedActions.second);
- // For vector types, we assume that the best way to adapt the number
- // of elements is to the next larger number of elements type for which
- // the vector type is legal, unless there is no such type. In that case,
- // legalize towards a vector type with a smaller number of elements.
- SizeAndActionsVec NumElementsActions;
- for (SizeAndAction BitsizeAndAction : VectorSpecifiedActions.second) {
- assert(BitsizeAndAction.first % ElementSize == 0);
- const uint16_t NumElements = BitsizeAndAction.first / ElementSize;
- NumElementsActions.push_back({NumElements, BitsizeAndAction.second});
- }
- setVectorNumElementAction(
- Opcode, TypeIdx, ElementSize,
- moreToWiderTypesAndLessToWidest(NumElementsActions));
- }
- llvm::sort(ElementSizesSeen);
- SizeChangeStrategy VectorElementSizeChangeStrategy =
- &unsupportedForDifferentSizes;
- if (TypeIdx < VectorElementSizeChangeStrategies[OpcodeIdx].size() &&
- VectorElementSizeChangeStrategies[OpcodeIdx][TypeIdx] != nullptr)
- VectorElementSizeChangeStrategy =
- VectorElementSizeChangeStrategies[OpcodeIdx][TypeIdx];
- setScalarInVectorAction(
- Opcode, TypeIdx, VectorElementSizeChangeStrategy(ElementSizesSeen));
- }
- }
-
- TablesInitialized = true;
-}
-
-// FIXME: inefficient implementation for now. Without ComputeValueVTs we're
-// probably going to need specialized lookup structures for various types before
-// we have any hope of doing well with something like <13 x i3>. Even the common
-// cases should do better than what we have now.
-std::pair<LegalizeAction, LLT>
-LegalizerInfo::getAspectAction(const InstrAspect &Aspect) const {
- assert(TablesInitialized && "backend forgot to call computeTables");
- // These *have* to be implemented for now, they're the fundamental basis of
- // how everything else is transformed.
- if (Aspect.Type.isScalar() || Aspect.Type.isPointer())
- return findScalarLegalAction(Aspect);
- assert(Aspect.Type.isVector());
- return findVectorLegalAction(Aspect);
-}
-
/// Helper function to get LLT for the given type index.
static LLT getTypeFromTypeIdx(const MachineInstr &MI,
const MachineRegisterInfo &MRI, unsigned OpIdx,
@@ -446,8 +301,8 @@ LegalizeRuleSet &LegalizerInfo::getActionDefinitionsBuilder(
assert(!llvm::empty(Opcodes) && Opcodes.begin() + 1 != Opcodes.end() &&
"Initializer list must have at least two opcodes");
- for (auto I = Opcodes.begin() + 1, E = Opcodes.end(); I != E; ++I)
- aliasActionDefinitions(Representative, *I);
+ for (unsigned Op : llvm::drop_begin(Opcodes))
+ aliasActionDefinitions(Representative, Op);
auto &Return = getActionDefinitionsBuilder(Representative);
Return.setIsAliasedByAnother();
@@ -469,23 +324,13 @@ LegalizerInfo::getAction(const LegalityQuery &Query) const {
return Step;
}
- for (unsigned i = 0; i < Query.Types.size(); ++i) {
- auto Action = getAspectAction({Query.Opcode, i, Query.Types[i]});
- if (Action.first != Legal) {
- LLVM_DEBUG(dbgs() << "..
(legacy) Type " << i << " Action=" - << Action.first << ", " << Action.second << "\n"); - return {Action.first, i, Action.second}; - } else - LLVM_DEBUG(dbgs() << ".. (legacy) Type " << i << " Legal\n"); - } - LLVM_DEBUG(dbgs() << ".. (legacy) Legal\n"); - return {Legal, 0, LLT{}}; + return getLegacyLegalizerInfo().getAction(Query); } LegalizeActionStep LegalizerInfo::getAction(const MachineInstr &MI, const MachineRegisterInfo &MRI) const { - SmallVector<LLT, 2> Types; + SmallVector<LLT, 8> Types; SmallBitVector SeenTypes(8); const MCOperandInfo *OpInfo = MI.getDesc().OpInfo; // FIXME: probably we'll need to cache the results here somehow? @@ -507,8 +352,8 @@ LegalizerInfo::getAction(const MachineInstr &MI, SmallVector<LegalityQuery::MemDesc, 2> MemDescrs; for (const auto &MMO : MI.memoperands()) - MemDescrs.push_back({8 * MMO->getSize() /* in bits */, - 8 * MMO->getAlign().value(), MMO->getOrdering()}); + MemDescrs.push_back({MMO->getMemoryType(), 8 * MMO->getAlign().value(), + MMO->getSuccessOrdering()}); return getAction({MI.getOpcode(), Types, MemDescrs}); } @@ -526,163 +371,6 @@ bool LegalizerInfo::isLegalOrCustom(const MachineInstr &MI, return Action == Legal || Action == Custom; } -LegalizerInfo::SizeAndActionsVec -LegalizerInfo::increaseToLargerTypesAndDecreaseToLargest( - const SizeAndActionsVec &v, LegalizeAction IncreaseAction, - LegalizeAction DecreaseAction) { - SizeAndActionsVec result; - unsigned LargestSizeSoFar = 0; - if (v.size() >= 1 && v[0].first != 1) - result.push_back({1, IncreaseAction}); - for (size_t i = 0; i < v.size(); ++i) { - result.push_back(v[i]); - LargestSizeSoFar = v[i].first; - if (i + 1 < v.size() && v[i + 1].first != v[i].first + 1) { - result.push_back({LargestSizeSoFar + 1, IncreaseAction}); - LargestSizeSoFar = v[i].first + 1; - } - } - result.push_back({LargestSizeSoFar + 1, DecreaseAction}); - return result; -} - -LegalizerInfo::SizeAndActionsVec -LegalizerInfo::decreaseToSmallerTypesAndIncreaseToSmallest( - const SizeAndActionsVec &v, LegalizeAction DecreaseAction, - LegalizeAction IncreaseAction) { - SizeAndActionsVec result; - if (v.size() == 0 || v[0].first != 1) - result.push_back({1, IncreaseAction}); - for (size_t i = 0; i < v.size(); ++i) { - result.push_back(v[i]); - if (i + 1 == v.size() || v[i + 1].first != v[i].first + 1) { - result.push_back({v[i].first + 1, DecreaseAction}); - } - } - return result; -} - -LegalizerInfo::SizeAndAction -LegalizerInfo::findAction(const SizeAndActionsVec &Vec, const uint32_t Size) { - assert(Size >= 1); - // Find the last element in Vec that has a bitsize equal to or smaller than - // the requested bit size. - // That is the element just before the first element that is bigger than Size. - auto It = partition_point( - Vec, [=](const SizeAndAction &A) { return A.first <= Size; }); - assert(It != Vec.begin() && "Does Vec not start with size 1?"); - int VecIdx = It - Vec.begin() - 1; - - LegalizeAction Action = Vec[VecIdx].second; - switch (Action) { - case Legal: - case Bitcast: - case Lower: - case Libcall: - case Custom: - return {Size, Action}; - case FewerElements: - // FIXME: is this special case still needed and correct? - // Special case for scalarization: - if (Vec == SizeAndActionsVec({{1, FewerElements}})) - return {1, FewerElements}; - LLVM_FALLTHROUGH; - case NarrowScalar: { - // The following needs to be a loop, as for now, we do allow needing to - // go over "Unsupported" bit sizes before finding a legalizable bit size. - // e.g. (s8, WidenScalar), (s9, Unsupported), (s32, Legal). 
if Size==8, - // we need to iterate over s9, and then to s32 to return (s32, Legal). - // If we want to get rid of the below loop, we should have stronger asserts - // when building the SizeAndActionsVecs, probably not allowing - // "Unsupported" unless at the ends of the vector. - for (int i = VecIdx - 1; i >= 0; --i) - if (!needsLegalizingToDifferentSize(Vec[i].second) && - Vec[i].second != Unsupported) - return {Vec[i].first, Action}; - llvm_unreachable(""); - } - case WidenScalar: - case MoreElements: { - // See above, the following needs to be a loop, at least for now. - for (std::size_t i = VecIdx + 1; i < Vec.size(); ++i) - if (!needsLegalizingToDifferentSize(Vec[i].second) && - Vec[i].second != Unsupported) - return {Vec[i].first, Action}; - llvm_unreachable(""); - } - case Unsupported: - return {Size, Unsupported}; - case NotFound: - case UseLegacyRules: - llvm_unreachable("NotFound"); - } - llvm_unreachable("Action has an unknown enum value"); -} - -std::pair<LegalizeAction, LLT> -LegalizerInfo::findScalarLegalAction(const InstrAspect &Aspect) const { - assert(Aspect.Type.isScalar() || Aspect.Type.isPointer()); - if (Aspect.Opcode < FirstOp || Aspect.Opcode > LastOp) - return {NotFound, LLT()}; - const unsigned OpcodeIdx = getOpcodeIdxForOpcode(Aspect.Opcode); - if (Aspect.Type.isPointer() && - AddrSpace2PointerActions[OpcodeIdx].find(Aspect.Type.getAddressSpace()) == - AddrSpace2PointerActions[OpcodeIdx].end()) { - return {NotFound, LLT()}; - } - const SmallVector<SizeAndActionsVec, 1> &Actions = - Aspect.Type.isPointer() - ? AddrSpace2PointerActions[OpcodeIdx] - .find(Aspect.Type.getAddressSpace()) - ->second - : ScalarActions[OpcodeIdx]; - if (Aspect.Idx >= Actions.size()) - return {NotFound, LLT()}; - const SizeAndActionsVec &Vec = Actions[Aspect.Idx]; - // FIXME: speed up this search, e.g. by using a results cache for repeated - // queries? - auto SizeAndAction = findAction(Vec, Aspect.Type.getSizeInBits()); - return {SizeAndAction.second, - Aspect.Type.isScalar() ? LLT::scalar(SizeAndAction.first) - : LLT::pointer(Aspect.Type.getAddressSpace(), - SizeAndAction.first)}; -} - -std::pair<LegalizeAction, LLT> -LegalizerInfo::findVectorLegalAction(const InstrAspect &Aspect) const { - assert(Aspect.Type.isVector()); - // First legalize the vector element size, then legalize the number of - // lanes in the vector. 
- if (Aspect.Opcode < FirstOp || Aspect.Opcode > LastOp) - return {NotFound, Aspect.Type}; - const unsigned OpcodeIdx = getOpcodeIdxForOpcode(Aspect.Opcode); - const unsigned TypeIdx = Aspect.Idx; - if (TypeIdx >= ScalarInVectorActions[OpcodeIdx].size()) - return {NotFound, Aspect.Type}; - const SizeAndActionsVec &ElemSizeVec = - ScalarInVectorActions[OpcodeIdx][TypeIdx]; - - LLT IntermediateType; - auto ElementSizeAndAction = - findAction(ElemSizeVec, Aspect.Type.getScalarSizeInBits()); - IntermediateType = - LLT::vector(Aspect.Type.getNumElements(), ElementSizeAndAction.first); - if (ElementSizeAndAction.second != Legal) - return {ElementSizeAndAction.second, IntermediateType}; - - auto i = NumElements2Actions[OpcodeIdx].find( - IntermediateType.getScalarSizeInBits()); - if (i == NumElements2Actions[OpcodeIdx].end()) { - return {NotFound, IntermediateType}; - } - const SizeAndActionsVec &NumElementsVec = (*i).second[TypeIdx]; - auto NumElementsAndAction = - findAction(NumElementsVec, IntermediateType.getNumElements()); - return {NumElementsAndAction.second, - LLT::vector(NumElementsAndAction.first, - IntermediateType.getScalarSizeInBits())}; -} - unsigned LegalizerInfo::getExtOpcodeForWideningConstant(LLT SmallTy) const { return SmallTy.isByteSized() ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT; } diff --git a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp index 30c00c63f6f4..d45fdae43f01 100644 --- a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp @@ -82,8 +82,7 @@ bool Localizer::localizeInterBlock(MachineFunction &MF, // we start doing CSE across blocks. auto &MBB = MF.front(); auto &TL = *MF.getSubtarget().getTargetLowering(); - for (auto RI = MBB.rbegin(), RE = MBB.rend(); RI != RE; ++RI) { - MachineInstr &MI = *RI; + for (MachineInstr &MI : llvm::reverse(MBB)) { if (!TL.shouldLocalize(MI, TTI)) continue; LLVM_DEBUG(dbgs() << "Should localize: " << MI); diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 67ef02a4e7b2..54ac62793b08 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -240,6 +240,18 @@ MachineInstrBuilder MachineIRBuilder::buildCopy(const DstOp &Res, return buildInstr(TargetOpcode::COPY, Res, Op); } +MachineInstrBuilder MachineIRBuilder::buildAssertSExt(const DstOp &Res, + const SrcOp &Op, + unsigned Size) { + return buildInstr(TargetOpcode::G_ASSERT_SEXT, Res, Op).addImm(Size); +} + +MachineInstrBuilder MachineIRBuilder::buildAssertZExt(const DstOp &Res, + const SrcOp &Op, + unsigned Size) { + return buildInstr(TargetOpcode::G_ASSERT_ZEXT, Res, Op).addImm(Size); +} + MachineInstrBuilder MachineIRBuilder::buildConstant(const DstOp &Res, const ConstantInt &Val) { LLT Ty = Res.getLLTTy(*getMRI()); @@ -335,10 +347,9 @@ MachineIRBuilder::buildLoad(const DstOp &Dst, const SrcOp &Addr, MMOFlags |= MachineMemOperand::MOLoad; assert((MMOFlags & MachineMemOperand::MOStore) == 0); - uint64_t Size = MemoryLocation::getSizeOrUnknown( - TypeSize::Fixed(Dst.getLLTTy(*getMRI()).getSizeInBytes())); + LLT Ty = Dst.getLLTTy(*getMRI()); MachineMemOperand *MMO = - getMF().getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo); + getMF().getMachineMemOperand(PtrInfo, MMOFlags, Ty, Alignment, AAInfo); return buildLoad(Dst, Addr, *MMO); } @@ -361,7 +372,7 @@ MachineInstrBuilder MachineIRBuilder::buildLoadFromOffset( MachineMemOperand &BaseMMO, int64_t Offset) { LLT 
LoadTy = Dst.getLLTTy(*getMRI()); MachineMemOperand *OffsetMMO = - getMF().getMachineMemOperand(&BaseMMO, Offset, LoadTy.getSizeInBytes()); + getMF().getMachineMemOperand(&BaseMMO, Offset, LoadTy); if (Offset == 0) // This may be a size or type changing load. return buildLoad(Dst, BasePtr, *OffsetMMO); @@ -394,10 +405,9 @@ MachineIRBuilder::buildStore(const SrcOp &Val, const SrcOp &Addr, MMOFlags |= MachineMemOperand::MOStore; assert((MMOFlags & MachineMemOperand::MOLoad) == 0); - uint64_t Size = MemoryLocation::getSizeOrUnknown( - TypeSize::Fixed(Val.getLLTTy(*getMRI()).getSizeInBytes())); + LLT Ty = Val.getLLTTy(*getMRI()); MachineMemOperand *MMO = - getMF().getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo); + getMF().getMachineMemOperand(PtrInfo, MMOFlags, Ty, Alignment, AAInfo); return buildStore(Val, Addr, *MMO); } @@ -474,6 +484,15 @@ MachineInstrBuilder MachineIRBuilder::buildAnyExtOrTrunc(const DstOp &Res, return buildExtOrTrunc(TargetOpcode::G_ANYEXT, Res, Op); } +MachineInstrBuilder MachineIRBuilder::buildZExtInReg(const DstOp &Res, + const SrcOp &Op, + int64_t ImmOp) { + LLT ResTy = Res.getLLTTy(*getMRI()); + auto Mask = buildConstant( + ResTy, APInt::getLowBitsSet(ResTy.getScalarSizeInBits(), ImmOp)); + return buildAnd(Res, Op, Mask); +} + MachineInstrBuilder MachineIRBuilder::buildCast(const DstOp &Dst, const SrcOp &Src) { LLT SrcTy = Src.getLLTTy(*getMRI()); @@ -657,10 +676,11 @@ MachineInstrBuilder MachineIRBuilder::buildShuffleVector(const DstOp &Res, assert(Src1Ty.getNumElements() + Src2Ty.getNumElements() >= Mask.size()); assert(DstTy.getElementType() == Src1Ty.getElementType() && DstTy.getElementType() == Src2Ty.getElementType()); + (void)DstTy; (void)Src1Ty; (void)Src2Ty; ArrayRef<int> MaskAlloc = getMF().allocateShuffleMask(Mask); - return buildInstr(TargetOpcode::G_SHUFFLE_VECTOR, {DstTy}, {Src1, Src2}) + return buildInstr(TargetOpcode::G_SHUFFLE_VECTOR, {Res}, {Src1, Src2}) .addShuffleMask(MaskAlloc); } @@ -1095,7 +1115,8 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc, DstOps[0].getLLTTy(*getMRI()); }) && "type mismatch in output list"); - assert(DstOps.size() * DstOps[0].getLLTTy(*getMRI()).getSizeInBits() == + assert((TypeSize::ScalarTy)DstOps.size() * + DstOps[0].getLLTTy(*getMRI()).getSizeInBits() == SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() && "input operands do not cover output register"); break; @@ -1109,7 +1130,8 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc, SrcOps[0].getLLTTy(*getMRI()); }) && "type mismatch in input list"); - assert(SrcOps.size() * SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() == + assert((TypeSize::ScalarTy)SrcOps.size() * + SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() == DstOps[0].getLLTTy(*getMRI()).getSizeInBits() && "input operands do not cover output register"); if (SrcOps.size() == 1) @@ -1160,7 +1182,8 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc, SrcOps[0].getLLTTy(*getMRI()); }) && "type mismatch in input list"); - assert(SrcOps.size() * SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() == + assert((TypeSize::ScalarTy)SrcOps.size() * + SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() == DstOps[0].getLLTTy(*getMRI()).getSizeInBits() && "input scalars do not exactly cover the output vector register"); break; @@ -1193,7 +1216,8 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc, SrcOps[0].getLLTTy(*getMRI())); }) && "type mismatch in input list"); - assert(SrcOps.size() * SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() == + 
assert((TypeSize::ScalarTy)SrcOps.size() * + SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() == DstOps[0].getLLTTy(*getMRI()).getSizeInBits() && "input vectors do not exactly cover the output vector register"); break; diff --git a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp index 356e0e437d32..644a81d8021e 100644 --- a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp +++ b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp @@ -622,6 +622,23 @@ bool RegBankSelect::applyMapping( bool RegBankSelect::assignInstr(MachineInstr &MI) { LLVM_DEBUG(dbgs() << "Assign: " << MI); + + unsigned Opc = MI.getOpcode(); + if (isPreISelGenericOptimizationHint(Opc)) { + assert((Opc == TargetOpcode::G_ASSERT_ZEXT || + Opc == TargetOpcode::G_ASSERT_SEXT) && + "Unexpected hint opcode!"); + // The only correct mapping for these is to always use the source register + // bank. + const RegisterBank *RB = MRI->getRegBankOrNull(MI.getOperand(1).getReg()); + // We can assume every instruction above this one has a selected register + // bank. + assert(RB && "Expected source register to have a register bank?"); + LLVM_DEBUG(dbgs() << "... Hint always uses source's register bank.\n"); + MRI->setRegBank(MI.getOperand(0).getReg(), *RB); + return true; + } + // Remember the repairing placement for all the operands. SmallVector<RepairingPlacement, 4> RepairPts; @@ -702,6 +719,10 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) { if (MI.isDebugInstr()) continue; + // Ignore IMPLICIT_DEF which must have a regclass. + if (MI.isImplicitDef()) + continue; + if (!assignInstr(MI)) { reportGISelFailure(MF, *TPC, *MORE, "gisel-regbankselect", "unable to map instruction", MI); diff --git a/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp b/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp index fc9c802693ab..5c4d18ad79c5 100644 --- a/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp +++ b/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp @@ -10,6 +10,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GlobalISel/RegisterBank.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/Config/llvm-config.h" #include "llvm/Support/Debug.h" @@ -98,17 +99,12 @@ void RegisterBank::print(raw_ostream &OS, bool IsForDebug, return; assert(ContainedRegClasses.size() == TRI->getNumRegClasses() && "TRI does not match the initialization process?"); - bool IsFirst = true; OS << "Covered register classes:\n"; + ListSeparator LS; for (unsigned RCId = 0, End = TRI->getNumRegClasses(); RCId != End; ++RCId) { const TargetRegisterClass &RC = *TRI->getRegClass(RCId); - if (!covers(RC)) - continue; - - if (!IsFirst) - OS << ", "; - OS << TRI->getRegClassName(&RC); - IsFirst = false; + if (covers(RC)) + OS << LS << TRI->getRegClassName(&RC); } } diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index cd2483224489..f64e41b9dccc 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" +#include "llvm/CodeGen/MachineSizeOpts.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/StackProtector.h" #include "llvm/CodeGen/TargetInstrInfo.h" @@ -199,6 +200,10 @@ bool llvm::isTriviallyDead(const MachineInstr &MI, // Don't delete frame allocation labels. 
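// ---------------------------------------------------------------------------
// Editorial aside, not part of the patch: llvm::ListSeparator, adopted in
// RegisterBank::print above, emits nothing on its first use and the separator
// on every later one, which is what replaces the manual IsFirst flag. A
// minimal usage sketch; printNames is an illustrative name only.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/raw_ostream.h"
static void printNames(llvm::raw_ostream &OS,
                       llvm::ArrayRef<llvm::StringRef> Names) {
  llvm::ListSeparator LS; // separator defaults to ", "
  for (llvm::StringRef N : Names)
    OS << LS << N; // first iteration: "N"; later ones: ", N"
}
// ---------------------------------------------------------------------------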
if (MI.getOpcode() == TargetOpcode::LOCAL_ESCAPE) return false; + // LIFETIME markers should be preserved even if they seem dead. + if (MI.getOpcode() == TargetOpcode::LIFETIME_START || + MI.getOpcode() == TargetOpcode::LIFETIME_END) + return false; // If we can move an instruction, we can remove it. Otherwise, it has // a side-effect of some sort. @@ -360,6 +365,14 @@ Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough( return ValueAndVReg{Val, VReg}; } +const ConstantInt *llvm::getConstantIntVRegVal(Register VReg, + const MachineRegisterInfo &MRI) { + MachineInstr *MI = MRI.getVRegDef(VReg); + if (MI->getOpcode() != TargetOpcode::G_CONSTANT) + return nullptr; + return MI->getOperand(1).getCImm(); +} + const ConstantFP * llvm::getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI) { MachineInstr *MI = MRI.getVRegDef(VReg); @@ -375,13 +388,15 @@ llvm::getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI) { auto DstTy = MRI.getType(DefMI->getOperand(0).getReg()); if (!DstTy.isValid()) return None; - while (DefMI->getOpcode() == TargetOpcode::COPY) { + unsigned Opc = DefMI->getOpcode(); + while (Opc == TargetOpcode::COPY || isPreISelGenericOptimizationHint(Opc)) { Register SrcReg = DefMI->getOperand(1).getReg(); auto SrcTy = MRI.getType(SrcReg); if (!SrcTy.isValid()) break; DefMI = MRI.getVRegDef(SrcReg); DefSrcReg = SrcReg; + Opc = DefMI->getOpcode(); } return DefinitionAndSourceRegister{DefMI, DefSrcReg}; } @@ -474,6 +489,60 @@ Optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode, const Register Op1, return None; } +Optional<APFloat> llvm::ConstantFoldFPBinOp(unsigned Opcode, const Register Op1, + const Register Op2, + const MachineRegisterInfo &MRI) { + const ConstantFP *Op2Cst = getConstantFPVRegVal(Op2, MRI); + if (!Op2Cst) + return None; + + const ConstantFP *Op1Cst = getConstantFPVRegVal(Op1, MRI); + if (!Op1Cst) + return None; + + APFloat C1 = Op1Cst->getValueAPF(); + const APFloat &C2 = Op2Cst->getValueAPF(); + switch (Opcode) { + case TargetOpcode::G_FADD: + C1.add(C2, APFloat::rmNearestTiesToEven); + return C1; + case TargetOpcode::G_FSUB: + C1.subtract(C2, APFloat::rmNearestTiesToEven); + return C1; + case TargetOpcode::G_FMUL: + C1.multiply(C2, APFloat::rmNearestTiesToEven); + return C1; + case TargetOpcode::G_FDIV: + C1.divide(C2, APFloat::rmNearestTiesToEven); + return C1; + case TargetOpcode::G_FREM: + C1.mod(C2); + return C1; + case TargetOpcode::G_FCOPYSIGN: + C1.copySign(C2); + return C1; + case TargetOpcode::G_FMINNUM: + return minnum(C1, C2); + case TargetOpcode::G_FMAXNUM: + return maxnum(C1, C2); + case TargetOpcode::G_FMINIMUM: + return minimum(C1, C2); + case TargetOpcode::G_FMAXIMUM: + return maximum(C1, C2); + case TargetOpcode::G_FMINNUM_IEEE: + case TargetOpcode::G_FMAXNUM_IEEE: + // FIXME: These operations were unfortunately named. fminnum/fmaxnum do not + // follow the IEEE behavior for signaling nans and follow libm's fmin/fmax, + // and currently there isn't a nice wrapper in APFloat for the version with + // correct snan handling. + break; + default: + break; + } + + return None; +} + bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI, bool SNaN) { const MachineInstr *DefMI = MRI.getVRegDef(Val); @@ -484,6 +553,42 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI, if (DefMI->getFlag(MachineInstr::FmNoNans) || TM.Options.NoNaNsFPMath) return true; + // If the value is a constant, we can obviously see if it is a NaN or not. 
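// ---------------------------------------------------------------------------
// Editorial aside, not part of the patch: the APFloat calls that
// ConstantFoldFPBinOp above relies on mutate their left operand in place and
// take an explicit rounding mode. A standalone sketch of the G_FADD case:
#include "llvm/ADT/APFloat.h"
static llvm::APFloat foldFAddLike(llvm::APFloat C1, const llvm::APFloat &C2) {
  // Round to nearest, ties to even: the mode used for all the binops above.
  C1.add(C2, llvm::APFloat::rmNearestTiesToEven);
  return C1;
}
// ---------------------------------------------------------------------------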
+ if (const ConstantFP *FPVal = getConstantFPVRegVal(Val, MRI)) { + return !FPVal->getValueAPF().isNaN() || + (SNaN && !FPVal->getValueAPF().isSignaling()); + } + + if (DefMI->getOpcode() == TargetOpcode::G_BUILD_VECTOR) { + for (const auto &Op : DefMI->uses()) + if (!isKnownNeverNaN(Op.getReg(), MRI, SNaN)) + return false; + return true; + } + + switch (DefMI->getOpcode()) { + default: + break; + case TargetOpcode::G_FMINNUM_IEEE: + case TargetOpcode::G_FMAXNUM_IEEE: { + if (SNaN) + return true; + // This can return a NaN if either operand is an sNaN, or if both operands + // are NaN. + return (isKnownNeverNaN(DefMI->getOperand(1).getReg(), MRI) && + isKnownNeverSNaN(DefMI->getOperand(2).getReg(), MRI)) || + (isKnownNeverSNaN(DefMI->getOperand(1).getReg(), MRI) && + isKnownNeverNaN(DefMI->getOperand(2).getReg(), MRI)); + } + case TargetOpcode::G_FMINNUM: + case TargetOpcode::G_FMAXNUM: { + // Only one needs to be known not-nan, since it will be returned if the + // other ends up being one. + return isKnownNeverNaN(DefMI->getOperand(1).getReg(), MRI, SNaN) || + isKnownNeverNaN(DefMI->getOperand(2).getReg(), MRI, SNaN); + } + } + if (SNaN) { // FP operations quiet. For now, just handle the ones inserted during // legalization. @@ -509,6 +614,11 @@ Align llvm::inferAlignFromPtrInfo(MachineFunction &MF, MPO.Offset); } + if (const Value *V = MPO.V.dyn_cast<const Value *>()) { + const Module *M = MF.getFunction().getParent(); + return V->getPointerAlignment(M->getDataLayout()); + } + return Align(1); } @@ -563,6 +673,19 @@ Optional<APInt> llvm::ConstantFoldExtOp(unsigned Opcode, const Register Op1, return None; } +Optional<APFloat> llvm::ConstantFoldIntToFloat(unsigned Opcode, LLT DstTy, + Register Src, + const MachineRegisterInfo &MRI) { + assert(Opcode == TargetOpcode::G_SITOFP || Opcode == TargetOpcode::G_UITOFP); + if (auto MaybeSrcVal = getConstantVRegVal(Src, MRI)) { + APFloat DstVal(getFltSemanticForLLT(DstTy)); + DstVal.convertFromAPInt(*MaybeSrcVal, Opcode == TargetOpcode::G_SITOFP, + APFloat::rmNearestTiesToEven); + return DstVal; + } + return None; +} + bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI, GISelKnownBits *KB) { Optional<DefinitionAndSourceRegister> DefSrcReg = @@ -599,11 +722,32 @@ bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI, break; } + case TargetOpcode::G_BUILD_VECTOR: { + // TODO: Probably should have a recursion depth guard since you could have + // bitcasted vector elements. + for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) { + if (!isKnownToBeAPowerOfTwo(MI.getOperand(I).getReg(), MRI, KB)) + return false; + } + + return true; + } + case TargetOpcode::G_BUILD_VECTOR_TRUNC: { + // Only handle constants since we would need to know if number of leading + // zeros is greater than the truncation amount. + const unsigned BitWidth = Ty.getScalarSizeInBits(); + for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) { + auto Const = getConstantVRegVal(MI.getOperand(I).getReg(), MRI); + if (!Const || !Const->zextOrTrunc(BitWidth).isPowerOf2()) + return false; + } + + return true; + } default: break; } - // TODO: Are all operands of a build vector constant powers of two? if (!KB) return false; @@ -642,8 +786,9 @@ LLT llvm::getLCMType(LLT OrigTy, LLT TargetTy) { int GCDElts = greatestCommonDivisor(OrigTy.getNumElements(), TargetTy.getNumElements()); // Prefer the original element type. 
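// ---------------------------------------------------------------------------
// Editorial aside, not part of the patch: the arithmetic behind getLCMType /
// getGCDType above, checked on element counts with the standard <numeric>
// helpers (fixed-width vectors with matching element types assumed):
#include <numeric>
// getLCMType(<3 x s32>, <4 x s32>) covers both: lcm(3, 4) = 12 -> <12 x s32>.
static_assert(std::lcm(3, 4) == 12, "LCM of the element counts");
// getGCDType(<8 x s16>, <12 x s16>): gcd(8, 12) = 4 -> <4 x s16>.
static_assert(std::gcd(8, 12) == 4, "GCD of the element counts");
// ---------------------------------------------------------------------------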
- int Mul = OrigTy.getNumElements() * TargetTy.getNumElements(); - return LLT::vector(Mul / GCDElts, OrigTy.getElementType()); + ElementCount Mul = OrigTy.getElementCount() * TargetTy.getNumElements(); + return LLT::vector(Mul.divideCoefficientBy(GCDElts), + OrigTy.getElementType()); } } else { if (OrigElt.getSizeInBits() == TargetSize) @@ -651,12 +796,12 @@ LLT llvm::getLCMType(LLT OrigTy, LLT TargetTy) { } unsigned LCMSize = getLCMSize(OrigSize, TargetSize); - return LLT::vector(LCMSize / OrigElt.getSizeInBits(), OrigElt); + return LLT::fixed_vector(LCMSize / OrigElt.getSizeInBits(), OrigElt); } if (TargetTy.isVector()) { unsigned LCMSize = getLCMSize(OrigSize, TargetSize); - return LLT::vector(LCMSize / OrigSize, OrigTy); + return LLT::fixed_vector(LCMSize / OrigSize, OrigTy); } unsigned LCMSize = getLCMSize(OrigSize, TargetSize); @@ -684,7 +829,7 @@ LLT llvm::getGCDType(LLT OrigTy, LLT TargetTy) { if (OrigElt.getSizeInBits() == TargetElt.getSizeInBits()) { int GCD = greatestCommonDivisor(OrigTy.getNumElements(), TargetTy.getNumElements()); - return LLT::scalarOrVector(GCD, OrigElt); + return LLT::scalarOrVector(ElementCount::getFixed(GCD), OrigElt); } } else { // If the source is a vector of pointers, return a pointer element. @@ -700,7 +845,7 @@ LLT llvm::getGCDType(LLT OrigTy, LLT TargetTy) { // scalar. if (GCD < OrigElt.getSizeInBits()) return LLT::scalar(GCD); - return LLT::vector(GCD / OrigElt.getSizeInBits(), OrigElt); + return LLT::fixed_vector(GCD / OrigElt.getSizeInBits(), OrigElt); } if (TargetTy.isVector()) { @@ -789,6 +934,52 @@ bool llvm::isBuildVectorAllOnes(const MachineInstr &MI, return isBuildVectorConstantSplat(MI, MRI, -1); } +Optional<RegOrConstant> llvm::getVectorSplat(const MachineInstr &MI, + const MachineRegisterInfo &MRI) { + unsigned Opc = MI.getOpcode(); + if (!isBuildVectorOp(Opc)) + return None; + if (auto Splat = getBuildVectorConstantSplat(MI, MRI)) + return RegOrConstant(*Splat); + auto Reg = MI.getOperand(1).getReg(); + if (any_of(make_range(MI.operands_begin() + 2, MI.operands_end()), + [&Reg](const MachineOperand &Op) { return Op.getReg() != Reg; })) + return None; + return RegOrConstant(Reg); +} + +bool llvm::matchUnaryPredicate( + const MachineRegisterInfo &MRI, Register Reg, + std::function<bool(const Constant *ConstVal)> Match, bool AllowUndefs) { + + const MachineInstr *Def = getDefIgnoringCopies(Reg, MRI); + if (AllowUndefs && Def->getOpcode() == TargetOpcode::G_IMPLICIT_DEF) + return Match(nullptr); + + // TODO: Also handle fconstant + if (Def->getOpcode() == TargetOpcode::G_CONSTANT) + return Match(Def->getOperand(1).getCImm()); + + if (Def->getOpcode() != TargetOpcode::G_BUILD_VECTOR) + return false; + + for (unsigned I = 1, E = Def->getNumOperands(); I != E; ++I) { + Register SrcElt = Def->getOperand(I).getReg(); + const MachineInstr *SrcDef = getDefIgnoringCopies(SrcElt, MRI); + if (AllowUndefs && SrcDef->getOpcode() == TargetOpcode::G_IMPLICIT_DEF) { + if (!Match(nullptr)) + return false; + continue; + } + + if (SrcDef->getOpcode() != TargetOpcode::G_CONSTANT || + !Match(SrcDef->getOperand(1).getCImm())) + return false; + } + + return true; +} + bool llvm::isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector, bool IsFP) { switch (TLI.getBooleanContents(IsVector, IsFP)) { @@ -813,3 +1004,10 @@ int64_t llvm::getICmpTrueVal(const TargetLowering &TLI, bool IsVector, } llvm_unreachable("Invalid boolean contents"); } + +bool llvm::shouldOptForSize(const MachineBasicBlock &MBB, + ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) { 
+ const auto &F = MBB.getParent()->getFunction(); + return F.hasOptSize() || F.hasMinSize() || + llvm::shouldOptimizeForSize(MBB.getBasicBlock(), PSI, BFI); +} diff --git a/llvm/lib/CodeGen/HardwareLoops.cpp b/llvm/lib/CodeGen/HardwareLoops.cpp index 810b10c9c82a..4316034371a5 100644 --- a/llvm/lib/CodeGen/HardwareLoops.cpp +++ b/llvm/lib/CodeGen/HardwareLoops.cpp @@ -232,11 +232,9 @@ bool HardwareLoops::runOnFunction(Function &F) { AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); M = F.getParent(); - for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) { - Loop *L = *I; + for (Loop *L : *LI) if (L->isOutermost()) TryConvertLoop(L); - } return MadeChange; } @@ -408,8 +406,8 @@ Value *HardwareLoop::InitLoopCount() { } if (!isSafeToExpandAt(TripCount, BB->getTerminator(), SE)) { - LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand TripCount " - << *TripCount << "\n"); + LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand TripCount " << *TripCount + << "\n"); return nullptr; } @@ -426,9 +424,9 @@ Value *HardwareLoop::InitLoopCount() { UseLoopGuard = UseLoopGuard && CanGenerateTest(L, Count); BeginBB = UseLoopGuard ? BB : L->getLoopPreheader(); LLVM_DEBUG(dbgs() << " - Loop Count: " << *Count << "\n" - << " - Expanded Count in " << BB->getName() << "\n" - << " - Will insert set counter intrinsic into: " - << BeginBB->getName() << "\n"); + << " - Expanded Count in " << BB->getName() << "\n" + << " - Will insert set counter intrinsic into: " + << BeginBB->getName() << "\n"); return Count; } @@ -436,25 +434,32 @@ Value* HardwareLoop::InsertIterationSetup(Value *LoopCountInit) { IRBuilder<> Builder(BeginBB->getTerminator()); Type *Ty = LoopCountInit->getType(); bool UsePhi = UsePHICounter || ForceHardwareLoopPHI; - Intrinsic::ID ID = UseLoopGuard ? Intrinsic::test_set_loop_iterations - : (UsePhi ? Intrinsic::start_loop_iterations - : Intrinsic::set_loop_iterations); + Intrinsic::ID ID = UseLoopGuard + ? (UsePhi ? Intrinsic::test_start_loop_iterations + : Intrinsic::test_set_loop_iterations) + : (UsePhi ? Intrinsic::start_loop_iterations + : Intrinsic::set_loop_iterations); Function *LoopIter = Intrinsic::getDeclaration(M, ID, Ty); - Value *SetCount = Builder.CreateCall(LoopIter, LoopCountInit); + Value *LoopSetup = Builder.CreateCall(LoopIter, LoopCountInit); // Use the return value of the intrinsic to control the entry of the loop. if (UseLoopGuard) { assert((isa<BranchInst>(BeginBB->getTerminator()) && cast<BranchInst>(BeginBB->getTerminator())->isConditional()) && "Expected conditional branch"); + + Value *SetCount = + UsePhi ? Builder.CreateExtractValue(LoopSetup, 1) : LoopSetup; auto *LoopGuard = cast<BranchInst>(BeginBB->getTerminator()); LoopGuard->setCondition(SetCount); if (LoopGuard->getSuccessor(0) != L->getLoopPreheader()) LoopGuard->swapSuccessors(); } - LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop counter: " - << *SetCount << "\n"); - return UseLoopGuard ? LoopCountInit : SetCount; + LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop counter: " << *LoopSetup + << "\n"); + if (UsePhi && UseLoopGuard) + LoopSetup = Builder.CreateExtractValue(LoopSetup, 0); + return !UsePhi ? 
LoopCountInit : LoopSetup; } void HardwareLoop::InsertLoopDec() { diff --git a/llvm/lib/CodeGen/IfConversion.cpp b/llvm/lib/CodeGen/IfConversion.cpp index 37be2eabf5fe..681e2f3dc848 100644 --- a/llvm/lib/CodeGen/IfConversion.cpp +++ b/llvm/lib/CodeGen/IfConversion.cpp @@ -300,7 +300,7 @@ namespace { MachineBasicBlock::iterator TIE = TBBInfo.BB->end(); MachineBasicBlock::iterator FIE = FBBInfo.BB->end(); - unsigned Dups1, Dups2; + unsigned Dups1 = 0, Dups2 = 0; if (!CountDuplicatedInstructions(TIB, FIB, TIE, FIE, Dups1, Dups2, *TBBInfo.BB, *FBBInfo.BB, /*SkipUnconditionalBranches*/ true)) @@ -742,8 +742,8 @@ bool IfConverter::CountDuplicatedInstructions( bool SkipUnconditionalBranches) const { while (TIB != TIE && FIB != FIE) { // Skip dbg_value instructions. These do not count. - TIB = skipDebugInstructionsForward(TIB, TIE); - FIB = skipDebugInstructionsForward(FIB, FIE); + TIB = skipDebugInstructionsForward(TIB, TIE, false); + FIB = skipDebugInstructionsForward(FIB, FIE, false); if (TIB == TIE || FIB == FIE) break; if (!TIB->isIdenticalTo(*FIB)) @@ -785,8 +785,8 @@ bool IfConverter::CountDuplicatedInstructions( while (RTIE != RTIB && RFIE != RFIB) { // Skip dbg_value instructions. These do not count. // Note that these are reverse iterators going forward. - RTIE = skipDebugInstructionsForward(RTIE, RTIB); - RFIE = skipDebugInstructionsForward(RFIE, RFIB); + RTIE = skipDebugInstructionsForward(RTIE, RTIB, false); + RFIE = skipDebugInstructionsForward(RFIE, RFIB, false); if (RTIE == RTIB || RFIE == RFIB) break; if (!RTIE->isIdenticalTo(*RFIE)) @@ -838,8 +838,8 @@ static void verifySameBranchInstructions( MachineBasicBlock::reverse_iterator E1 = MBB1->rbegin(); MachineBasicBlock::reverse_iterator E2 = MBB2->rbegin(); while (E1 != B1 && E2 != B2) { - skipDebugInstructionsForward(E1, B1); - skipDebugInstructionsForward(E2, B2); + skipDebugInstructionsForward(E1, B1, false); + skipDebugInstructionsForward(E2, B2, false); if (E1 == B1 && E2 == B2) break; @@ -1564,8 +1564,8 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { if (MRI->tracksLiveness()) { // Initialize liveins to the first BB. These are potentially redefined by // predicated instructions. - Redefs.addLiveIns(CvtMBB); - Redefs.addLiveIns(NextMBB); + Redefs.addLiveInsNoPristines(CvtMBB); + Redefs.addLiveInsNoPristines(NextMBB); } // Remove the branches from the entry so we can add the contents of the true @@ -1665,8 +1665,8 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { // predicated instructions. Redefs.init(*TRI); if (MRI->tracksLiveness()) { - Redefs.addLiveIns(CvtMBB); - Redefs.addLiveIns(NextMBB); + Redefs.addLiveInsNoPristines(CvtMBB); + Redefs.addLiveInsNoPristines(NextMBB); } bool HasEarlyExit = CvtBBI->FalseBB != nullptr; @@ -1828,14 +1828,14 @@ bool IfConverter::IfConvertDiamondCommon( // after tracking the BB1 instructions. Redefs.init(*TRI); if (MRI->tracksLiveness()) { - Redefs.addLiveIns(MBB1); - Redefs.addLiveIns(MBB2); + Redefs.addLiveInsNoPristines(MBB1); + Redefs.addLiveInsNoPristines(MBB2); } // Remove the duplicated instructions at the beginnings of both paths. // Skip dbg_value instructions. 
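// ---------------------------------------------------------------------------
// Editorial aside, not part of the patch: the HardwareLoops hunk above grows
// the intrinsic choice in InsertIterationSetup into a 2x2 decision. The same
// selection as a standalone sketch; enum and function names are illustrative.
enum class HWLoopSetup { TestStart, TestSet, Start, Set };
static HWLoopSetup pickLoopSetup(bool UseLoopGuard, bool UsePhi) {
  // test_start_loop_iterations returns {count, i1}: element 1 guards loop
  // entry and element 0 feeds the counter PHI via the extractvalues above.
  if (UseLoopGuard)
    return UsePhi ? HWLoopSetup::TestStart : HWLoopSetup::TestSet;
  return UsePhi ? HWLoopSetup::Start : HWLoopSetup::Set;
}
// ---------------------------------------------------------------------------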
- MachineBasicBlock::iterator DI1 = MBB1.getFirstNonDebugInstr(); - MachineBasicBlock::iterator DI2 = MBB2.getFirstNonDebugInstr(); + MachineBasicBlock::iterator DI1 = MBB1.getFirstNonDebugInstr(false); + MachineBasicBlock::iterator DI2 = MBB2.getFirstNonDebugInstr(false); BBI1->NonPredSize -= NumDups1; BBI2->NonPredSize -= NumDups1; diff --git a/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/llvm/lib/CodeGen/ImplicitNullChecks.cpp index 5cdaa9b74e80..0882ce366c9c 100644 --- a/llvm/lib/CodeGen/ImplicitNullChecks.cpp +++ b/llvm/lib/CodeGen/ImplicitNullChecks.cpp @@ -353,10 +353,9 @@ ImplicitNullChecks::areMemoryOpsAliased(const MachineInstr &MI, return AR_MayAlias; continue; } - llvm::AliasResult AAResult = AA->alias( - MemoryLocation::getAfter(MMO1->getValue(), MMO1->getAAInfo()), - MemoryLocation::getAfter(MMO2->getValue(), MMO2->getAAInfo())); - if (AAResult != NoAlias) + if (!AA->isNoAlias( + MemoryLocation::getAfter(MMO1->getValue(), MMO1->getAAInfo()), + MemoryLocation::getAfter(MMO2->getValue(), MMO2->getAAInfo()))) return AR_MayAlias; } } diff --git a/llvm/lib/CodeGen/IndirectBrExpandPass.cpp b/llvm/lib/CodeGen/IndirectBrExpandPass.cpp index 4473a139d3ad..e4606daba352 100644 --- a/llvm/lib/CodeGen/IndirectBrExpandPass.cpp +++ b/llvm/lib/CodeGen/IndirectBrExpandPass.cpp @@ -28,9 +28,11 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Sequence.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" @@ -59,6 +61,10 @@ public: initializeIndirectBrExpandPassPass(*PassRegistry::getPassRegistry()); } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addPreserved<DominatorTreeWrapperPass>(); + } + bool runOnFunction(Function &F) override; }; @@ -66,8 +72,11 @@ public: char IndirectBrExpandPass::ID = 0; -INITIALIZE_PASS(IndirectBrExpandPass, DEBUG_TYPE, - "Expand indirectbr instructions", false, false) +INITIALIZE_PASS_BEGIN(IndirectBrExpandPass, DEBUG_TYPE, + "Expand indirectbr instructions", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_END(IndirectBrExpandPass, DEBUG_TYPE, + "Expand indirectbr instructions", false, false) FunctionPass *llvm::createIndirectBrExpandPass() { return new IndirectBrExpandPass(); @@ -85,6 +94,10 @@ bool IndirectBrExpandPass::runOnFunction(Function &F) { return false; TLI = STI.getTargetLowering(); + Optional<DomTreeUpdater> DTU; + if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>()) + DTU.emplace(DTWP->getDomTree(), DomTreeUpdater::UpdateStrategy::Lazy); + SmallVector<IndirectBrInst *, 1> IndirectBrs; // Set of all potential successors for indirectbr instructions. @@ -158,10 +171,22 @@ bool IndirectBrExpandPass::runOnFunction(Function &F) { if (BBs.empty()) { // There are no blocks whose address is taken, so any indirectbr instruction // cannot get a valid input and we can replace all of them with unreachable. 
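// ---------------------------------------------------------------------------
// Editorial aside, not part of the patch: the ImplicitNullChecks change above
// folds "alias(...) != NoAlias" into a single isNoAlias query. A minimal
// sketch of the rewritten form (AA metadata omitted for brevity; the helper
// name is illustrative):
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
static bool mayAliasAfter(llvm::AAResults &AA, const llvm::Value *P1,
                          const llvm::Value *P2) {
  // getAfter() describes all memory from the pointer onward, the conservative
  // location used for the two memory operands.
  return !AA.isNoAlias(llvm::MemoryLocation::getAfter(P1),
                       llvm::MemoryLocation::getAfter(P2));
}
// ---------------------------------------------------------------------------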
+ SmallVector<DominatorTree::UpdateType, 8> Updates; + if (DTU) + Updates.reserve(IndirectBrSuccs.size()); for (auto *IBr : IndirectBrs) { + if (DTU) { + for (BasicBlock *SuccBB : IBr->successors()) + Updates.push_back({DominatorTree::Delete, IBr->getParent(), SuccBB}); + } (void)new UnreachableInst(F.getContext(), IBr); IBr->eraseFromParent(); } + if (DTU) { + assert(Updates.size() == IndirectBrSuccs.size() && + "Got unexpected update count."); + DTU->applyUpdates(Updates); + } return true; } @@ -183,12 +208,22 @@ bool IndirectBrExpandPass::runOnFunction(Function &F) { Twine(IBr->getAddress()->getName()) + ".switch_cast", IBr); }; + SmallVector<DominatorTree::UpdateType, 8> Updates; + if (IndirectBrs.size() == 1) { // If we only have one indirectbr, we can just directly replace it within // its block. - SwitchBB = IndirectBrs[0]->getParent(); - SwitchValue = GetSwitchValue(IndirectBrs[0]); - IndirectBrs[0]->eraseFromParent(); + IndirectBrInst *IBr = IndirectBrs[0]; + SwitchBB = IBr->getParent(); + SwitchValue = GetSwitchValue(IBr); + if (DTU) { + Updates.reserve(IndirectBrSuccs.size()); + for (BasicBlock *SuccBB : IBr->successors()) + Updates.push_back({DominatorTree::Delete, IBr->getParent(), SuccBB}); + assert(Updates.size() == IndirectBrSuccs.size() && + "Got unexpected update count."); + } + IBr->eraseFromParent(); } else { // Otherwise we need to create a new block to hold the switch across BBs, // jump to that block instead of each indirectbr, and phi together the @@ -200,9 +235,16 @@ bool IndirectBrExpandPass::runOnFunction(Function &F) { // Now replace the indirectbr instructions with direct branches to the // switch block and fill out the PHI operands. + if (DTU) + Updates.reserve(IndirectBrs.size() + 2 * IndirectBrSuccs.size()); for (auto *IBr : IndirectBrs) { SwitchPN->addIncoming(GetSwitchValue(IBr), IBr->getParent()); BranchInst::Create(SwitchBB, IBr); + if (DTU) { + Updates.push_back({DominatorTree::Insert, IBr->getParent(), SwitchBB}); + for (BasicBlock *SuccBB : IBr->successors()) + Updates.push_back({DominatorTree::Delete, IBr->getParent(), SuccBB}); + } IBr->eraseFromParent(); } } @@ -215,5 +257,15 @@ bool IndirectBrExpandPass::runOnFunction(Function &F) { for (int i : llvm::seq<int>(1, BBs.size())) SI->addCase(ConstantInt::get(CommonITy, i + 1), BBs[i]); + if (DTU) { + // If there were multiple indirectbr's, they may have common successors, + // but in the dominator tree, we only track unique edges. + SmallPtrSet<BasicBlock *, 8> UniqueSuccessors(BBs.begin(), BBs.end()); + Updates.reserve(Updates.size() + UniqueSuccessors.size()); + for (BasicBlock *BB : UniqueSuccessors) + Updates.push_back({DominatorTree::Insert, SwitchBB, BB}); + DTU->applyUpdates(Updates); + } + return true; } diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp index 876e1d3f932a..71e91b445d9a 100644 --- a/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/llvm/lib/CodeGen/InlineSpiller.cpp @@ -173,7 +173,7 @@ class InlineSpiller : public Spiller { LiveRangeEdit *Edit; LiveInterval *StackInt; int StackSlot; - unsigned Original; + Register Original; // All registers to spill to StackSlot, including the main register. SmallVector<Register, 8> RegsToSpill; @@ -191,19 +191,23 @@ class InlineSpiller : public Spiller { // Object records spills information and does the hoisting. HoistSpillHelper HSpiller; + // Live range weight calculator. 
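// ---------------------------------------------------------------------------
// Editorial aside, not part of the patch: the IndirectBrExpandPass changes
// above follow the usual lazy DomTreeUpdater pattern: record every edge
// insertion/deletion while rewriting the CFG, then hand them over in one
// batch. A minimal sketch; recordRemovedEdges is an illustrative name.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
static void recordRemovedEdges(llvm::DomTreeUpdater &DTU,
                               llvm::BasicBlock *From,
                               llvm::ArrayRef<llvm::BasicBlock *> Succs) {
  llvm::SmallVector<llvm::DominatorTree::UpdateType, 8> Updates;
  for (llvm::BasicBlock *To : Succs)
    Updates.push_back({llvm::DominatorTree::Delete, From, To});
  // With the lazy strategy, queued updates are applied when the dominator
  // tree is next needed.
  DTU.applyUpdates(Updates);
}
// ---------------------------------------------------------------------------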
+ VirtRegAuxInfo &VRAI;
+
~InlineSpiller() override = default;
public:
- InlineSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm)
- : MF(mf), LIS(pass.getAnalysis<LiveIntervals>()),
- LSS(pass.getAnalysis<LiveStacks>()),
- AA(&pass.getAnalysis<AAResultsWrapperPass>().getAAResults()),
- MDT(pass.getAnalysis<MachineDominatorTree>()),
- Loops(pass.getAnalysis<MachineLoopInfo>()), VRM(vrm),
- MRI(mf.getRegInfo()), TII(*mf.getSubtarget().getInstrInfo()),
- TRI(*mf.getSubtarget().getRegisterInfo()),
- MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()),
- HSpiller(pass, mf, vrm) {}
+ InlineSpiller(MachineFunctionPass &Pass, MachineFunction &MF, VirtRegMap &VRM,
+ VirtRegAuxInfo &VRAI)
+ : MF(MF), LIS(Pass.getAnalysis<LiveIntervals>()),
+ LSS(Pass.getAnalysis<LiveStacks>()),
+ AA(&Pass.getAnalysis<AAResultsWrapperPass>().getAAResults()),
+ MDT(Pass.getAnalysis<MachineDominatorTree>()),
+ Loops(Pass.getAnalysis<MachineLoopInfo>()), VRM(VRM),
+ MRI(MF.getRegInfo()), TII(*MF.getSubtarget().getInstrInfo()),
+ TRI(*MF.getSubtarget().getRegisterInfo()),
+ MBFI(Pass.getAnalysis<MachineBlockFrequencyInfo>()),
+ HSpiller(Pass, MF, VRM), VRAI(VRAI) {}
void spill(LiveRangeEdit &) override;
void postOptimization() override;
@@ -239,10 +243,10 @@ Spiller::~Spiller() = default;
void Spiller::anchor() {}
-Spiller *llvm::createInlineSpiller(MachineFunctionPass &pass,
- MachineFunction &mf,
- VirtRegMap &vrm) {
- return new InlineSpiller(pass, mf, vrm);
+Spiller *llvm::createInlineSpiller(MachineFunctionPass &Pass,
+ MachineFunction &MF, VirtRegMap &VRM,
+ VirtRegAuxInfo &VRAI) {
+ return new InlineSpiller(Pass, MF, VRM, VRAI);
}
//===----------------------------------------------------------------------===//
@@ -1044,7 +1048,7 @@ void InlineSpiller::spillAroundUses(Register Reg) {
// Modify DBG_VALUE now that the value is in a spill slot.
MachineBasicBlock *MBB = MI->getParent();
LLVM_DEBUG(dbgs() << "Modifying debug info due to spill:\t" << *MI);
- buildDbgValueForSpill(*MBB, MI, *MI, StackSlot);
+ buildDbgValueForSpill(*MBB, MI, *MI, StackSlot, Reg);
MBB->erase(MI);
continue;
}
@@ -1200,7 +1204,7 @@ void InlineSpiller::spill(LiveRangeEdit &edit) {
if (!RegsToSpill.empty())
spillAll();
- Edit->calculateRegClassAndHint(MF, Loops, MBFI);
+ Edit->calculateRegClassAndHint(MF, VRAI);
}
/// Optimizations after all the reg selections and spills are done.
@@ -1241,13 +1245,16 @@ bool HoistSpillHelper::rmFromMergeableSpills(MachineInstr &Spill,
/// i.e., there should be a living sibling of OrigReg at the insert point.
bool HoistSpillHelper::isSpillCandBB(LiveInterval &OrigLI, VNInfo &OrigVNI,
MachineBasicBlock &BB, Register &LiveReg) {
- SlotIndex Idx;
+ SlotIndex Idx = IPA.getLastInsertPoint(OrigLI, BB);
+ // The original def could be after the last insert point in the root block;
+ // if so, we can't hoist to here.
+ if (Idx < OrigVNI.def) {
+ // TODO: We could be better here. If LI is not alive in landing pad
+ // we could hoist spill after LIP.
+ LLVM_DEBUG(dbgs() << "can't spill in root block - def after LIP\n"); + return false; + } Register OrigReg = OrigLI.reg(); - MachineBasicBlock::iterator MI = IPA.getLastInsertPointIter(OrigLI, BB); - if (MI != BB.end()) - Idx = LIS.getInstructionIndex(*MI); - else - Idx = LIS.getMBBEndIdx(&BB).getPrevSlot(); SmallSetVector<Register, 16> &Siblings = Virt2SiblingsMap[OrigReg]; assert(OrigLI.getVNInfoAt(Idx) == &OrigVNI && "Unexpected VNI"); diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp index b22e6faeb91c..24a57cc21c57 100644 --- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp +++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp @@ -385,8 +385,7 @@ bool InterleavedAccess::lowerInterleavedLoad( return !Extracts.empty() || BinOpShuffleChanged; } - for (auto SVI : Shuffles) - DeadInsts.push_back(SVI); + append_range(DeadInsts, Shuffles); DeadInsts.push_back(LI); return true; @@ -409,8 +408,8 @@ bool InterleavedAccess::replaceBinOpShuffles( auto *NewSVI2 = new ShuffleVectorInst( BI->getOperand(1), PoisonValue::get(BI->getOperand(1)->getType()), Mask, SVI->getName(), SVI); - Value *NewBI = BinaryOperator::Create(BI->getOpcode(), NewSVI1, NewSVI2, - BI->getName(), SVI); + BinaryOperator *NewBI = BinaryOperator::CreateWithCopiedFlags( + BI->getOpcode(), NewSVI1, NewSVI2, BI, BI->getName(), SVI); SVI->replaceAllUsesWith(NewBI); LLVM_DEBUG(dbgs() << " Replaced: " << *BI << "\n And : " << *SVI << "\n With : " << *NewSVI1 << "\n And : " diff --git a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp index ff3f93d51ea8..71bfb1d87d66 100644 --- a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp +++ b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp @@ -32,6 +32,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" diff --git a/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/llvm/lib/CodeGen/LLVMTargetMachine.cpp index f9b7bf613ff6..37c0b44ea2b2 100644 --- a/llvm/lib/CodeGen/LLVMTargetMachine.cpp +++ b/llvm/lib/CodeGen/LLVMTargetMachine.cpp @@ -64,8 +64,12 @@ void LLVMTargetMachine::initAsmInfo() { if (Options.BinutilsVersion.first > 0) TmpAsmInfo->setBinutilsVersion(Options.BinutilsVersion); - if (Options.DisableIntegratedAS) + if (Options.DisableIntegratedAS) { TmpAsmInfo->setUseIntegratedAssembler(false); + // If there is explict option disable integratedAS, we can't use it for + // inlineasm either. + TmpAsmInfo->setParseInlineAsmUsingAsmParser(false); + } TmpAsmInfo->setPreserveAsmComments(Options.MCOptions.PreserveAsmComments); diff --git a/llvm/lib/CodeGen/LatencyPriorityQueue.cpp b/llvm/lib/CodeGen/LatencyPriorityQueue.cpp index 8a7a41d0f763..c3e0553418a5 100644 --- a/llvm/lib/CodeGen/LatencyPriorityQueue.cpp +++ b/llvm/lib/CodeGen/LatencyPriorityQueue.cpp @@ -55,9 +55,8 @@ bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const { /// of SU, return it, otherwise return null. SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) { SUnit *OnlyAvailablePred = nullptr; - for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - SUnit &Pred = *I->getSUnit(); + for (const SDep &P : SU->Preds) { + SUnit &Pred = *P.getSUnit(); if (!Pred.isScheduled) { // We found an available, but not scheduled, predecessor. If it's the // only one we have found, keep track of it... 
otherwise give up.
@@ -90,10 +89,8 @@ void LatencyPriorityQueue::push(SUnit *SU) {
// single predecessor has a higher priority, since scheduling it will make
// the node available.
void LatencyPriorityQueue::scheduledNode(SUnit *SU) {
- for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I) {
- AdjustPriorityOfUnscheduledPreds(I->getSUnit());
- }
+ for (const SDep &Succ : SU->Succs)
+ AdjustPriorityOfUnscheduledPreds(Succ.getSUnit());
}
/// AdjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
diff --git a/llvm/lib/CodeGen/LexicalScopes.cpp b/llvm/lib/CodeGen/LexicalScopes.cpp
index 8139c2cbb6cd..47c19c3d8ec4 100644
--- a/llvm/lib/CodeGen/LexicalScopes.cpp
+++ b/llvm/lib/CodeGen/LexicalScopes.cpp
@@ -75,6 +75,11 @@ void LexicalScopes::extractLexicalScopes(
const MachineInstr *PrevMI = nullptr;
const DILocation *PrevDL = nullptr;
for (const auto &MInsn : MBB) {
+ // Ignore DBG_VALUE and similar instructions that do not contribute to any
+ // instruction in the output.
+ if (MInsn.isMetaInstruction())
+ continue;
+
// Check if instruction has valid location information.
const DILocation *MIDL = MInsn.getDebugLoc();
if (!MIDL) {
@@ -88,11 +93,6 @@ void LexicalScopes::extractLexicalScopes(
continue;
}
- // Ignore DBG_VALUE and similar instruction that do not contribute to any
- // instruction in the output.
- if (MInsn.isMetaInstruction())
- continue;
-
if (RangeBeginMI) {
// If we have already seen a beginning of an instruction range and
// current instruction scope does not match scope of first instruction
diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
index 18ffe8ba0669..dc9907058340 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
@@ -148,6 +148,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -160,6 +161,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
@@ -184,6 +186,8 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/TypeSize.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/SSAUpdaterImpl.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -199,23 +203,16 @@
using namespace llvm;
+// SSAUpdaterImpl sets DEBUG_TYPE; change it.
+#undef DEBUG_TYPE
#define DEBUG_TYPE "livedebugvalues"
-STATISTIC(NumInserted, "Number of DBG_VALUE instructions inserted");
-STATISTIC(NumRemoved, "Number of DBG_VALUE instructions removed");
// Act more like the VarLoc implementation, by propagating some locations too
// far and ignoring some transfers.
static cl::opt<bool> EmulateOldLDV("emulate-old-livedebugvalues", cl::Hidden,
cl::desc("Act like old LiveDebugValues did"),
cl::init(false));
-// Rely on isStoreToStackSlotPostFE and similar to observe all stack spills.
-static cl::opt<bool>
- ObserveAllStackops("observe-all-stack-ops", cl::Hidden,
- cl::desc("Allow non-kill spill and restores"),
- cl::init(false));
namespace {
// The location at which a spilled value resides.
It consists of a register and @@ -959,25 +956,27 @@ public: class TransferTracker { public: const TargetInstrInfo *TII; + const TargetLowering *TLI; /// This machine location tracker is assumed to always contain the up-to-date /// value mapping for all machine locations. TransferTracker only reads /// information from it. (XXX make it const?) MLocTracker *MTracker; MachineFunction &MF; + bool ShouldEmitDebugEntryValues; /// Record of all changes in variable locations at a block position. Awkwardly /// we allow inserting either before or after the point: MBB != nullptr /// indicates it's before, otherwise after. struct Transfer { - MachineBasicBlock::iterator Pos; /// Position to insert DBG_VALUes - MachineBasicBlock *MBB; /// non-null if we should insert after. + MachineBasicBlock::instr_iterator Pos; /// Position to insert DBG_VALUes + MachineBasicBlock *MBB; /// non-null if we should insert after. SmallVector<MachineInstr *, 4> Insts; /// Vector of DBG_VALUEs to insert. }; - typedef struct { + struct LocAndProperties { LocIdx Loc; DbgValueProperties Properties; - } LocAndProperties; + }; /// Collection of transfers (DBG_VALUEs) to be inserted. SmallVector<Transfer, 32> Transfers; @@ -1027,9 +1026,13 @@ public: TransferTracker(const TargetInstrInfo *TII, MLocTracker *MTracker, MachineFunction &MF, const TargetRegisterInfo &TRI, - const BitVector &CalleeSavedRegs) + const BitVector &CalleeSavedRegs, const TargetPassConfig &TPC) : TII(TII), MTracker(MTracker), MF(MF), TRI(TRI), - CalleeSavedRegs(CalleeSavedRegs) {} + CalleeSavedRegs(CalleeSavedRegs) { + TLI = MF.getSubtarget().getTargetLowering(); + auto &TM = TPC.getTM<TargetMachine>(); + ShouldEmitDebugEntryValues = TM.Options.ShouldEmitDebugEntryValues(); + } /// Load object with live-in variable values. \p mlocs contains the live-in /// values in each machine location, while \p vlocs the live-in variable @@ -1097,6 +1100,8 @@ public: // use-before-def to be resolved as we step through the block. if (Num.getBlock() == (unsigned)MBB.getNumber() && !Num.isPHI()) addUseBeforeDef(Var.first, Var.second.Properties, Num); + else + recoverAsEntryValue(Var.first, Var.second.Properties, Num); continue; } @@ -1152,10 +1157,73 @@ public: /// Helper to move created DBG_VALUEs into Transfers collection. void flushDbgValues(MachineBasicBlock::iterator Pos, MachineBasicBlock *MBB) { - if (PendingDbgValues.size() > 0) { - Transfers.push_back({Pos, MBB, PendingDbgValues}); - PendingDbgValues.clear(); - } + if (PendingDbgValues.size() == 0) + return; + + // Pick out the instruction start position. + MachineBasicBlock::instr_iterator BundleStart; + if (MBB && Pos == MBB->begin()) + BundleStart = MBB->instr_begin(); + else + BundleStart = getBundleStart(Pos->getIterator()); + + Transfers.push_back({BundleStart, MBB, PendingDbgValues}); + PendingDbgValues.clear(); + } + + bool isEntryValueVariable(const DebugVariable &Var, + const DIExpression *Expr) const { + if (!Var.getVariable()->isParameter()) + return false; + + if (Var.getInlinedAt()) + return false; + + if (Expr->getNumElements() > 0) + return false; + + return true; + } + + bool isEntryValueValue(const ValueIDNum &Val) const { + // Must be in entry block (block number zero), and be a PHI / live-in value. + if (Val.getBlock() || !Val.isPHI()) + return false; + + // Entry values must enter in a register. 
+ if (MTracker->isSpill(Val.getLoc())) + return false; + + Register SP = TLI->getStackPointerRegisterToSaveRestore(); + Register FP = TRI.getFrameRegister(MF); + Register Reg = MTracker->LocIdxToLocID[Val.getLoc()]; + return Reg != SP && Reg != FP; + } + + bool recoverAsEntryValue(const DebugVariable &Var, DbgValueProperties &Prop, + const ValueIDNum &Num) { + // Is this variable location a candidate to be an entry value. First, + // should we be trying this at all? + if (!ShouldEmitDebugEntryValues) + return false; + + // Is the variable appropriate for entry values (i.e., is a parameter). + if (!isEntryValueVariable(Var, Prop.DIExpr)) + return false; + + // Is the value assigned to this variable still the entry value? + if (!isEntryValueValue(Num)) + return false; + + // Emit a variable location using an entry value expression. + DIExpression *NewExpr = + DIExpression::prepend(Prop.DIExpr, DIExpression::EntryValue); + Register Reg = MTracker->LocIdxToLocID[Num.getLoc()]; + MachineOperand MO = MachineOperand::CreateReg(Reg, false); + MO.setIsDebug(true); + + PendingDbgValues.push_back(emitMOLoc(MO, Var, {NewExpr, Prop.Indirect})); + return true; } /// Change a variable value after encountering a DBG_VALUE inside a block. @@ -1224,26 +1292,70 @@ public: } } - /// Explicitly terminate variable locations based on \p mloc. Creates undef - /// DBG_VALUEs for any variables that were located there, and clears - /// #ActiveMLoc / #ActiveVLoc tracking information for that location. - void clobberMloc(LocIdx MLoc, MachineBasicBlock::iterator Pos) { - assert(MTracker->isSpill(MLoc)); + /// Account for a location \p mloc being clobbered. Examine the variable + /// locations that will be terminated: and try to recover them by using + /// another location. Optionally, given \p MakeUndef, emit a DBG_VALUE to + /// explicitly terminate a location if it can't be recovered. + void clobberMloc(LocIdx MLoc, MachineBasicBlock::iterator Pos, + bool MakeUndef = true) { auto ActiveMLocIt = ActiveMLocs.find(MLoc); if (ActiveMLocIt == ActiveMLocs.end()) return; + // What was the old variable value? + ValueIDNum OldValue = VarLocs[MLoc.asU64()]; VarLocs[MLoc.asU64()] = ValueIDNum::EmptyValue; + // Examine the remaining variable locations: if we can find the same value + // again, we can recover the location. + Optional<LocIdx> NewLoc = None; + for (auto Loc : MTracker->locations()) + if (Loc.Value == OldValue) + NewLoc = Loc.Idx; + + // If there is no location, and we weren't asked to make the variable + // explicitly undef, then stop here. + if (!NewLoc && !MakeUndef) { + // Try and recover a few more locations with entry values. + for (auto &Var : ActiveMLocIt->second) { + auto &Prop = ActiveVLocs.find(Var)->second.Properties; + recoverAsEntryValue(Var, Prop, OldValue); + } + flushDbgValues(Pos, nullptr); + return; + } + + // Examine all the variables based on this location. + DenseSet<DebugVariable> NewMLocs; for (auto &Var : ActiveMLocIt->second) { auto ActiveVLocIt = ActiveVLocs.find(Var); - // Create an undef. We can't feed in a nullptr DIExpression alas, - // so use the variables last expression. Pass None as the location. + // Re-state the variable location: if there's no replacement then NewLoc + // is None and a $noreg DBG_VALUE will be created. Otherwise, a DBG_VALUE + // identifying the alternative location will be emitted. 
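  // (A worked sketch of the rewrite performed just above; names are
  // hypothetical, not from the patch. For a parameter living in $rdi with an
  // empty expression:
  //   before: DBG_VALUE $rdi, $noreg, !"param", !DIExpression()
  // prepending the entry-value marker with
  //   DIExpression::prepend(Prop.DIExpr, DIExpression::EntryValue)
  // produces
  //   after:  DBG_VALUE $rdi, $noreg, !"param",
  //           !DIExpression(DW_OP_LLVM_entry_value, 1)
  // so consumers read the value $rdi held on entry to the function, even
  // though the register has since been clobbered.)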
   /// Change a variable value after encountering a DBG_VALUE inside a block.
@@ -1224,26 +1292,70 @@ public:
     }
   }
 
-  /// Explicitly terminate variable locations based on \p mloc. Creates undef
-  /// DBG_VALUEs for any variables that were located there, and clears
-  /// #ActiveMLoc / #ActiveVLoc tracking information for that location.
-  void clobberMloc(LocIdx MLoc, MachineBasicBlock::iterator Pos) {
-    assert(MTracker->isSpill(MLoc));
+  /// Account for a location \p MLoc being clobbered. Examine the variable
+  /// locations that will be terminated, and try to recover them by using
+  /// another location. Optionally, given \p MakeUndef, emit a DBG_VALUE to
+  /// explicitly terminate a location if it can't be recovered.
+  void clobberMloc(LocIdx MLoc, MachineBasicBlock::iterator Pos,
+                   bool MakeUndef = true) {
     auto ActiveMLocIt = ActiveMLocs.find(MLoc);
     if (ActiveMLocIt == ActiveMLocs.end())
       return;
 
+    // What was the old variable value?
+    ValueIDNum OldValue = VarLocs[MLoc.asU64()];
     VarLocs[MLoc.asU64()] = ValueIDNum::EmptyValue;
 
+    // Examine the remaining variable locations: if we can find the same value
+    // again, we can recover the location.
+    Optional<LocIdx> NewLoc = None;
+    for (auto Loc : MTracker->locations())
+      if (Loc.Value == OldValue)
+        NewLoc = Loc.Idx;
+
+    // If there is no location, and we weren't asked to make the variable
+    // explicitly undef, then stop here.
+    if (!NewLoc && !MakeUndef) {
+      // Try and recover a few more locations with entry values.
+      for (auto &Var : ActiveMLocIt->second) {
+        auto &Prop = ActiveVLocs.find(Var)->second.Properties;
+        recoverAsEntryValue(Var, Prop, OldValue);
+      }
+      flushDbgValues(Pos, nullptr);
+      return;
+    }
+
+    // Examine all the variables based on this location.
+    DenseSet<DebugVariable> NewMLocs;
     for (auto &Var : ActiveMLocIt->second) {
       auto ActiveVLocIt = ActiveVLocs.find(Var);
-      // Create an undef. We can't feed in a nullptr DIExpression alas,
-      // so use the variables last expression. Pass None as the location.
+      // Re-state the variable location: if there's no replacement then NewLoc
+      // is None and a $noreg DBG_VALUE will be created. Otherwise, a DBG_VALUE
+      // identifying the alternative location will be emitted.
       const DIExpression *Expr = ActiveVLocIt->second.Properties.DIExpr;
       DbgValueProperties Properties(Expr, false);
-      PendingDbgValues.push_back(MTracker->emitLoc(None, Var, Properties));
-      ActiveVLocs.erase(ActiveVLocIt);
+      PendingDbgValues.push_back(MTracker->emitLoc(NewLoc, Var, Properties));
+
+      // Update machine locations <=> variable locations maps. Defer updating
+      // ActiveMLocs to avoid invalidating the ActiveMLocIt iterator.
+      if (!NewLoc) {
+        ActiveVLocs.erase(ActiveVLocIt);
+      } else {
+        ActiveVLocIt->second.Loc = *NewLoc;
+        NewMLocs.insert(Var);
+      }
     }
+
+    // Commit any deferred ActiveMLoc changes.
+    if (!NewMLocs.empty())
+      for (auto &Var : NewMLocs)
+        ActiveMLocs[*NewLoc].insert(Var);
+
+    // We lazily track what locations have which values; if we've found a new
+    // location for the clobbered value, remember it.
+    if (NewLoc)
+      VarLocs[NewLoc->asU64()] = OldValue;
 
     flushDbgValues(Pos, nullptr);
 
     ActiveMLocIt->second.clear();
@@ -1332,6 +1444,7 @@ private:
   const TargetRegisterInfo *TRI;
   const TargetInstrInfo *TII;
   const TargetFrameLowering *TFI;
+  const MachineFrameInfo *MFI;
   BitVector CalleeSavedRegs;
   LexicalScopes LS;
   TargetPassConfig *TPC;
@@ -1372,6 +1485,23 @@ private:
   /// instruction numbers in DBG_INSTR_REFs into machine value numbers.
   std::map<uint64_t, InstAndNum> DebugInstrNumToInstr;
 
+  /// Record of where we observed a DBG_PHI instruction.
+  class DebugPHIRecord {
+  public:
+    uint64_t InstrNum;      ///< Instruction number of this DBG_PHI.
+    MachineBasicBlock *MBB; ///< Block where DBG_PHI occurred.
+    ValueIDNum ValueRead;   ///< The value number read by the DBG_PHI.
+    LocIdx ReadLoc;         ///< Register/Stack location the DBG_PHI reads.
+
+    operator unsigned() const { return InstrNum; }
+  };
+
+  /// Map from instruction numbers defined by DBG_PHIs to a record of what that
+  /// DBG_PHI read and where. Populated and edited during the machine value
+  /// location problem -- we use LLVM's SSA Updater to fix changes by
+  /// optimizations that destroy PHI instructions.
+  SmallVector<DebugPHIRecord, 32> DebugPHINumToValue;
+
   // Map of overlapping variable fragments.
   OverlapMap OverlapFragments;
   VarToFragments SeenFragments;
@@ -1398,7 +1528,8 @@ private:
   SpillLoc extractSpillBaseRegAndOffset(const MachineInstr &MI);
 
   /// Observe a single instruction while stepping through a block.
-  void process(MachineInstr &MI);
+  void process(MachineInstr &MI, ValueIDNum **MLiveOuts = nullptr,
+               ValueIDNum **MLiveIns = nullptr);
 
   /// Examines whether \p MI is a DBG_VALUE and notifies trackers.
   /// \returns true if MI was recognized and processed.
@@ -1406,7 +1537,13 @@
   /// Examines whether \p MI is a DBG_INSTR_REF and notifies trackers.
   /// \returns true if MI was recognized and processed.
-  bool transferDebugInstrRef(MachineInstr &MI);
+  bool transferDebugInstrRef(MachineInstr &MI, ValueIDNum **MLiveOuts,
+                             ValueIDNum **MLiveIns);
+
+  /// Stores value-information about where this PHI occurred, and what
+  /// instruction number is associated with it.
+  /// \returns true if MI was recognized and processed.
+  bool transferDebugPHI(MachineInstr &MI);
 
   /// Examines whether \p MI is a copy instruction, and notifies trackers.
   /// \returns true if MI was recognized and processed.
@@ -1425,6 +1562,18 @@
   void accumulateFragmentMap(MachineInstr &MI);
 
+  /// Determine the machine value number referred to by (potentially several)
+  /// DBG_PHI instructions. Block duplication and tail folding can duplicate
+  /// DBG_PHIs, shifting the position where values in registers merge, and
+  /// forming another mini-SSA problem to solve.
+  /// \p Here the position of a DBG_INSTR_REF seeking a machine value number
+  /// \p InstrNum Debug instruction number defined by DBG_PHI instructions.
+  /// \returns The machine value number at position Here, or None.
+  Optional<ValueIDNum> resolveDbgPHIs(MachineFunction &MF,
+                                      ValueIDNum **MLiveOuts,
+                                      ValueIDNum **MLiveIns, MachineInstr &Here,
+                                      uint64_t InstrNum);
+
   /// Step through the function, recording register definitions and movements
   /// in an MLocTracker. Convert the observations into a per-block transfer
   /// function in \p MLocTransfer, suitable for using with the machine value
@@ -1527,8 +1676,9 @@ private:
   /// right now "order of appearance in function, when explored in RPO", so
   /// that we can compare explicitly against VarLocBasedImpl.
   void emitLocations(MachineFunction &MF, LiveInsT SavedLiveIns,
-                     ValueIDNum **MInLocs,
-                     DenseMap<DebugVariable, unsigned> &AllVarsNumbering);
+                     ValueIDNum **MOutLocs, ValueIDNum **MInLocs,
+                     DenseMap<DebugVariable, unsigned> &AllVarsNumbering,
+                     const TargetPassConfig &TPC);
 
   /// Boilerplate computation of some initial sets, artificial blocks and
   /// RPOT block ordering.
@@ -1640,7 +1790,9 @@ bool InstrRefBasedLDV::transferDebugValue(const MachineInstr &MI) {
   return true;
 }
 
-bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI) {
+bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI,
+                                             ValueIDNum **MLiveOuts,
+                                             ValueIDNum **MLiveIns) {
   if (!MI.isDebugRef())
     return false;
@@ -1669,12 +1821,22 @@
   // Various optimizations may have happened to the value during codegen,
   // recorded in the value substitution table. Apply any substitutions to
-  // the instruction / operand number in this DBG_INSTR_REF.
-  auto Sub = MF.DebugValueSubstitutions.find(std::make_pair(InstNo, OpNo));
-  while (Sub != MF.DebugValueSubstitutions.end()) {
-    InstNo = Sub->second.first;
-    OpNo = Sub->second.second;
-    Sub = MF.DebugValueSubstitutions.find(std::make_pair(InstNo, OpNo));
+  // the instruction / operand number in this DBG_INSTR_REF, and collect
+  // any subregister extractions performed during optimization.
+
+  // Create dummy substitution with Src set, for lookup.
+  auto SoughtSub =
+      MachineFunction::DebugSubstitution({InstNo, OpNo}, {0, 0}, 0);
+
+  SmallVector<unsigned, 4> SeenSubregs;
+  auto LowerBoundIt = llvm::lower_bound(MF.DebugValueSubstitutions, SoughtSub);
+  while (LowerBoundIt != MF.DebugValueSubstitutions.end() &&
+         LowerBoundIt->Src == SoughtSub.Src) {
+    std::tie(InstNo, OpNo) = LowerBoundIt->Dest;
+    SoughtSub.Src = LowerBoundIt->Dest;
+    if (unsigned Subreg = LowerBoundIt->Subreg)
+      SeenSubregs.push_back(Subreg);
+    LowerBoundIt = llvm::lower_bound(MF.DebugValueSubstitutions, SoughtSub);
   }
 
   // Default machine value number is <None> -- if no instruction defines
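// A minimal standalone sketch (not part of the patch) of the lookup pattern
// used above: MF.DebugValueSubstitutions is kept sorted by its Src pair (see
// initialSetup below), so each chain step is a binary search with
// llvm::lower_bound instead of a hash lookup. The types here are a
// hypothetical mirror of MachineFunction::DebugSubstitution.
struct Subst {
  std::pair<unsigned, unsigned> Src, Dest; // (instr number, operand number)
  bool operator<(const Subst &Other) const { return Src < Other.Src; }
};

std::pair<unsigned, unsigned> followChain(ArrayRef<Subst> Sorted,
                                          std::pair<unsigned, unsigned> Key) {
  // Follow Src -> Dest links until no further substitution matches; each hop
  // is O(log n). The hunk above additionally collects any Subreg qualifiers
  // seen along the way.
  auto It = llvm::lower_bound(Sorted, Subst{Key, {0, 0}});
  while (It != Sorted.end() && It->Src == Key) {
    Key = It->Dest;
    It = llvm::lower_bound(Sorted, Subst{Key, {0, 0}});
  }
  return Key;
}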
@@ -1682,8 +1844,10 @@
   Optional<ValueIDNum> NewID = None;
 
   // Try to lookup the instruction number, and find the machine value number
-  // that it defines.
+  // that it defines. It could be an instruction, or a PHI.
   auto InstrIt = DebugInstrNumToInstr.find(InstNo);
+  auto PHIIt = std::lower_bound(DebugPHINumToValue.begin(),
+                                DebugPHINumToValue.end(), InstNo);
   if (InstrIt != DebugInstrNumToInstr.end()) {
     const MachineInstr &TargetInstr = *InstrIt->second.first;
     uint64_t BlockNo = TargetInstr.getParent()->getNumber();
@@ -1698,6 +1862,82 @@
     unsigned LocID = MTracker->getLocID(MO.getReg(), false);
     LocIdx L = MTracker->LocIDToLocIdx[LocID];
     NewID = ValueIDNum(BlockNo, InstrIt->second.second, L);
+  } else if (PHIIt != DebugPHINumToValue.end() && PHIIt->InstrNum == InstNo) {
+    // It's actually a PHI value. Which value it is might not be obvious; use
+    // the resolver helper to find out.
+    NewID = resolveDbgPHIs(*MI.getParent()->getParent(), MLiveOuts, MLiveIns,
+                           MI, InstNo);
+  }
+
+  // Apply any subregister extractions, in reverse. We might have seen code
+  // like this:
+  //    CALL64 @foo, implicit-def $rax
+  //    %0:gr64 = COPY $rax
+  //    %1:gr32 = COPY %0.sub_32bit
+  //    %2:gr16 = COPY %1.sub_16bit
+  //    %3:gr8 = COPY %2.sub_8bit
+  // In which case each copy would have been recorded as a substitution with
+  // a subregister qualifier. Apply those qualifiers now.
+  if (NewID && !SeenSubregs.empty()) {
+    unsigned Offset = 0;
+    unsigned Size = 0;
+
+    // Look at each subregister that we passed through, and progressively
+    // narrow in, accumulating any offsets that occur. Substitutions should
+    // only ever be the same or narrower width than what they read from;
+    // iterate in reverse order so that we go from wide to small.
+    for (unsigned Subreg : reverse(SeenSubregs)) {
+      unsigned ThisSize = TRI->getSubRegIdxSize(Subreg);
+      unsigned ThisOffset = TRI->getSubRegIdxOffset(Subreg);
+      Offset += ThisOffset;
+      Size = (Size == 0) ? ThisSize : std::min(Size, ThisSize);
+    }
+
+    // If that worked, look for an appropriate subregister within the register
+    // where the define happens. Don't look at values that were defined during
+    // a stack write: we can't currently express register locations within
+    // spills.
+    LocIdx L = NewID->getLoc();
+    if (NewID && !MTracker->isSpill(L)) {
+      // Find the register class for the register where this def happened.
+      // FIXME: no index for this?
+      Register Reg = MTracker->LocIdxToLocID[L];
+      const TargetRegisterClass *TRC = nullptr;
+      for (auto *TRCI : TRI->regclasses())
+        if (TRCI->contains(Reg))
+          TRC = TRCI;
+      assert(TRC && "Couldn't find target register class?");
+
+      // If the register we have isn't the right size or in the right place,
+      // try to find a subregister inside it.
+      unsigned MainRegSize = TRI->getRegSizeInBits(*TRC);
+      if (Size != MainRegSize || Offset) {
+        // Enumerate all subregisters, searching.
+        Register NewReg = 0;
+        for (MCSubRegIterator SRI(Reg, TRI, false); SRI.isValid(); ++SRI) {
+          unsigned Subreg = TRI->getSubRegIndex(Reg, *SRI);
+          unsigned SubregSize = TRI->getSubRegIdxSize(Subreg);
+          unsigned SubregOffset = TRI->getSubRegIdxOffset(Subreg);
+          if (SubregSize == Size && SubregOffset == Offset) {
+            NewReg = *SRI;
+            break;
+          }
+        }
+
+        // If we didn't find anything: there's no way to express our value.
+        if (!NewReg) {
+          NewID = None;
+        } else {
+          // Re-state the value as being defined within the subregister
+          // that we found.
+          LocIdx NewLoc = MTracker->lookupOrTrackRegister(NewReg);
+          NewID = ValueIDNum(NewID->getBlock(), NewID->getInst(), NewLoc);
+        }
+      }
+    } else {
+      // If we can't handle subregisters, unset the new value.
+      NewID = None;
+    }
+  }
 
   // Now we have a value number or None. Tell the variable value tracker about
@@ -1752,6 +1992,55 @@ bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI) {
   MachineInstr *DbgMI = MTracker->emitLoc(FoundLoc, V, Properties);
   TTracker->PendingDbgValues.push_back(DbgMI);
   TTracker->flushDbgValues(MI.getIterator(), nullptr);
+  return true;
+}
+
+bool InstrRefBasedLDV::transferDebugPHI(MachineInstr &MI) {
+  if (!MI.isDebugPHI())
+    return false;
+
+  // Analyse these only when solving the machine value location problem.
+  if (VTracker || TTracker)
+    return true;
+
+  // First operand is the value location, either a stack slot or register.
+  // Second is the debug instruction number of the original PHI.
+  const MachineOperand &MO = MI.getOperand(0);
+  unsigned InstrNum = MI.getOperand(1).getImm();
+
+  if (MO.isReg()) {
+    // The value is whatever's currently in the register. Read and record it,
+    // to be analysed later.
+    Register Reg = MO.getReg();
+    ValueIDNum Num = MTracker->readReg(Reg);
+    auto PHIRec = DebugPHIRecord(
+        {InstrNum, MI.getParent(), Num, MTracker->lookupOrTrackRegister(Reg)});
+    DebugPHINumToValue.push_back(PHIRec);
+  } else {
+    // The value is whatever's in this stack slot.
+    assert(MO.isFI());
+    unsigned FI = MO.getIndex();
+
+    // If the stack slot is dead, then this was optimized away.
+    // FIXME: stack slot colouring should account for slots that get merged.
+    if (MFI->isDeadObjectIndex(FI))
+      return true;
+
+    // Identify this spill slot.
+    Register Base;
+    StackOffset Offs = TFI->getFrameIndexReference(*MI.getMF(), FI, Base);
+    SpillLoc SL = {Base, Offs};
+    Optional<ValueIDNum> Num = MTracker->readSpill(SL);
+
+    if (!Num)
+      // Nothing ever writes to this slot. Curious, but nothing we can do.
+      return true;
+
+    // Record this DBG_PHI for later analysis.
+    auto DbgPHI = DebugPHIRecord(
+        {InstrNum, MI.getParent(), *Num, *MTracker->getSpillMLoc(SL)});
+    DebugPHINumToValue.push_back(DbgPHI);
+  }
 
   return true;
 }
@@ -1803,6 +2092,32 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) {
   for (auto *MO : RegMaskPtrs)
     MTracker->writeRegMask(MO, CurBB, CurInst);
+
+  if (!TTracker)
+    return;
+
+  // When committing variable values to locations: tell transfer tracker that
+  // we've clobbered things. It may be able to recover the variable from a
+  // different location.
+
+  // Inform TTracker about any direct clobbers.
+  for (uint32_t DeadReg : DeadRegs) {
+    LocIdx Loc = MTracker->lookupOrTrackRegister(DeadReg);
+    TTracker->clobberMloc(Loc, MI.getIterator(), false);
+  }
+
+  // Look for any clobbers performed by a register mask. Only test locations
+  // that are actually being tracked.
+  for (auto L : MTracker->locations()) {
+    // Stack locations can't be clobbered by regmasks.
+    if (MTracker->isSpill(L.Idx))
+      continue;
+
+    Register Reg = MTracker->LocIdxToLocID[L.Idx];
+    for (auto *MO : RegMaskPtrs)
+      if (MO->clobbersPhysReg(Reg))
+        TTracker->clobberMloc(L.Idx, MI.getIterator(), false);
+  }
 }
 
 void InstrRefBasedLDV::performCopy(Register SrcRegNum, Register DstRegNum) {
@@ -1871,47 +2186,9 @@ bool InstrRefBasedLDV::isLocationSpill(const MachineInstr &MI,
   if (!isSpillInstruction(MI, MF))
     return false;
 
-  // XXX FIXME: On x86, isStoreToStackSlotPostFE returns '1' instead of an
-  // actual register number.
-  if (ObserveAllStackops) {
-    int FI;
-    Reg = TII->isStoreToStackSlotPostFE(MI, FI);
-    return Reg != 0;
-  }
-
-  auto isKilledReg = [&](const MachineOperand MO, unsigned &Reg) {
-    if (!MO.isReg() || !MO.isUse()) {
-      Reg = 0;
-      return false;
-    }
-    Reg = MO.getReg();
-    return MO.isKill();
-  };
-
-  for (const MachineOperand &MO : MI.operands()) {
-    // In a spill instruction generated by the InlineSpiller the spilled
-    // register has its kill flag set.
-    if (isKilledReg(MO, Reg))
-      return true;
-    if (Reg != 0) {
-      // Check whether next instruction kills the spilled register.
-      // FIXME: Current solution does not cover search for killed register in
-      // bundles and instructions further down the chain.
-      auto NextI = std::next(MI.getIterator());
-      // Skip next instruction that points to basic block end iterator.
-      if (MI.getParent()->end() == NextI)
-        continue;
-      unsigned RegNext;
-      for (const MachineOperand &MONext : NextI->operands()) {
-        // Return true if we came across the register from the
-        // previous spill instruction that is killed in NextI.
-        if (isKilledReg(MONext, RegNext) && RegNext == Reg)
-          return true;
-      }
-    }
-  }
-  // Return false if we didn't find spilled register.
-  return false;
+  int FI;
+  Reg = TII->isStoreToStackSlotPostFE(MI, FI);
+  return Reg != 0;
 }
 
 Optional<SpillLoc>
@@ -1950,8 +2227,12 @@ bool InstrRefBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI) {
     if (TTracker) {
       Optional<LocIdx> MLoc = MTracker->getSpillMLoc(*Loc);
-      if (MLoc)
+      if (MLoc) {
+        // Un-set this location before clobbering, so that we don't salvage
+        // the variable location back to the same place.
+        MTracker->setMLoc(*MLoc, ValueIDNum::EmptyValue);
         TTracker->clobberMloc(*MLoc, MI.getIterator());
+      }
     }
   }
@@ -2066,6 +2347,15 @@ bool InstrRefBasedLDV::transferRegisterCopy(MachineInstr &MI) {
   if (EmulateOldLDV && SrcReg != DestReg)
     MTracker->defReg(SrcReg, CurBB, CurInst);
 
+  // Finally, the copy might have clobbered variables based on the destination
+  // register. Tell TTracker about it, in case a backup location exists.
+  if (TTracker) {
+    for (MCRegAliasIterator RAI(DestReg, TRI, true); RAI.isValid(); ++RAI) {
+      LocIdx ClobberedLoc = MTracker->getRegMLoc(*RAI);
+      TTracker->clobberMloc(ClobberedLoc, MI.getIterator(), false);
+    }
+  }
+
   return true;
 }
@@ -2124,13 +2414,16 @@ void InstrRefBasedLDV::accumulateFragmentMap(MachineInstr &MI) {
   AllSeenFragments.insert(ThisFragment);
 }
 
-void InstrRefBasedLDV::process(MachineInstr &MI) {
+void InstrRefBasedLDV::process(MachineInstr &MI, ValueIDNum **MLiveOuts,
+                               ValueIDNum **MLiveIns) {
   // Try to interpret an MI as a debug or transfer instruction. Only if it's
   // none of these should we interpret its register defs as new value
   // definitions.
   if (transferDebugValue(MI))
     return;
-  if (transferDebugInstrRef(MI))
+  if (transferDebugInstrRef(MI, MLiveOuts, MLiveIns))
+    return;
+  if (transferDebugPHI(MI))
     return;
   if (transferRegisterCopy(MI))
     return;
@@ -2641,9 +2934,7 @@ std::tuple<bool, bool> InstrRefBasedLDV::vlocJoin(
   auto &ILS = *ILSIt->second;
 
   // Order predecessors by RPOT order, for exploring them in that order.
- SmallVector<MachineBasicBlock *, 8> BlockOrders; - for (auto p : MBB.predecessors()) - BlockOrders.push_back(p); + SmallVector<MachineBasicBlock *, 8> BlockOrders(MBB.predecessors()); auto Cmp = [&](MachineBasicBlock *A, MachineBasicBlock *B) { return BBToOrder[A] < BBToOrder[B]; @@ -3128,9 +3419,10 @@ void InstrRefBasedLDV::dump_mloc_transfer( #endif void InstrRefBasedLDV::emitLocations( - MachineFunction &MF, LiveInsT SavedLiveIns, ValueIDNum **MInLocs, - DenseMap<DebugVariable, unsigned> &AllVarsNumbering) { - TTracker = new TransferTracker(TII, MTracker, MF, *TRI, CalleeSavedRegs); + MachineFunction &MF, LiveInsT SavedLiveIns, ValueIDNum **MOutLocs, + ValueIDNum **MInLocs, DenseMap<DebugVariable, unsigned> &AllVarsNumbering, + const TargetPassConfig &TPC) { + TTracker = new TransferTracker(TII, MTracker, MF, *TRI, CalleeSavedRegs, TPC); unsigned NumLocs = MTracker->getNumLocs(); // For each block, load in the machine value locations and variable value @@ -3146,7 +3438,7 @@ void InstrRefBasedLDV::emitLocations( CurBB = bbnum; CurInst = 1; for (auto &MI : MBB) { - process(MI); + process(MI, MOutLocs, MInLocs); TTracker->checkInstForNewValues(CurInst, MI.getIterator()); ++CurInst; } @@ -3178,9 +3470,14 @@ void InstrRefBasedLDV::emitLocations( MBB.insert(P.Pos, MI); } } else { + // Terminators, like tail calls, can clobber things. Don't try and place + // transfers after them. + if (P.Pos->isTerminator()) + continue; + MachineBasicBlock &MBB = *P.Pos->getParent(); for (auto *MI : P.Insts) { - MBB.insertAfter(P.Pos, MI); + MBB.insertAfterBundle(P.Pos, MI); } } } @@ -3201,12 +3498,27 @@ void InstrRefBasedLDV::initialSetup(MachineFunction &MF) { // Compute mappings of block <=> RPO order. ReversePostOrderTraversal<MachineFunction *> RPOT(&MF); unsigned int RPONumber = 0; - for (auto RI = RPOT.begin(), RE = RPOT.end(); RI != RE; ++RI) { - OrderToBB[RPONumber] = *RI; - BBToOrder[*RI] = RPONumber; - BBNumToRPO[(*RI)->getNumber()] = RPONumber; + for (MachineBasicBlock *MBB : RPOT) { + OrderToBB[RPONumber] = MBB; + BBToOrder[MBB] = RPONumber; + BBNumToRPO[MBB->getNumber()] = RPONumber; ++RPONumber; } + + // Order value substitutions by their "source" operand pair, for quick lookup. + llvm::sort(MF.DebugValueSubstitutions); + +#ifdef EXPENSIVE_CHECKS + // As an expensive check, test whether there are any duplicate substitution + // sources in the collection. + if (MF.DebugValueSubstitutions.size() > 2) { + for (auto It = MF.DebugValueSubstitutions.begin(); + It != std::prev(MF.DebugValueSubstitutions.end()); ++It) { + assert(It->Src != std::next(It)->Src && "Duplicate variable location " + "substitution seen"); + } + } +#endif } /// Calculate the liveness information for the given machine function and @@ -3224,6 +3536,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, TII = MF.getSubtarget().getInstrInfo(); TFI = MF.getSubtarget().getFrameLowering(); TFI->getCalleeSaves(MF, CalleeSavedRegs); + MFI = &MF.getFrameInfo(); LS.initialize(MF); MTracker = @@ -3266,6 +3579,21 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, // dataflow problem. mlocDataflow(MInLocs, MOutLocs, MLocTransfer); + // Patch up debug phi numbers, turning unknown block-live-in values into + // either live-through machine values, or PHIs. + for (auto &DBG_PHI : DebugPHINumToValue) { + // Identify unresolved block-live-ins. 
+ ValueIDNum &Num = DBG_PHI.ValueRead; + if (!Num.isPHI()) + continue; + + unsigned BlockNo = Num.getBlock(); + LocIdx LocNo = Num.getLoc(); + Num = MInLocs[BlockNo][LocNo.asU64()]; + } + // Later, we'll be looking up ranges of instruction numbers. + llvm::sort(DebugPHINumToValue); + // Walk back through each block / instruction, collecting DBG_VALUE // instructions and recording what machine value their operands refer to. for (auto &OrderPair : OrderToBB) { @@ -3276,7 +3604,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, MTracker->loadFromArray(MInLocs[CurBB], CurBB); CurInst = 1; for (auto &MI : MBB) { - process(MI); + process(MI, MOutLocs, MInLocs); ++CurInst; } MTracker->reset(); @@ -3331,7 +3659,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, // Using the computed value locations and variable values for each block, // create the DBG_VALUE instructions representing the extended variable // locations. - emitLocations(MF, SavedLiveIns, MInLocs, AllVarsNumbering); + emitLocations(MF, SavedLiveIns, MOutLocs, MInLocs, AllVarsNumbering, *TPC); for (int Idx = 0; Idx < MaxNumBlocks; ++Idx) { delete[] MOutLocs[Idx]; @@ -3354,6 +3682,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, BBToOrder.clear(); BBNumToRPO.clear(); DebugInstrNumToInstr.clear(); + DebugPHINumToValue.clear(); return Changed; } @@ -3361,3 +3690,389 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, LDVImpl *llvm::makeInstrRefBasedLiveDebugValues() { return new InstrRefBasedLDV(); } + +namespace { +class LDVSSABlock; +class LDVSSAUpdater; + +// Pick a type to identify incoming block values as we construct SSA. We +// can't use anything more robust than an integer unfortunately, as SSAUpdater +// expects to zero-initialize the type. +typedef uint64_t BlockValueNum; + +/// Represents an SSA PHI node for the SSA updater class. Contains the block +/// this PHI is in, the value number it would have, and the expected incoming +/// values from parent blocks. +class LDVSSAPhi { +public: + SmallVector<std::pair<LDVSSABlock *, BlockValueNum>, 4> IncomingValues; + LDVSSABlock *ParentBlock; + BlockValueNum PHIValNum; + LDVSSAPhi(BlockValueNum PHIValNum, LDVSSABlock *ParentBlock) + : ParentBlock(ParentBlock), PHIValNum(PHIValNum) {} + + LDVSSABlock *getParent() { return ParentBlock; } +}; + +/// Thin wrapper around a block predecessor iterator. Only difference from a +/// normal block iterator is that it dereferences to an LDVSSABlock. +class LDVSSABlockIterator { +public: + MachineBasicBlock::pred_iterator PredIt; + LDVSSAUpdater &Updater; + + LDVSSABlockIterator(MachineBasicBlock::pred_iterator PredIt, + LDVSSAUpdater &Updater) + : PredIt(PredIt), Updater(Updater) {} + + bool operator!=(const LDVSSABlockIterator &OtherIt) const { + return OtherIt.PredIt != PredIt; + } + + LDVSSABlockIterator &operator++() { + ++PredIt; + return *this; + } + + LDVSSABlock *operator*(); +}; + +/// Thin wrapper around a block for SSA Updater interface. Necessary because +/// we need to track the PHI value(s) that we may have observed as necessary +/// in this block. +class LDVSSABlock { +public: + MachineBasicBlock &BB; + LDVSSAUpdater &Updater; + using PHIListT = SmallVector<LDVSSAPhi, 1>; + /// List of PHIs in this block. There should only ever be one. 
+  PHIListT PHIList;
+
+  LDVSSABlock(MachineBasicBlock &BB, LDVSSAUpdater &Updater)
+      : BB(BB), Updater(Updater) {}
+
+  LDVSSABlockIterator succ_begin() {
+    return LDVSSABlockIterator(BB.succ_begin(), Updater);
+  }
+
+  LDVSSABlockIterator succ_end() {
+    return LDVSSABlockIterator(BB.succ_end(), Updater);
+  }
+
+  /// SSAUpdater has requested a PHI: create that within this block record.
+  LDVSSAPhi *newPHI(BlockValueNum Value) {
+    PHIList.emplace_back(Value, this);
+    return &PHIList.back();
+  }
+
+  /// SSAUpdater wishes to know what PHIs already exist in this block.
+  PHIListT &phis() { return PHIList; }
+};
+
+/// Utility class for the SSAUpdater interface: tracks blocks, PHIs and values
+/// while SSAUpdater is exploring the CFG. It's passed as a handle / baton to
+/// SSAUpdaterTraits<LDVSSAUpdater>.
+class LDVSSAUpdater {
+public:
+  /// Map of value numbers to PHI records.
+  DenseMap<BlockValueNum, LDVSSAPhi *> PHIs;
+  /// Map of which blocks generate Undef values -- blocks that are not
+  /// dominated by any Def.
+  DenseMap<MachineBasicBlock *, BlockValueNum> UndefMap;
+  /// Map of machine blocks to our own records of them.
+  DenseMap<MachineBasicBlock *, LDVSSABlock *> BlockMap;
+  /// Machine location where any PHI must occur.
+  LocIdx Loc;
+  /// Table of live-in machine value numbers for blocks / locations.
+  ValueIDNum **MLiveIns;
+
+  LDVSSAUpdater(LocIdx L, ValueIDNum **MLiveIns) : Loc(L), MLiveIns(MLiveIns) {}
+
+  void reset() {
+    for (auto &Block : BlockMap)
+      delete Block.second;
+
+    PHIs.clear();
+    UndefMap.clear();
+    BlockMap.clear();
+  }
+
+  ~LDVSSAUpdater() { reset(); }
+
+  /// For a given MBB, create a wrapper block for it. Stores it in the
+  /// LDVSSAUpdater block map.
+  LDVSSABlock *getSSALDVBlock(MachineBasicBlock *BB) {
+    auto it = BlockMap.find(BB);
+    if (it == BlockMap.end()) {
+      BlockMap[BB] = new LDVSSABlock(*BB, *this);
+      it = BlockMap.find(BB);
+    }
+    return it->second;
+  }
+
+  /// Find the live-in value number for the given block. Looks up the value at
+  /// the PHI location on entry.
+  BlockValueNum getValue(LDVSSABlock *LDVBB) {
+    return MLiveIns[LDVBB->BB.getNumber()][Loc.asU64()].asU64();
+  }
+};
+
+LDVSSABlock *LDVSSABlockIterator::operator*() {
+  return Updater.getSSALDVBlock(*PredIt);
+}
+
+#ifndef NDEBUG
+
+raw_ostream &operator<<(raw_ostream &out, const LDVSSAPhi &PHI) {
+  out << "SSALDVPHI " << PHI.PHIValNum;
+  return out;
+}
+
+#endif
+
+} // namespace
+
+namespace llvm {
+
+/// Template specialization to give SSAUpdater access to CFG and value
+/// information. SSAUpdater calls methods in these traits, passing in the
+/// LDVSSAUpdater object, to learn about blocks and the values they define.
+/// It also provides methods to create PHI nodes and track them.
+template <> class SSAUpdaterTraits<LDVSSAUpdater> {
+public:
+  using BlkT = LDVSSABlock;
+  using ValT = BlockValueNum;
+  using PhiT = LDVSSAPhi;
+  using BlkSucc_iterator = LDVSSABlockIterator;
+
+  // Methods to access block successors -- dereferencing to our wrapper class.
+  static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return BB->succ_begin(); }
+  static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return BB->succ_end(); }
+
+  /// Iterator for PHI operands.
+ class PHI_iterator { + private: + LDVSSAPhi *PHI; + unsigned Idx; + + public: + explicit PHI_iterator(LDVSSAPhi *P) // begin iterator + : PHI(P), Idx(0) {} + PHI_iterator(LDVSSAPhi *P, bool) // end iterator + : PHI(P), Idx(PHI->IncomingValues.size()) {} + + PHI_iterator &operator++() { + Idx++; + return *this; + } + bool operator==(const PHI_iterator &X) const { return Idx == X.Idx; } + bool operator!=(const PHI_iterator &X) const { return !operator==(X); } + + BlockValueNum getIncomingValue() { return PHI->IncomingValues[Idx].second; } + + LDVSSABlock *getIncomingBlock() { return PHI->IncomingValues[Idx].first; } + }; + + static inline PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); } + + static inline PHI_iterator PHI_end(PhiT *PHI) { + return PHI_iterator(PHI, true); + } + + /// FindPredecessorBlocks - Put the predecessors of BB into the Preds + /// vector. + static void FindPredecessorBlocks(LDVSSABlock *BB, + SmallVectorImpl<LDVSSABlock *> *Preds) { + for (MachineBasicBlock::pred_iterator PI = BB->BB.pred_begin(), + E = BB->BB.pred_end(); + PI != E; ++PI) + Preds->push_back(BB->Updater.getSSALDVBlock(*PI)); + } + + /// GetUndefVal - Normally creates an IMPLICIT_DEF instruction with a new + /// register. For LiveDebugValues, represents a block identified as not having + /// any DBG_PHI predecessors. + static BlockValueNum GetUndefVal(LDVSSABlock *BB, LDVSSAUpdater *Updater) { + // Create a value number for this block -- it needs to be unique and in the + // "undef" collection, so that we know it's not real. Use a number + // representing a PHI into this block. + BlockValueNum Num = ValueIDNum(BB->BB.getNumber(), 0, Updater->Loc).asU64(); + Updater->UndefMap[&BB->BB] = Num; + return Num; + } + + /// CreateEmptyPHI - Create a (representation of a) PHI in the given block. + /// SSAUpdater will populate it with information about incoming values. The + /// value number of this PHI is whatever the machine value number problem + /// solution determined it to be. This includes non-phi values if SSAUpdater + /// tries to create a PHI where the incoming values are identical. + static BlockValueNum CreateEmptyPHI(LDVSSABlock *BB, unsigned NumPreds, + LDVSSAUpdater *Updater) { + BlockValueNum PHIValNum = Updater->getValue(BB); + LDVSSAPhi *PHI = BB->newPHI(PHIValNum); + Updater->PHIs[PHIValNum] = PHI; + return PHIValNum; + } + + /// AddPHIOperand - Add the specified value as an operand of the PHI for + /// the specified predecessor block. + static void AddPHIOperand(LDVSSAPhi *PHI, BlockValueNum Val, LDVSSABlock *Pred) { + PHI->IncomingValues.push_back(std::make_pair(Pred, Val)); + } + + /// ValueIsPHI - Check if the instruction that defines the specified value + /// is a PHI instruction. + static LDVSSAPhi *ValueIsPHI(BlockValueNum Val, LDVSSAUpdater *Updater) { + auto PHIIt = Updater->PHIs.find(Val); + if (PHIIt == Updater->PHIs.end()) + return nullptr; + return PHIIt->second; + } + + /// ValueIsNewPHI - Like ValueIsPHI but also check if the PHI has no source + /// operands, i.e., it was just added. + static LDVSSAPhi *ValueIsNewPHI(BlockValueNum Val, LDVSSAUpdater *Updater) { + LDVSSAPhi *PHI = ValueIsPHI(Val, Updater); + if (PHI && PHI->IncomingValues.size() == 0) + return PHI; + return nullptr; + } + + /// GetPHIValue - For the specified PHI instruction, return the value + /// that it defines. 
+ static BlockValueNum GetPHIValue(LDVSSAPhi *PHI) { return PHI->PHIValNum; } +}; + +} // end namespace llvm + +Optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIs(MachineFunction &MF, + ValueIDNum **MLiveOuts, + ValueIDNum **MLiveIns, + MachineInstr &Here, + uint64_t InstrNum) { + // Pick out records of DBG_PHI instructions that have been observed. If there + // are none, then we cannot compute a value number. + auto RangePair = std::equal_range(DebugPHINumToValue.begin(), + DebugPHINumToValue.end(), InstrNum); + auto LowerIt = RangePair.first; + auto UpperIt = RangePair.second; + + // No DBG_PHI means there can be no location. + if (LowerIt == UpperIt) + return None; + + // If there's only one DBG_PHI, then that is our value number. + if (std::distance(LowerIt, UpperIt) == 1) + return LowerIt->ValueRead; + + auto DBGPHIRange = make_range(LowerIt, UpperIt); + + // Pick out the location (physreg, slot) where any PHIs must occur. It's + // technically possible for us to merge values in different registers in each + // block, but highly unlikely that LLVM will generate such code after register + // allocation. + LocIdx Loc = LowerIt->ReadLoc; + + // We have several DBG_PHIs, and a use position (the Here inst). All each + // DBG_PHI does is identify a value at a program position. We can treat each + // DBG_PHI like it's a Def of a value, and the use position is a Use of a + // value, just like SSA. We use the bulk-standard LLVM SSA updater class to + // determine which Def is used at the Use, and any PHIs that happen along + // the way. + // Adapted LLVM SSA Updater: + LDVSSAUpdater Updater(Loc, MLiveIns); + // Map of which Def or PHI is the current value in each block. + DenseMap<LDVSSABlock *, BlockValueNum> AvailableValues; + // Set of PHIs that we have created along the way. + SmallVector<LDVSSAPhi *, 8> CreatedPHIs; + + // Each existing DBG_PHI is a Def'd value under this model. Record these Defs + // for the SSAUpdater. + for (const auto &DBG_PHI : DBGPHIRange) { + LDVSSABlock *Block = Updater.getSSALDVBlock(DBG_PHI.MBB); + const ValueIDNum &Num = DBG_PHI.ValueRead; + AvailableValues.insert(std::make_pair(Block, Num.asU64())); + } + + LDVSSABlock *HereBlock = Updater.getSSALDVBlock(Here.getParent()); + const auto &AvailIt = AvailableValues.find(HereBlock); + if (AvailIt != AvailableValues.end()) { + // Actually, we already know what the value is -- the Use is in the same + // block as the Def. + return ValueIDNum::fromU64(AvailIt->second); + } + + // Otherwise, we must use the SSA Updater. It will identify the value number + // that we are to use, and the PHIs that must happen along the way. + SSAUpdaterImpl<LDVSSAUpdater> Impl(&Updater, &AvailableValues, &CreatedPHIs); + BlockValueNum ResultInt = Impl.GetValue(Updater.getSSALDVBlock(Here.getParent())); + ValueIDNum Result = ValueIDNum::fromU64(ResultInt); + + // We have the number for a PHI, or possibly live-through value, to be used + // at this Use. There are a number of things we have to check about it though: + // * Does any PHI use an 'Undef' (like an IMPLICIT_DEF) value? If so, this + // Use was not completely dominated by DBG_PHIs and we should abort. + // * Are the Defs or PHIs clobbered in a block? SSAUpdater isn't aware that + // we've left SSA form. Validate that the inputs to each PHI are the + // expected values. + // * Is a PHI we've created actually a merging of values, or are all the + // predecessor values the same, leading to a non-PHI machine value number? + // (SSAUpdater doesn't know that either). 
Remap validated PHIs into the + // the ValidatedValues collection below to sort this out. + DenseMap<LDVSSABlock *, ValueIDNum> ValidatedValues; + + // Define all the input DBG_PHI values in ValidatedValues. + for (const auto &DBG_PHI : DBGPHIRange) { + LDVSSABlock *Block = Updater.getSSALDVBlock(DBG_PHI.MBB); + const ValueIDNum &Num = DBG_PHI.ValueRead; + ValidatedValues.insert(std::make_pair(Block, Num)); + } + + // Sort PHIs to validate into RPO-order. + SmallVector<LDVSSAPhi *, 8> SortedPHIs; + for (auto &PHI : CreatedPHIs) + SortedPHIs.push_back(PHI); + + std::sort( + SortedPHIs.begin(), SortedPHIs.end(), [&](LDVSSAPhi *A, LDVSSAPhi *B) { + return BBToOrder[&A->getParent()->BB] < BBToOrder[&B->getParent()->BB]; + }); + + for (auto &PHI : SortedPHIs) { + ValueIDNum ThisBlockValueNum = + MLiveIns[PHI->ParentBlock->BB.getNumber()][Loc.asU64()]; + + // Are all these things actually defined? + for (auto &PHIIt : PHI->IncomingValues) { + // Any undef input means DBG_PHIs didn't dominate the use point. + if (Updater.UndefMap.find(&PHIIt.first->BB) != Updater.UndefMap.end()) + return None; + + ValueIDNum ValueToCheck; + ValueIDNum *BlockLiveOuts = MLiveOuts[PHIIt.first->BB.getNumber()]; + + auto VVal = ValidatedValues.find(PHIIt.first); + if (VVal == ValidatedValues.end()) { + // We cross a loop, and this is a backedge. LLVMs tail duplication + // happens so late that DBG_PHI instructions should not be able to + // migrate into loops -- meaning we can only be live-through this + // loop. + ValueToCheck = ThisBlockValueNum; + } else { + // Does the block have as a live-out, in the location we're examining, + // the value that we expect? If not, it's been moved or clobbered. + ValueToCheck = VVal->second; + } + + if (BlockLiveOuts[Loc.asU64()] != ValueToCheck) + return None; + } + + // Record this value as validated. + ValidatedValues.insert({PHI->ParentBlock, ThisBlockValueNum}); + } + + // All the PHIs are valid: we can return what the SSAUpdater said our value + // number was. + return Result; +} diff --git a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp index 770c46ec8436..38e803d1abb5 100644 --- a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp @@ -14,6 +14,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetMachine.h" /// \file LiveDebugValues.cpp @@ -33,6 +34,12 @@ using namespace llvm; +static cl::opt<bool> + ForceInstrRefLDV("force-instr-ref-livedebugvalues", cl::Hidden, + cl::desc("Use instruction-ref based LiveDebugValues with " + "normal DBG_VALUE inputs"), + cl::init(false)); + /// Generic LiveDebugValues pass. Calls through to VarLocBasedLDV or /// InstrRefBasedLDV to perform location propagation, via the LDVImpl /// base class. @@ -87,6 +94,9 @@ bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) { InstrRefBased = TM.Options.ValueTrackingVariableLocations; } + // Allow the user to force selection of InstrRef LDV. 
+  InstrRefBased |= ForceInstrRefLDV;
+
   if (InstrRefBased)
     TheImpl = llvm::makeInstrRefBasedLiveDebugValues();
   else
diff --git a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h
index 6b05bc68d74d..9c910f180b9f 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h
+++ b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h
@@ -6,6 +6,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#ifndef LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_LIVEDEBUGVALUES_H
+#define LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_LIVEDEBUGVALUES_H
+
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
@@ -30,3 +33,5 @@ public:
 extern LDVImpl *makeVarLocBasedLiveDebugValues();
 extern LDVImpl *makeInstrRefBasedLiveDebugValues();
 } // namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_LIVEDEBUGVALUES_H
diff --git a/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
index e2daa46fe6b9..1e6d65c18953 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
@@ -76,20 +76,23 @@
 /// that are not through dataflow.
 ///
 /// Within LiveDebugValues: each variable location is represented by a
-/// VarLoc object that identifies the source variable, its current
-/// machine-location, and the DBG_VALUE inst that specifies the location. Each
-/// VarLoc is indexed in the (function-scope) \p VarLocMap, giving each VarLoc a
-/// unique index. Rather than operate directly on machine locations, the
-/// dataflow analysis in this pass identifies locations by their index in the
-/// VarLocMap, meaning all the variable locations in a block can be described
-/// by a sparse vector of VarLocMap indicies.
+/// VarLoc object that identifies the source variable, the set of
+/// machine-locations that currently describe it (a single location for
+/// DBG_VALUE or multiple for DBG_VALUE_LIST), and the DBG_VALUE inst that
+/// specifies the location. Each VarLoc is indexed in the (function-scope) \p
+/// VarLocMap, giving each VarLoc a set of unique indexes, each of which
+/// corresponds to one of the VarLoc's machine-locations and can be used to
+/// look up the VarLoc in the VarLocMap. Rather than operate directly on machine
+/// locations, the dataflow analysis in this pass identifies locations by their
+/// indices in the VarLocMap, meaning all the variable locations in a block can
+/// be described by a sparse vector of VarLocMap indices.
 ///
 /// All the storage for the dataflow analysis is local to the ExtendRanges
 /// method and passed down to helper methods. "OutLocs" and "InLocs" record the
 /// in and out lattice values for each block. "OpenRanges" maintains a list of
 /// variable locations and, with the "process" method, evaluates the transfer
-/// function of each block. "flushPendingLocs" installs DBG_VALUEs for each
-/// live-in location at the start of blocks, while "Transfers" records
+/// function of each block. "flushPendingLocs" installs debug value instructions
+/// for each live-in location at the start of blocks, while "Transfers" records
 /// transfers of values between machine-locations.
/// /// We avoid explicitly representing the "Unknown" (\top) lattice value in the @@ -175,17 +178,6 @@ static cl::opt<unsigned> InputDbgValueLimit( "Maximum input DBG_VALUE insts supported by debug range extension"), cl::init(50000), cl::Hidden); -// If @MI is a DBG_VALUE with debug value described by a defined -// register, returns the number of this register. In the other case, returns 0. -static Register isDbgValueDescribedByReg(const MachineInstr &MI) { - assert(MI.isDebugValue() && "expected a DBG_VALUE"); - assert(MI.getNumOperands() == 4 && "malformed DBG_VALUE"); - // If location of variable is described using a register (directly - // or indirectly), this register is always a first operand. - return MI.getDebugOperand(0).isReg() ? MI.getDebugOperand(0).getReg() - : Register(); -} - /// If \p Op is a stack or frame register return true, otherwise return false. /// This is used to avoid basing the debug entry values on the registers, since /// we do not support it at the moment. @@ -210,6 +202,13 @@ namespace { // this prevents fallback to std::set::count() operations. using DefinedRegsSet = SmallSet<Register, 32>; +// The IDs in this set correspond to MachineLocs in VarLocs, as well as VarLocs +// that represent Entry Values; every VarLoc in the set will also appear +// exactly once at Location=0. +// As a result, each VarLoc may appear more than once in this "set", but each +// range corresponding to a Reg, SpillLoc, or EntryValue type will still be a +// "true" set (i.e. each VarLoc may appear only once), and the range Location=0 +// is the set of all VarLocs. using VarLocSet = CoalescingBitVector<uint64_t>; /// A type-checked pair of {Register Location (or 0), Index}, used to index @@ -229,11 +228,19 @@ struct LocIndex { // here to encode non-register locations. u32_index_t Index; - /// The first location greater than 0 that is not reserved for VarLocs of - /// kind RegisterKind. + /// The location that has an entry for every VarLoc in the map. + static constexpr u32_location_t kUniversalLocation = 0; + + /// The first location that is reserved for VarLocs with locations of kind + /// RegisterKind. + static constexpr u32_location_t kFirstRegLocation = 1; + + /// The first location greater than 0 that is not reserved for VarLocs with + /// locations of kind RegisterKind. static constexpr u32_location_t kFirstInvalidRegLocation = 1 << 30; - /// A special location reserved for VarLocs of kind SpillLocKind. + /// A special location reserved for VarLocs with locations of kind + /// SpillLocKind. static constexpr u32_location_t kSpillLocation = kFirstInvalidRegLocation; /// A special location reserved for VarLocs of kind EntryValueBackupKind and @@ -258,7 +265,7 @@ struct LocIndex { /// Get the start of the interval reserved for VarLocs of kind RegisterKind /// which reside in \p Reg. The end is at rawIndexForReg(Reg+1)-1. - static uint64_t rawIndexForReg(uint32_t Reg) { + static uint64_t rawIndexForReg(Register Reg) { return LocIndex(Reg, 0).getAsRawInteger(); } @@ -272,6 +279,13 @@ struct LocIndex { } }; +// Simple Set for storing all the VarLoc Indices at a Location bucket. +using VarLocsInRange = SmallSet<LocIndex::u32_index_t, 32>; +// Vector of all `LocIndex`s for a given VarLoc; the same Location should not +// appear in any two of these, as each VarLoc appears at most once in any +// Location bucket. +using LocIndices = SmallVector<LocIndex, 2>; + class VarLocBasedLDV : public LDVImpl { private: const TargetRegisterInfo *TRI; @@ -312,51 +326,130 @@ private: /// is moved. 
const MachineInstr &MI; - enum VarLocKind { + enum class MachineLocKind { InvalidKind = 0, RegisterKind, SpillLocKind, - ImmediateKind, + ImmediateKind + }; + + enum class EntryValueLocKind { + NonEntryValueKind = 0, EntryValueKind, EntryValueBackupKind, EntryValueCopyBackupKind - } Kind = InvalidKind; + } EVKind; /// The value location. Stored separately to avoid repeatedly /// extracting it from MI. - union LocUnion { + union MachineLocValue { uint64_t RegNo; SpillLoc SpillLocation; uint64_t Hash; int64_t Immediate; const ConstantFP *FPImm; const ConstantInt *CImm; - LocUnion() : Hash(0) {} - } Loc; + MachineLocValue() : Hash(0) {} + }; + + /// A single machine location; its Kind is either a register, spill + /// location, or immediate value. + /// If the VarLoc is not a NonEntryValueKind, then it will use only a + /// single MachineLoc of RegisterKind. + struct MachineLoc { + MachineLocKind Kind; + MachineLocValue Value; + bool operator==(const MachineLoc &Other) const { + if (Kind != Other.Kind) + return false; + switch (Kind) { + case MachineLocKind::SpillLocKind: + return Value.SpillLocation == Other.Value.SpillLocation; + case MachineLocKind::RegisterKind: + case MachineLocKind::ImmediateKind: + return Value.Hash == Other.Value.Hash; + default: + llvm_unreachable("Invalid kind"); + } + } + bool operator<(const MachineLoc &Other) const { + switch (Kind) { + case MachineLocKind::SpillLocKind: + return std::make_tuple( + Kind, Value.SpillLocation.SpillBase, + Value.SpillLocation.SpillOffset.getFixed(), + Value.SpillLocation.SpillOffset.getScalable()) < + std::make_tuple( + Other.Kind, Other.Value.SpillLocation.SpillBase, + Other.Value.SpillLocation.SpillOffset.getFixed(), + Other.Value.SpillLocation.SpillOffset.getScalable()); + case MachineLocKind::RegisterKind: + case MachineLocKind::ImmediateKind: + return std::tie(Kind, Value.Hash) < + std::tie(Other.Kind, Other.Value.Hash); + default: + llvm_unreachable("Invalid kind"); + } + } + }; + + /// The set of machine locations used to determine the variable's value, in + /// conjunction with Expr. Initially populated with MI's debug operands, + /// but may be transformed independently afterwards. + SmallVector<MachineLoc, 8> Locs; + /// Used to map the index of each location in Locs back to the index of its + /// original debug operand in MI. Used when multiple location operands are + /// coalesced and the original MI's operands need to be accessed while + /// emitting a debug value. 
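// A worked example of the operand coalescing described above; the input is
// hypothetical and not from the patch. Given
//   DBG_VALUE_LIST !"x",
//     !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_plus),
//     $rax, $rax
// both debug operands resolve to the same MachineLoc, so only one entry is
// kept in Locs, and the duplicate reference is folded into the expression:
//   Expr = DIExpression::replaceArg(Expr, /*OldArg=*/1, /*NewArg=*/0);
// yielding (DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 0, DW_OP_plus), while
// OrigLocMap[0] == 0 still maps the surviving location back to the original
// instruction's first operand.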
+ SmallVector<unsigned, 8> OrigLocMap; VarLoc(const MachineInstr &MI, LexicalScopes &LS) : Var(MI.getDebugVariable(), MI.getDebugExpression(), MI.getDebugLoc()->getInlinedAt()), - Expr(MI.getDebugExpression()), MI(MI) { + Expr(MI.getDebugExpression()), MI(MI), + EVKind(EntryValueLocKind::NonEntryValueKind) { assert(MI.isDebugValue() && "not a DBG_VALUE"); - assert(MI.getNumOperands() == 4 && "malformed DBG_VALUE"); - if (int RegNo = isDbgValueDescribedByReg(MI)) { - Kind = RegisterKind; - Loc.RegNo = RegNo; - } else if (MI.getDebugOperand(0).isImm()) { - Kind = ImmediateKind; - Loc.Immediate = MI.getDebugOperand(0).getImm(); - } else if (MI.getDebugOperand(0).isFPImm()) { - Kind = ImmediateKind; - Loc.FPImm = MI.getDebugOperand(0).getFPImm(); - } else if (MI.getDebugOperand(0).isCImm()) { - Kind = ImmediateKind; - Loc.CImm = MI.getDebugOperand(0).getCImm(); + assert((MI.isDebugValueList() || MI.getNumOperands() == 4) && + "malformed DBG_VALUE"); + for (const MachineOperand &Op : MI.debug_operands()) { + MachineLoc ML = GetLocForOp(Op); + auto It = find(Locs, ML); + if (It == Locs.end()) { + Locs.push_back(ML); + OrigLocMap.push_back(MI.getDebugOperandIndex(&Op)); + } else { + // ML duplicates an element in Locs; replace references to Op + // with references to the duplicating element. + unsigned OpIdx = Locs.size(); + unsigned DuplicatingIdx = std::distance(Locs.begin(), It); + Expr = DIExpression::replaceArg(Expr, OpIdx, DuplicatingIdx); + } } - // We create the debug entry values from the factory functions rather than - // from this ctor. - assert(Kind != EntryValueKind && !isEntryBackupLoc()); + // We create the debug entry values from the factory functions rather + // than from this ctor. + assert(EVKind != EntryValueLocKind::EntryValueKind && + !isEntryBackupLoc()); + } + + static MachineLoc GetLocForOp(const MachineOperand &Op) { + MachineLocKind Kind; + MachineLocValue Loc; + if (Op.isReg()) { + Kind = MachineLocKind::RegisterKind; + Loc.RegNo = Op.getReg(); + } else if (Op.isImm()) { + Kind = MachineLocKind::ImmediateKind; + Loc.Immediate = Op.getImm(); + } else if (Op.isFPImm()) { + Kind = MachineLocKind::ImmediateKind; + Loc.FPImm = Op.getFPImm(); + } else if (Op.isCImm()) { + Kind = MachineLocKind::ImmediateKind; + Loc.CImm = Op.getCImm(); + } else + llvm_unreachable("Invalid Op kind for MachineLoc."); + return {Kind, Loc}; } /// Take the variable and machine-location in DBG_VALUE MI, and build an @@ -364,10 +457,11 @@ private: static VarLoc CreateEntryLoc(const MachineInstr &MI, LexicalScopes &LS, const DIExpression *EntryExpr, Register Reg) { VarLoc VL(MI, LS); - assert(VL.Kind == RegisterKind); - VL.Kind = EntryValueKind; + assert(VL.Locs.size() == 1 && + VL.Locs[0].Kind == MachineLocKind::RegisterKind); + VL.EVKind = EntryValueLocKind::EntryValueKind; VL.Expr = EntryExpr; - VL.Loc.RegNo = Reg; + VL.Locs[0].Value.RegNo = Reg; return VL; } @@ -379,8 +473,9 @@ private: LexicalScopes &LS, const DIExpression *EntryExpr) { VarLoc VL(MI, LS); - assert(VL.Kind == RegisterKind); - VL.Kind = EntryValueBackupKind; + assert(VL.Locs.size() == 1 && + VL.Locs[0].Kind == MachineLocKind::RegisterKind); + VL.EVKind = EntryValueLocKind::EntryValueBackupKind; VL.Expr = EntryExpr; return VL; } @@ -393,32 +488,40 @@ private: const DIExpression *EntryExpr, Register NewReg) { VarLoc VL(MI, LS); - assert(VL.Kind == RegisterKind); - VL.Kind = EntryValueCopyBackupKind; + assert(VL.Locs.size() == 1 && + VL.Locs[0].Kind == MachineLocKind::RegisterKind); + VL.EVKind = 
EntryValueLocKind::EntryValueCopyBackupKind; VL.Expr = EntryExpr; - VL.Loc.RegNo = NewReg; + VL.Locs[0].Value.RegNo = NewReg; return VL; } /// Copy the register location in DBG_VALUE MI, updating the register to /// be NewReg. - static VarLoc CreateCopyLoc(const MachineInstr &MI, LexicalScopes &LS, + static VarLoc CreateCopyLoc(const VarLoc &OldVL, const MachineLoc &OldML, Register NewReg) { - VarLoc VL(MI, LS); - assert(VL.Kind == RegisterKind); - VL.Loc.RegNo = NewReg; - return VL; + VarLoc VL = OldVL; + for (size_t I = 0, E = VL.Locs.size(); I < E; ++I) + if (VL.Locs[I] == OldML) { + VL.Locs[I].Kind = MachineLocKind::RegisterKind; + VL.Locs[I].Value.RegNo = NewReg; + return VL; + } + llvm_unreachable("Should have found OldML in new VarLoc."); } - /// Take the variable described by DBG_VALUE MI, and create a VarLoc + /// Take the variable described by DBG_VALUE* MI, and create a VarLoc /// locating it in the specified spill location. - static VarLoc CreateSpillLoc(const MachineInstr &MI, unsigned SpillBase, - StackOffset SpillOffset, LexicalScopes &LS) { - VarLoc VL(MI, LS); - assert(VL.Kind == RegisterKind); - VL.Kind = SpillLocKind; - VL.Loc.SpillLocation = {SpillBase, SpillOffset}; - return VL; + static VarLoc CreateSpillLoc(const VarLoc &OldVL, const MachineLoc &OldML, + unsigned SpillBase, StackOffset SpillOffset) { + VarLoc VL = OldVL; + for (int I = 0, E = VL.Locs.size(); I < E; ++I) + if (VL.Locs[I] == OldML) { + VL.Locs[I].Kind = MachineLocKind::SpillLocKind; + VL.Locs[I].Value.SpillLocation = {SpillBase, SpillOffset}; + return VL; + } + llvm_unreachable("Should have found OldML in new VarLoc."); } /// Create a DBG_VALUE representing this VarLoc in the given function. @@ -426,79 +529,143 @@ private: /// inlining information from the original DBG_VALUE instruction, which may /// have been several transfers ago. MachineInstr *BuildDbgValue(MachineFunction &MF) const { + assert(!isEntryBackupLoc() && + "Tried to produce DBG_VALUE for backup VarLoc"); const DebugLoc &DbgLoc = MI.getDebugLoc(); bool Indirect = MI.isIndirectDebugValue(); const auto &IID = MI.getDesc(); const DILocalVariable *Var = MI.getDebugVariable(); - const DIExpression *DIExpr = MI.getDebugExpression(); NumInserted++; - switch (Kind) { - case EntryValueKind: - // An entry value is a register location -- but with an updated - // expression. The register location of such DBG_VALUE is always the one - // from the entry DBG_VALUE, it does not matter if the entry value was - // copied in to another register due to some optimizations. - return BuildMI(MF, DbgLoc, IID, Indirect, - MI.getDebugOperand(0).getReg(), Var, Expr); - case RegisterKind: - // Register locations are like the source DBG_VALUE, but with the - // register number from this VarLoc. - return BuildMI(MF, DbgLoc, IID, Indirect, Loc.RegNo, Var, DIExpr); - case SpillLocKind: { - // Spills are indirect DBG_VALUEs, with a base register and offset. - // Use the original DBG_VALUEs expression to build the spilt location - // on top of. FIXME: spill locations created before this pass runs - // are not recognized, and not handled here. 
- auto *TRI = MF.getSubtarget().getRegisterInfo(); - auto *SpillExpr = TRI->prependOffsetExpression( - DIExpr, DIExpression::ApplyOffset, Loc.SpillLocation.SpillOffset); - unsigned Base = Loc.SpillLocation.SpillBase; - return BuildMI(MF, DbgLoc, IID, true, Base, Var, SpillExpr); - } - case ImmediateKind: { - MachineOperand MO = MI.getDebugOperand(0); - return BuildMI(MF, DbgLoc, IID, Indirect, MO, Var, DIExpr); - } - case EntryValueBackupKind: - case EntryValueCopyBackupKind: - case InvalidKind: - llvm_unreachable( - "Tried to produce DBG_VALUE for invalid or backup VarLoc"); + const DIExpression *DIExpr = Expr; + SmallVector<MachineOperand, 8> MOs; + for (unsigned I = 0, E = Locs.size(); I < E; ++I) { + MachineLocKind LocKind = Locs[I].Kind; + MachineLocValue Loc = Locs[I].Value; + const MachineOperand &Orig = MI.getDebugOperand(OrigLocMap[I]); + switch (LocKind) { + case MachineLocKind::RegisterKind: + // An entry value is a register location -- but with an updated + // expression. The register location of such DBG_VALUE is always the + // one from the entry DBG_VALUE, it does not matter if the entry value + // was copied in to another register due to some optimizations. + // Non-entry value register locations are like the source + // DBG_VALUE, but with the register number from this VarLoc. + MOs.push_back(MachineOperand::CreateReg( + EVKind == EntryValueLocKind::EntryValueKind ? Orig.getReg() + : Register(Loc.RegNo), + false)); + MOs.back().setIsDebug(); + break; + case MachineLocKind::SpillLocKind: { + // Spills are indirect DBG_VALUEs, with a base register and offset. + // Use the original DBG_VALUEs expression to build the spilt location + // on top of. FIXME: spill locations created before this pass runs + // are not recognized, and not handled here. + unsigned Base = Loc.SpillLocation.SpillBase; + auto *TRI = MF.getSubtarget().getRegisterInfo(); + if (MI.isNonListDebugValue()) { + DIExpr = + TRI->prependOffsetExpression(DIExpr, DIExpression::ApplyOffset, + Loc.SpillLocation.SpillOffset); + Indirect = true; + } else { + SmallVector<uint64_t, 4> Ops; + TRI->getOffsetOpcodes(Loc.SpillLocation.SpillOffset, Ops); + Ops.push_back(dwarf::DW_OP_deref); + DIExpr = DIExpression::appendOpsToArg(DIExpr, Ops, I); + } + MOs.push_back(MachineOperand::CreateReg(Base, false)); + MOs.back().setIsDebug(); + break; + } + case MachineLocKind::ImmediateKind: { + MOs.push_back(Orig); + break; + } + case MachineLocKind::InvalidKind: + llvm_unreachable("Tried to produce DBG_VALUE for invalid VarLoc"); + } } - llvm_unreachable("Unrecognized VarLocBasedLDV.VarLoc.Kind enum"); + return BuildMI(MF, DbgLoc, IID, Indirect, MOs, Var, DIExpr); } /// Is the Loc field a constant or constant object? - bool isConstant() const { return Kind == ImmediateKind; } + bool isConstant(MachineLocKind Kind) const { + return Kind == MachineLocKind::ImmediateKind; + } /// Check if the Loc field is an entry backup location. bool isEntryBackupLoc() const { - return Kind == EntryValueBackupKind || Kind == EntryValueCopyBackupKind; + return EVKind == EntryValueLocKind::EntryValueBackupKind || + EVKind == EntryValueLocKind::EntryValueCopyBackupKind; } - /// If this variable is described by a register holding the entry value, - /// return it, otherwise return 0. - unsigned getEntryValueBackupReg() const { - if (Kind == EntryValueBackupKind) - return Loc.RegNo; - return 0; + /// If this variable is described by register \p Reg holding the entry + /// value, return true. 
+ bool isEntryValueBackupReg(Register Reg) const {
+ return EVKind == EntryValueLocKind::EntryValueBackupKind && usesReg(Reg);
 }
- /// If this variable is described by a register holding the copy of the
- /// entry value, return it, otherwise return 0.
- unsigned getEntryValueCopyBackupReg() const {
- if (Kind == EntryValueCopyBackupKind)
- return Loc.RegNo;
- return 0;
+ /// If this variable is described by register \p Reg holding a copy of the
+ /// entry value, return true.
+ bool isEntryValueCopyBackupReg(Register Reg) const {
+ return EVKind == EntryValueLocKind::EntryValueCopyBackupKind &&
+ usesReg(Reg);
 }
- /// If this variable is described by a register, return it,
- /// otherwise return 0.
- unsigned isDescribedByReg() const {
- if (Kind == RegisterKind)
- return Loc.RegNo;
- return 0;
+ /// If this variable is described in whole or part by \p Reg, return true.
+ bool usesReg(Register Reg) const {
+ MachineLoc RegML;
+ RegML.Kind = MachineLocKind::RegisterKind;
+ RegML.Value.RegNo = Reg;
+ return is_contained(Locs, RegML);
+ }
+
+ /// If this variable is described in whole or part by \p Reg, return the
+ /// index of that register location within Locs.
+ unsigned getRegIdx(Register Reg) const {
+ for (unsigned Idx = 0; Idx < Locs.size(); ++Idx)
+ if (Locs[Idx].Kind == MachineLocKind::RegisterKind &&
+ Locs[Idx].Value.RegNo == Reg)
+ return Idx;
+ llvm_unreachable("Could not find given Reg in Locs");
+ }
+
+ /// If this variable is described in whole or part by 1 or more registers,
+ /// add each of them to \p Regs and return true.
+ bool getDescribingRegs(SmallVectorImpl<uint32_t> &Regs) const {
+ bool AnyRegs = false;
+ for (auto Loc : Locs)
+ if (Loc.Kind == MachineLocKind::RegisterKind) {
+ Regs.push_back(Loc.Value.RegNo);
+ AnyRegs = true;
+ }
+ return AnyRegs;
+ }
+
+ bool containsSpillLocs() const {
+ return any_of(Locs, [](VarLoc::MachineLoc ML) {
+ return ML.Kind == VarLoc::MachineLocKind::SpillLocKind;
+ });
+ }
+
+ /// If this variable is described in whole or part by \p SpillLocation,
+ /// return true.
+ bool usesSpillLoc(SpillLoc SpillLocation) const {
+ MachineLoc SpillML;
+ SpillML.Kind = MachineLocKind::SpillLocKind;
+ SpillML.Value.SpillLocation = SpillLocation;
+ return is_contained(Locs, SpillML);
+ }
+
+ /// If this variable is described in whole or part by \p SpillLocation,
+ /// return the index of that spill location within Locs.
+ unsigned getSpillLocIdx(SpillLoc SpillLocation) const {
+ for (unsigned Idx = 0; Idx < Locs.size(); ++Idx)
+ if (Locs[Idx].Kind == MachineLocKind::SpillLocKind &&
+ Locs[Idx].Value.SpillLocation == SpillLocation)
+ return Idx;
+ llvm_unreachable("Could not find given SpillLoc in Locs");
+ }
 /// Determine whether the lexical scope of this value's debug location
@@ -511,24 +678,26 @@ private:
 // TRI can be null.
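// [Illustrative sketch, not part of the upstream patch. The usesReg and
// usesSpillLoc helpers above rely on MachineLoc being an equality-comparable
// (Kind, Value) pair, so is_contained can probe the small Locs array with a
// synthesized key. A self-contained restatement with simplified stand-in
// types (the names below are hypothetical, not from the patch):]
#include <algorithm>
#include <vector>

enum class LocKind { Register, Spill, Immediate };

struct Loc {
  LocKind Kind;
  unsigned Value; // register number or spill id, collapsed to one field
  bool operator==(const Loc &O) const {
    return Kind == O.Kind && Value == O.Value;
  }
};

// Mirrors VarLoc::usesReg: build the probe value, then scan the array.
bool usesReg(const std::vector<Loc> &Locs, unsigned Reg) {
  Loc Probe{LocKind::Register, Reg};
  return std::find(Locs.begin(), Locs.end(), Probe) != Locs.end();
}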
void dump(const TargetRegisterInfo *TRI, raw_ostream &Out = dbgs()) const { Out << "VarLoc("; - switch (Kind) { - case RegisterKind: - case EntryValueKind: - case EntryValueBackupKind: - case EntryValueCopyBackupKind: - Out << printReg(Loc.RegNo, TRI); - break; - case SpillLocKind: - Out << printReg(Loc.SpillLocation.SpillBase, TRI); - Out << "[" << Loc.SpillLocation.SpillOffset.getFixed() << " + " - << Loc.SpillLocation.SpillOffset.getScalable() << "x vscale" - << "]"; - break; - case ImmediateKind: - Out << Loc.Immediate; - break; - case InvalidKind: - llvm_unreachable("Invalid VarLoc in dump method"); + for (const MachineLoc &MLoc : Locs) { + if (Locs.begin() != &MLoc) + Out << ", "; + switch (MLoc.Kind) { + case MachineLocKind::RegisterKind: + Out << printReg(MLoc.Value.RegNo, TRI); + break; + case MachineLocKind::SpillLocKind: + Out << printReg(MLoc.Value.SpillLocation.SpillBase, TRI); + Out << "[" << MLoc.Value.SpillLocation.SpillOffset.getFixed() << " + " + << MLoc.Value.SpillLocation.SpillOffset.getScalable() + << "x vscale" + << "]"; + break; + case MachineLocKind::ImmediateKind: + Out << MLoc.Value.Immediate; + break; + case MachineLocKind::InvalidKind: + llvm_unreachable("Invalid VarLoc in dump method"); + } } Out << ", \"" << Var.getVariable()->getName() << "\", " << *Expr << ", "; @@ -545,90 +714,76 @@ private: #endif bool operator==(const VarLoc &Other) const { - if (Kind != Other.Kind || !(Var == Other.Var) || Expr != Other.Expr) - return false; - - switch (Kind) { - case SpillLocKind: - return Loc.SpillLocation == Other.Loc.SpillLocation; - case RegisterKind: - case ImmediateKind: - case EntryValueKind: - case EntryValueBackupKind: - case EntryValueCopyBackupKind: - return Loc.Hash == Other.Loc.Hash; - default: - llvm_unreachable("Invalid kind"); - } + return std::tie(EVKind, Var, Expr, Locs) == + std::tie(Other.EVKind, Other.Var, Other.Expr, Other.Locs); } /// This operator guarantees that VarLocs are sorted by Variable first. bool operator<(const VarLoc &Other) const { - switch (Kind) { - case SpillLocKind: - return std::make_tuple(Var, Kind, Loc.SpillLocation.SpillBase, - Loc.SpillLocation.SpillOffset.getFixed(), - Loc.SpillLocation.SpillOffset.getScalable(), - Expr) < - std::make_tuple( - Other.Var, Other.Kind, Other.Loc.SpillLocation.SpillBase, - Other.Loc.SpillLocation.SpillOffset.getFixed(), - Other.Loc.SpillLocation.SpillOffset.getScalable(), - Other.Expr); - case RegisterKind: - case ImmediateKind: - case EntryValueKind: - case EntryValueBackupKind: - case EntryValueCopyBackupKind: - return std::tie(Var, Kind, Loc.Hash, Expr) < - std::tie(Other.Var, Other.Kind, Other.Loc.Hash, Other.Expr); - default: - llvm_unreachable("Invalid kind"); - } + return std::tie(Var, EVKind, Locs, Expr) < + std::tie(Other.Var, Other.EVKind, Other.Locs, Other.Expr); } }; +#ifndef NDEBUG + using VarVec = SmallVector<VarLoc, 32>; +#endif + /// VarLocMap is used for two things: - /// 1) Assigning a unique LocIndex to a VarLoc. This LocIndex can be used to + /// 1) Assigning LocIndices to a VarLoc. The LocIndices can be used to /// virtually insert a VarLoc into a VarLocSet. /// 2) Given a LocIndex, look up the unique associated VarLoc. class VarLocMap { /// Map a VarLoc to an index within the vector reserved for its location /// within Loc2Vars. - std::map<VarLoc, LocIndex::u32_index_t> Var2Index; + std::map<VarLoc, LocIndices> Var2Indices; /// Map a location to a vector which holds VarLocs which live in that /// location. 
SmallDenseMap<LocIndex::u32_location_t, std::vector<VarLoc>> Loc2Vars; - /// Determine the 32-bit location reserved for \p VL, based on its kind. - static LocIndex::u32_location_t getLocationForVar(const VarLoc &VL) { - switch (VL.Kind) { - case VarLoc::RegisterKind: - assert((VL.Loc.RegNo < LocIndex::kFirstInvalidRegLocation) && + public: + /// Retrieve LocIndices for \p VL. + LocIndices insert(const VarLoc &VL) { + LocIndices &Indices = Var2Indices[VL]; + // If Indices is not empty, VL is already in the map. + if (!Indices.empty()) + return Indices; + SmallVector<LocIndex::u32_location_t, 4> Locations; + // LocIndices are determined by EVKind and MLs; each Register has a + // unique location, while all SpillLocs use a single bucket, and any EV + // VarLocs use only the Backup bucket or none at all (except the + // compulsory entry at the universal location index). LocIndices will + // always have an index at the universal location index as the last index. + if (VL.EVKind == VarLoc::EntryValueLocKind::NonEntryValueKind) { + VL.getDescribingRegs(Locations); + assert(all_of(Locations, + [](auto RegNo) { + return RegNo < LocIndex::kFirstInvalidRegLocation; + }) && "Physreg out of range?"); - return VL.Loc.RegNo; - case VarLoc::SpillLocKind: - return LocIndex::kSpillLocation; - case VarLoc::EntryValueBackupKind: - case VarLoc::EntryValueCopyBackupKind: - return LocIndex::kEntryValueBackupLocation; - default: - return 0; + if (VL.containsSpillLocs()) { + LocIndex::u32_location_t Loc = LocIndex::kSpillLocation; + Locations.push_back(Loc); + } + } else if (VL.EVKind != VarLoc::EntryValueLocKind::EntryValueKind) { + LocIndex::u32_location_t Loc = LocIndex::kEntryValueBackupLocation; + Locations.push_back(Loc); } - } - - public: - /// Retrieve a unique LocIndex for \p VL. - LocIndex insert(const VarLoc &VL) { - LocIndex::u32_location_t Location = getLocationForVar(VL); - LocIndex::u32_index_t &Index = Var2Index[VL]; - if (!Index) { + Locations.push_back(LocIndex::kUniversalLocation); + for (LocIndex::u32_location_t Location : Locations) { auto &Vars = Loc2Vars[Location]; + Indices.push_back( + {Location, static_cast<LocIndex::u32_index_t>(Vars.size())}); Vars.push_back(VL); - Index = Vars.size(); } - return {Location, Index - 1}; + return Indices; + } + + LocIndices getAllIndices(const VarLoc &VL) const { + auto IndIt = Var2Indices.find(VL); + assert(IndIt != Var2Indices.end() && "VarLoc not tracked"); + return IndIt->second; } /// Retrieve the unique VarLoc associated with \p ID. @@ -660,6 +815,17 @@ private: using VarToFragments = DenseMap<const DILocalVariable *, SmallSet<FragmentInfo, 4>>; + /// Collects all VarLocs from \p CollectFrom. Each unique VarLoc is added + /// to \p Collected once, in order of insertion into \p VarLocIDs. + static void collectAllVarLocs(SmallVectorImpl<VarLoc> &Collected, + const VarLocSet &CollectFrom, + const VarLocMap &VarLocIDs); + + /// Get the registers which are used by VarLocs of kind RegisterKind tracked + /// by \p CollectFrom. + void getUsedRegs(const VarLocSet &CollectFrom, + SmallVectorImpl<Register> &UsedRegs) const; + /// This holds the working set of currently open ranges. For fast /// access, this is done both as a set of VarLocIDs, and a map of /// DebugVariable to recent VarLocID. Note that a DBG_VALUE ends all @@ -670,39 +836,45 @@ private: /// we will erase/insert from the EntryValuesBackupVars map, otherwise /// we perform the operation on the Vars. 
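// [Illustrative sketch, not part of the upstream patch. The bucketing that
// VarLocMap::insert above describes rests on a LocIndex packing a 32-bit
// location bucket and a 32-bit per-bucket index into one 64-bit raw key, so
// a sorted VarLocSet can be range-scanned one bucket at a time.
// ExampleLocIndex is a hypothetical stand-in for the real LocIndex type:]
#include <cstdint>

struct ExampleLocIndex {
  uint32_t Location; // bucket: a register number or a reserved bucket id
  uint32_t Index;    // position within that bucket

  uint64_t asRaw() const { return (uint64_t(Location) << 32) | Index; }

  static ExampleLocIndex fromRaw(uint64_t Raw) {
    return {uint32_t(Raw >> 32), uint32_t(Raw)};
  }

  // All IDs in bucket L occupy the half-open range
  // [rawRangeBegin(L), rawRangeBegin(L + 1)).
  static uint64_t rawRangeBegin(uint32_t Location) {
    return uint64_t(Location) << 32;
  }
};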
class OpenRangesSet { + VarLocSet::Allocator &Alloc; VarLocSet VarLocs; // Map the DebugVariable to recent primary location ID. - SmallDenseMap<DebugVariable, LocIndex, 8> Vars; + SmallDenseMap<DebugVariable, LocIndices, 8> Vars; // Map the DebugVariable to recent backup location ID. - SmallDenseMap<DebugVariable, LocIndex, 8> EntryValuesBackupVars; + SmallDenseMap<DebugVariable, LocIndices, 8> EntryValuesBackupVars; OverlapMap &OverlappingFragments; public: OpenRangesSet(VarLocSet::Allocator &Alloc, OverlapMap &_OLapMap) - : VarLocs(Alloc), OverlappingFragments(_OLapMap) {} + : Alloc(Alloc), VarLocs(Alloc), OverlappingFragments(_OLapMap) {} const VarLocSet &getVarLocs() const { return VarLocs; } + // Fetches all VarLocs in \p VarLocIDs and inserts them into \p Collected. + // This method is needed to get every VarLoc once, as each VarLoc may have + // multiple indices in a VarLocMap (corresponding to each applicable + // location), but all VarLocs appear exactly once at the universal location + // index. + void getUniqueVarLocs(SmallVectorImpl<VarLoc> &Collected, + const VarLocMap &VarLocIDs) const { + collectAllVarLocs(Collected, VarLocs, VarLocIDs); + } + /// Terminate all open ranges for VL.Var by removing it from the set. void erase(const VarLoc &VL); - /// Terminate all open ranges listed in \c KillSet by removing - /// them from the set. - void erase(const VarLocSet &KillSet, const VarLocMap &VarLocIDs); + /// Terminate all open ranges listed as indices in \c KillSet with + /// \c Location by removing them from the set. + void erase(const VarLocsInRange &KillSet, const VarLocMap &VarLocIDs, + LocIndex::u32_location_t Location); /// Insert a new range into the set. - void insert(LocIndex VarLocID, const VarLoc &VL); + void insert(LocIndices VarLocIDs, const VarLoc &VL); /// Insert a set of ranges. - void insertFromLocSet(const VarLocSet &ToLoad, const VarLocMap &Map) { - for (uint64_t ID : ToLoad) { - LocIndex Idx = LocIndex::fromRawInteger(ID); - const VarLoc &VarL = Map[Idx]; - insert(Idx, VarL); - } - } + void insertFromLocSet(const VarLocSet &ToLoad, const VarLocMap &Map); - llvm::Optional<LocIndex> getEntryValueBackup(DebugVariable Var); + llvm::Optional<LocIndices> getEntryValueBackup(DebugVariable Var); /// Empty the set. void clear() { @@ -725,18 +897,18 @@ private: getVarLocs().end()); } - /// Get all set IDs for VarLocs of kind RegisterKind in \p Reg. + /// Get all set IDs for VarLocs with MLs of kind RegisterKind in \p Reg. auto getRegisterVarLocs(Register Reg) const { return LocIndex::indexRangeForLocation(getVarLocs(), Reg); } - /// Get all set IDs for VarLocs of kind SpillLocKind. + /// Get all set IDs for VarLocs with MLs of kind SpillLocKind. auto getSpillVarLocs() const { return LocIndex::indexRangeForLocation(getVarLocs(), LocIndex::kSpillLocation); } - /// Get all set IDs for VarLocs of kind EntryValueBackupKind or + /// Get all set IDs for VarLocs of EVKind EntryValueBackupKind or /// EntryValueCopyBackupKind. auto getEntryValueBackupVarLocs() const { return LocIndex::indexRangeForLocation( @@ -744,16 +916,14 @@ private: } }; - /// Collect all VarLoc IDs from \p CollectFrom for VarLocs of kind - /// RegisterKind which are located in any reg in \p Regs. Insert collected IDs - /// into \p Collected. - void collectIDsForRegs(VarLocSet &Collected, const DefinedRegsSet &Regs, - const VarLocSet &CollectFrom) const; - - /// Get the registers which are used by VarLocs of kind RegisterKind tracked - /// by \p CollectFrom. 
- void getUsedRegs(const VarLocSet &CollectFrom, - SmallVectorImpl<uint32_t> &UsedRegs) const; + /// Collect all VarLoc IDs from \p CollectFrom for VarLocs with MLs of kind + /// RegisterKind which are located in any reg in \p Regs. The IDs for each + /// VarLoc correspond to entries in the universal location bucket, which every + /// VarLoc has exactly 1 entry for. Insert collected IDs into \p Collected. + static void collectIDsForRegs(VarLocsInRange &Collected, + const DefinedRegsSet &Regs, + const VarLocSet &CollectFrom, + const VarLocMap &VarLocIDs); VarLocSet &getVarLocsInMBB(const MachineBasicBlock *MBB, VarLocInMBB &Locs) { std::unique_ptr<VarLocSet> &VLS = Locs[MBB]; @@ -800,6 +970,7 @@ private: void insertTransferDebugPair(MachineInstr &MI, OpenRangesSet &OpenRanges, TransferMap &Transfers, VarLocMap &VarLocIDs, LocIndex OldVarID, TransferKind Kind, + const VarLoc::MachineLoc &OldLoc, Register NewReg = Register()); void transferDebugValue(const MachineInstr &MI, OpenRangesSet &OpenRanges, @@ -810,7 +981,7 @@ private: VarLocMap &VarLocIDs, const VarLoc &EntryVL); void emitEntryValues(MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs, TransferMap &Transfers, - VarLocSet &KillSet); + VarLocsInRange &KillSet); void recordEntryValue(const MachineInstr &MI, const DefinedRegsSet &DefinedRegs, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs); @@ -871,8 +1042,9 @@ void VarLocBasedLDV::OpenRangesSet::erase(const VarLoc &VL) { auto *EraseFrom = VL.isEntryBackupLoc() ? &EntryValuesBackupVars : &Vars; auto It = EraseFrom->find(VarToErase); if (It != EraseFrom->end()) { - LocIndex ID = It->second; - VarLocs.reset(ID.getAsRawInteger()); + LocIndices IDs = It->second; + for (LocIndex ID : IDs) + VarLocs.reset(ID.getAsRawInteger()); EraseFrom->erase(It); } }; @@ -899,26 +1071,46 @@ void VarLocBasedLDV::OpenRangesSet::erase(const VarLoc &VL) { } } -void VarLocBasedLDV::OpenRangesSet::erase(const VarLocSet &KillSet, - const VarLocMap &VarLocIDs) { - VarLocs.intersectWithComplement(KillSet); - for (uint64_t ID : KillSet) { - const VarLoc *VL = &VarLocIDs[LocIndex::fromRawInteger(ID)]; - auto *EraseFrom = VL->isEntryBackupLoc() ? &EntryValuesBackupVars : &Vars; - EraseFrom->erase(VL->Var); +void VarLocBasedLDV::OpenRangesSet::erase(const VarLocsInRange &KillSet, + const VarLocMap &VarLocIDs, + LocIndex::u32_location_t Location) { + VarLocSet RemoveSet(Alloc); + for (LocIndex::u32_index_t ID : KillSet) { + const VarLoc &VL = VarLocIDs[LocIndex(Location, ID)]; + auto *EraseFrom = VL.isEntryBackupLoc() ? &EntryValuesBackupVars : &Vars; + EraseFrom->erase(VL.Var); + LocIndices VLI = VarLocIDs.getAllIndices(VL); + for (LocIndex ID : VLI) + RemoveSet.set(ID.getAsRawInteger()); + } + VarLocs.intersectWithComplement(RemoveSet); +} + +void VarLocBasedLDV::OpenRangesSet::insertFromLocSet(const VarLocSet &ToLoad, + const VarLocMap &Map) { + VarLocsInRange UniqueVarLocIDs; + DefinedRegsSet Regs; + Regs.insert(LocIndex::kUniversalLocation); + collectIDsForRegs(UniqueVarLocIDs, Regs, ToLoad, Map); + for (uint64_t ID : UniqueVarLocIDs) { + LocIndex Idx = LocIndex::fromRawInteger(ID); + const VarLoc &VarL = Map[Idx]; + const LocIndices Indices = Map.getAllIndices(VarL); + insert(Indices, VarL); } } -void VarLocBasedLDV::OpenRangesSet::insert(LocIndex VarLocID, - const VarLoc &VL) { +void VarLocBasedLDV::OpenRangesSet::insert(LocIndices VarLocIDs, + const VarLoc &VL) { auto *InsertInto = VL.isEntryBackupLoc() ? 
&EntryValuesBackupVars : &Vars; - VarLocs.set(VarLocID.getAsRawInteger()); - InsertInto->insert({VL.Var, VarLocID}); + for (LocIndex ID : VarLocIDs) + VarLocs.set(ID.getAsRawInteger()); + InsertInto->insert({VL.Var, VarLocIDs}); } /// Return the Loc ID of an entry value backup location, if it exists for the /// variable. -llvm::Optional<LocIndex> +llvm::Optional<LocIndices> VarLocBasedLDV::OpenRangesSet::getEntryValueBackup(DebugVariable Var) { auto It = EntryValuesBackupVars.find(Var); if (It != EntryValuesBackupVars.end()) @@ -927,26 +1119,35 @@ VarLocBasedLDV::OpenRangesSet::getEntryValueBackup(DebugVariable Var) { return llvm::None; } -void VarLocBasedLDV::collectIDsForRegs(VarLocSet &Collected, - const DefinedRegsSet &Regs, - const VarLocSet &CollectFrom) const { +void VarLocBasedLDV::collectIDsForRegs(VarLocsInRange &Collected, + const DefinedRegsSet &Regs, + const VarLocSet &CollectFrom, + const VarLocMap &VarLocIDs) { assert(!Regs.empty() && "Nothing to collect"); - SmallVector<uint32_t, 32> SortedRegs; - for (Register Reg : Regs) - SortedRegs.push_back(Reg); + SmallVector<Register, 32> SortedRegs; + append_range(SortedRegs, Regs); array_pod_sort(SortedRegs.begin(), SortedRegs.end()); auto It = CollectFrom.find(LocIndex::rawIndexForReg(SortedRegs.front())); auto End = CollectFrom.end(); - for (uint32_t Reg : SortedRegs) { - // The half-open interval [FirstIndexForReg, FirstInvalidIndex) contains all - // possible VarLoc IDs for VarLocs of kind RegisterKind which live in Reg. + for (Register Reg : SortedRegs) { + // The half-open interval [FirstIndexForReg, FirstInvalidIndex) contains + // all possible VarLoc IDs for VarLocs with MLs of kind RegisterKind which + // live in Reg. uint64_t FirstIndexForReg = LocIndex::rawIndexForReg(Reg); uint64_t FirstInvalidIndex = LocIndex::rawIndexForReg(Reg + 1); It.advanceToLowerBound(FirstIndexForReg); // Iterate through that half-open interval and collect all the set IDs. - for (; It != End && *It < FirstInvalidIndex; ++It) - Collected.set(*It); + for (; It != End && *It < FirstInvalidIndex; ++It) { + LocIndex ItIdx = LocIndex::fromRawInteger(*It); + const VarLoc &VL = VarLocIDs[ItIdx]; + LocIndices LI = VarLocIDs.getAllIndices(VL); + // For now, the back index is always the universal location index. + assert(LI.back().Location == LocIndex::kUniversalLocation && + "Unexpected order of LocIndices for VarLoc; was it inserted into " + "the VarLocMap correctly?"); + Collected.insert(LI.back().Index); + } if (It == End) return; @@ -954,10 +1155,11 @@ void VarLocBasedLDV::collectIDsForRegs(VarLocSet &Collected, } void VarLocBasedLDV::getUsedRegs(const VarLocSet &CollectFrom, - SmallVectorImpl<uint32_t> &UsedRegs) const { + SmallVectorImpl<Register> &UsedRegs) const { // All register-based VarLocs are assigned indices greater than or equal to // FirstRegIndex. 
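// [Illustrative sketch, not part of the upstream patch. collectIDsForRegs
// above exploits the contiguous per-register ID bands: every raw ID for
// register R falls in [rawIndexForReg(R), rawIndexForReg(R + 1)). A
// standalone version of that interval walk; std::set stands in for the
// sparse bit vector, which the real code scans by advancing one iterator in
// place via advanceToLowerBound:]
#include <cstdint>
#include <set>
#include <vector>

// Each register owns a contiguous 2^32-wide band of raw IDs.
uint64_t rawIndexForReg(uint32_t Reg) { return uint64_t(Reg) << 32; }

// Collect every ID that falls in some register's band.
void collectIDsForRegs(std::vector<uint64_t> &Out,
                       const std::vector<uint32_t> &SortedRegs,
                       const std::set<uint64_t> &From) {
  for (uint32_t Reg : SortedRegs) {
    uint64_t Begin = rawIndexForReg(Reg);
    uint64_t End = rawIndexForReg(Reg + 1);
    for (auto It = From.lower_bound(Begin); It != From.end() && *It < End;
         ++It)
      Out.push_back(*It);
  }
}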
- uint64_t FirstRegIndex = LocIndex::rawIndexForReg(1); + uint64_t FirstRegIndex = + LocIndex::rawIndexForReg(LocIndex::kFirstRegLocation); uint64_t FirstInvalidIndex = LocIndex::rawIndexForReg(LocIndex::kFirstInvalidRegLocation); for (auto It = CollectFrom.find(FirstRegIndex), @@ -995,9 +1197,10 @@ void VarLocBasedLDV::printVarLocInMBB(const MachineFunction &MF, const VarLocSet &L = getVarLocsInMBB(&BB, V); if (L.empty()) continue; + SmallVector<VarLoc, 32> VarLocs; + collectAllVarLocs(VarLocs, L, VarLocIDs); Out << "MBB: " << BB.getNumber() << ":\n"; - for (uint64_t VLL : L) { - const VarLoc &VL = VarLocIDs[LocIndex::fromRawInteger(VLL)]; + for (const VarLoc &VL : VarLocs) { Out << " Var: " << VL.Var.getVariable()->getName(); Out << " MI: "; VL.dump(TRI, Out); @@ -1044,11 +1247,11 @@ bool VarLocBasedLDV::removeEntryValue(const MachineInstr &MI, // If the DBG_VALUE comes from a copy instruction that copies the entry value, // it means the parameter's value has not changed and we should be able to use // its entry value. - bool TrySalvageEntryValue = false; Register Reg = MI.getDebugOperand(0).getReg(); auto I = std::next(MI.getReverseIterator()); const MachineOperand *SrcRegOp, *DestRegOp; if (I != MI.getParent()->rend()) { + // TODO: Try to keep tracking of an entry value if we encounter a propagated // DBG_VALUE describing the copy of the entry value. (Propagated entry value // does not indicate the parameter modification.) @@ -1060,13 +1263,11 @@ bool VarLocBasedLDV::removeEntryValue(const MachineInstr &MI, DestRegOp = DestSrc->Destination; if (Reg != DestRegOp->getReg()) return true; - TrySalvageEntryValue = true; - } - if (TrySalvageEntryValue) { for (uint64_t ID : OpenRanges.getEntryValueBackupVarLocs()) { const VarLoc &VL = VarLocIDs[LocIndex::fromRawInteger(ID)]; - if (VL.getEntryValueCopyBackupReg() == Reg && + if (VL.isEntryValueCopyBackupReg(Reg) && + // Entry Values should not be variadic. VL.MI.getDebugOperand(0).getReg() == SrcRegOp->getReg()) return false; } @@ -1095,7 +1296,7 @@ void VarLocBasedLDV::transferDebugValue(const MachineInstr &MI, // If that is the case, we should stop tracking its entry value. auto EntryValBackupID = OpenRanges.getEntryValueBackup(V); if (Var->isParameter() && EntryValBackupID) { - const VarLoc &EntryVL = VarLocIDs[*EntryValBackupID]; + const VarLoc &EntryVL = VarLocIDs[EntryValBackupID->back()]; if (removeEntryValue(MI, OpenRanges, VarLocIDs, EntryVL)) { LLVM_DEBUG(dbgs() << "Deleting a DBG entry value because of: "; MI.print(dbgs(), /*IsStandalone*/ false, @@ -1105,59 +1306,79 @@ void VarLocBasedLDV::transferDebugValue(const MachineInstr &MI, } } - if (isDbgValueDescribedByReg(MI) || MI.getDebugOperand(0).isImm() || - MI.getDebugOperand(0).isFPImm() || MI.getDebugOperand(0).isCImm()) { + if (all_of(MI.debug_operands(), [](const MachineOperand &MO) { + return (MO.isReg() && MO.getReg()) || MO.isImm() || MO.isFPImm() || + MO.isCImm(); + })) { // Use normal VarLoc constructor for registers and immediates. VarLoc VL(MI, LS); // End all previous ranges of VL.Var. OpenRanges.erase(VL); - LocIndex ID = VarLocIDs.insert(VL); + LocIndices IDs = VarLocIDs.insert(VL); // Add the VarLoc to OpenRanges from this DBG_VALUE. - OpenRanges.insert(ID, VL); - } else if (MI.hasOneMemOperand()) { + OpenRanges.insert(IDs, VL); + } else if (MI.memoperands().size() > 0) { llvm_unreachable("DBG_VALUE with mem operand encountered after regalloc?"); } else { // This must be an undefined location. If it has an open range, erase it. 
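// [Illustrative aside, not part of the upstream patch. The all_of test in
// transferDebugValue above admits a DBG_VALUE only when every debug operand
// is a non-null register or some flavour of immediate; restated for a single
// operand as a standalone predicate (name is hypothetical):
//
//   static bool isTrackableDebugOperand(const MachineOperand &MO) {
//     return (MO.isReg() && MO.getReg()) || MO.isImm() || MO.isFPImm() ||
//            MO.isCImm();
//   }
//
// Anything else falls through to the undef/memory-operand handling below.]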
- assert(MI.getDebugOperand(0).isReg() && - MI.getDebugOperand(0).getReg() == 0 && + assert(MI.isUndefDebugValue() && "Unexpected non-undef DBG_VALUE encountered"); VarLoc VL(MI, LS); OpenRanges.erase(VL); } } +// This should be removed later, doesn't fit the new design. +void VarLocBasedLDV::collectAllVarLocs(SmallVectorImpl<VarLoc> &Collected, + const VarLocSet &CollectFrom, + const VarLocMap &VarLocIDs) { + // The half-open interval [FirstIndexForReg, FirstInvalidIndex) contains all + // possible VarLoc IDs for VarLocs with MLs of kind RegisterKind which live + // in Reg. + uint64_t FirstIndex = LocIndex::rawIndexForReg(LocIndex::kUniversalLocation); + uint64_t FirstInvalidIndex = + LocIndex::rawIndexForReg(LocIndex::kUniversalLocation + 1); + // Iterate through that half-open interval and collect all the set IDs. + for (auto It = CollectFrom.find(FirstIndex), End = CollectFrom.end(); + It != End && *It < FirstInvalidIndex; ++It) { + LocIndex RegIdx = LocIndex::fromRawInteger(*It); + Collected.push_back(VarLocIDs[RegIdx]); + } +} + /// Turn the entry value backup locations into primary locations. void VarLocBasedLDV::emitEntryValues(MachineInstr &MI, - OpenRangesSet &OpenRanges, - VarLocMap &VarLocIDs, - TransferMap &Transfers, - VarLocSet &KillSet) { + OpenRangesSet &OpenRanges, + VarLocMap &VarLocIDs, + TransferMap &Transfers, + VarLocsInRange &KillSet) { // Do not insert entry value locations after a terminator. if (MI.isTerminator()) return; - for (uint64_t ID : KillSet) { - LocIndex Idx = LocIndex::fromRawInteger(ID); + for (uint32_t ID : KillSet) { + // The KillSet IDs are indices for the universal location bucket. + LocIndex Idx = LocIndex(LocIndex::kUniversalLocation, ID); const VarLoc &VL = VarLocIDs[Idx]; if (!VL.Var.getVariable()->isParameter()) continue; auto DebugVar = VL.Var; - Optional<LocIndex> EntryValBackupID = + Optional<LocIndices> EntryValBackupIDs = OpenRanges.getEntryValueBackup(DebugVar); // If the parameter has the entry value backup, it means we should // be able to use its entry value. - if (!EntryValBackupID) + if (!EntryValBackupIDs) continue; - const VarLoc &EntryVL = VarLocIDs[*EntryValBackupID]; - VarLoc EntryLoc = - VarLoc::CreateEntryLoc(EntryVL.MI, LS, EntryVL.Expr, EntryVL.Loc.RegNo); - LocIndex EntryValueID = VarLocIDs.insert(EntryLoc); - Transfers.push_back({&MI, EntryValueID}); - OpenRanges.insert(EntryValueID, EntryLoc); + const VarLoc &EntryVL = VarLocIDs[EntryValBackupIDs->back()]; + VarLoc EntryLoc = VarLoc::CreateEntryLoc(EntryVL.MI, LS, EntryVL.Expr, + EntryVL.Locs[0].Value.RegNo); + LocIndices EntryValueIDs = VarLocIDs.insert(EntryLoc); + Transfers.push_back({&MI, EntryValueIDs.back()}); + OpenRanges.insert(EntryValueIDs, EntryLoc); } } @@ -1169,20 +1390,20 @@ void VarLocBasedLDV::emitEntryValues(MachineInstr &MI, void VarLocBasedLDV::insertTransferDebugPair( MachineInstr &MI, OpenRangesSet &OpenRanges, TransferMap &Transfers, VarLocMap &VarLocIDs, LocIndex OldVarID, TransferKind Kind, - Register NewReg) { - const MachineInstr *DebugInstr = &VarLocIDs[OldVarID].MI; + const VarLoc::MachineLoc &OldLoc, Register NewReg) { + const VarLoc &OldVarLoc = VarLocIDs[OldVarID]; auto ProcessVarLoc = [&MI, &OpenRanges, &Transfers, &VarLocIDs](VarLoc &VL) { - LocIndex LocId = VarLocIDs.insert(VL); + LocIndices LocIds = VarLocIDs.insert(VL); // Close this variable's previous location range. OpenRanges.erase(VL); // Record the new location as an open range, and a postponed transfer // inserting a DBG_VALUE for this location. 
- OpenRanges.insert(LocId, VL); + OpenRanges.insert(LocIds, VL); assert(!MI.isTerminator() && "Cannot insert DBG_VALUE after terminator"); - TransferDebugPair MIP = {&MI, LocId}; + TransferDebugPair MIP = {&MI, LocIds.back()}; Transfers.push_back(MIP); }; @@ -1194,7 +1415,7 @@ void VarLocBasedLDV::insertTransferDebugPair( "No register supplied when handling a copy of a debug value"); // Create a DBG_VALUE instruction to describe the Var in its new // register location. - VarLoc VL = VarLoc::CreateCopyLoc(*DebugInstr, LS, NewReg); + VarLoc VL = VarLoc::CreateCopyLoc(OldVarLoc, OldLoc, NewReg); ProcessVarLoc(VL); LLVM_DEBUG({ dbgs() << "Creating VarLoc for register copy:"; @@ -1206,8 +1427,8 @@ void VarLocBasedLDV::insertTransferDebugPair( // Create a DBG_VALUE instruction to describe the Var in its spilled // location. VarLoc::SpillLoc SpillLocation = extractSpillBaseRegAndOffset(MI); - VarLoc VL = VarLoc::CreateSpillLoc(*DebugInstr, SpillLocation.SpillBase, - SpillLocation.SpillOffset, LS); + VarLoc VL = VarLoc::CreateSpillLoc( + OldVarLoc, OldLoc, SpillLocation.SpillBase, SpillLocation.SpillOffset); ProcessVarLoc(VL); LLVM_DEBUG({ dbgs() << "Creating VarLoc for spill:"; @@ -1220,7 +1441,7 @@ void VarLocBasedLDV::insertTransferDebugPair( "No register supplied when handling a restore of a debug value"); // DebugInstr refers to the pre-spill location, therefore we can reuse // its expression. - VarLoc VL = VarLoc::CreateCopyLoc(*DebugInstr, LS, NewReg); + VarLoc VL = VarLoc::CreateCopyLoc(OldVarLoc, OldLoc, NewReg); ProcessVarLoc(VL); LLVM_DEBUG({ dbgs() << "Creating VarLoc for restore:"; @@ -1267,9 +1488,9 @@ void VarLocBasedLDV::transferRegisterDef( // reasons, it's critical to not iterate over the full set of open VarLocs. // Iterate over the set of dying/used regs instead. if (!RegMasks.empty()) { - SmallVector<uint32_t, 32> UsedRegs; + SmallVector<Register, 32> UsedRegs; getUsedRegs(OpenRanges.getVarLocs(), UsedRegs); - for (uint32_t Reg : UsedRegs) { + for (Register Reg : UsedRegs) { // Remove ranges of all clobbered registers. Register masks don't usually // list SP as preserved. Assume that call instructions never clobber SP, // because some backends (e.g., AArch64) never list SP in the regmask. @@ -1290,9 +1511,9 @@ void VarLocBasedLDV::transferRegisterDef( if (DeadRegs.empty()) return; - VarLocSet KillSet(Alloc); - collectIDsForRegs(KillSet, DeadRegs, OpenRanges.getVarLocs()); - OpenRanges.erase(KillSet, VarLocIDs); + VarLocsInRange KillSet; + collectIDsForRegs(KillSet, DeadRegs, OpenRanges.getVarLocs(), VarLocIDs); + OpenRanges.erase(KillSet, VarLocIDs, LocIndex::kUniversalLocation); if (TPC) { auto &TM = TPC->getTM<TargetMachine>(); @@ -1390,14 +1611,14 @@ void VarLocBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI, // First, if there are any DBG_VALUEs pointing at a spill slot that is // written to, then close the variable location. The value in memory // will have changed. - VarLocSet KillSet(Alloc); + VarLocsInRange KillSet; if (isSpillInstruction(MI, MF)) { Loc = extractSpillBaseRegAndOffset(MI); for (uint64_t ID : OpenRanges.getSpillVarLocs()) { LocIndex Idx = LocIndex::fromRawInteger(ID); const VarLoc &VL = VarLocIDs[Idx]; - assert(VL.Kind == VarLoc::SpillLocKind && "Broken VarLocSet?"); - if (VL.Loc.SpillLocation == *Loc) { + assert(VL.containsSpillLocs() && "Broken VarLocSet?"); + if (VL.usesSpillLoc(*Loc)) { // This location is overwritten by the current instruction -- terminate // the open range, and insert an explicit DBG_VALUE $noreg. 
//
@@ -1408,13 +1629,15 @@ void VarLocBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI,
 //
 // At this stage, we already know which DBG_VALUEs are for spills and
 // where they are located; it's best to handle overwrites now.
- KillSet.set(ID);
- VarLoc UndefVL = VarLoc::CreateCopyLoc(VL.MI, LS, 0);
- LocIndex UndefLocID = VarLocIDs.insert(UndefVL);
- Transfers.push_back({&MI, UndefLocID});
+ KillSet.insert(ID);
+ unsigned SpillLocIdx = VL.getSpillLocIdx(*Loc);
+ VarLoc::MachineLoc OldLoc = VL.Locs[SpillLocIdx];
+ VarLoc UndefVL = VarLoc::CreateCopyLoc(VL, OldLoc, 0);
+ LocIndices UndefLocIDs = VarLocIDs.insert(UndefVL);
+ Transfers.push_back({&MI, UndefLocIDs.back()});
 }
 }
- OpenRanges.erase(KillSet, VarLocIDs);
+ OpenRanges.erase(KillSet, VarLocIDs, LocIndex::kSpillLocation);
 }
 // Try to recognise spill and restore instructions that may create a new
@@ -1441,21 +1664,25 @@ void VarLocBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI,
 for (uint64_t ID : TransferCandidates) {
 LocIndex Idx = LocIndex::fromRawInteger(ID);
 const VarLoc &VL = VarLocIDs[Idx];
+ unsigned LocIdx;
 if (TKind == TransferKind::TransferSpill) {
- assert(VL.isDescribedByReg() == Reg && "Broken VarLocSet?");
+ assert(VL.usesReg(Reg) && "Broken VarLocSet?");
 LLVM_DEBUG(dbgs() << "Spilling Register " << printReg(Reg, TRI) << '('
 << VL.Var.getVariable()->getName() << ")\n");
+ LocIdx = VL.getRegIdx(Reg);
 } else {
- assert(TKind == TransferKind::TransferRestore &&
- VL.Kind == VarLoc::SpillLocKind && "Broken VarLocSet?");
- if (VL.Loc.SpillLocation != *Loc)
+ assert(TKind == TransferKind::TransferRestore && VL.containsSpillLocs() &&
+ "Broken VarLocSet?");
+ if (!VL.usesSpillLoc(*Loc))
 // The spill location is not the location of a debug value.
 continue;
 LLVM_DEBUG(dbgs() << "Restoring Register " << printReg(Reg, TRI) << '('
 << VL.Var.getVariable()->getName() << ")\n");
+ LocIdx = VL.getSpillLocIdx(*Loc);
 }
+ VarLoc::MachineLoc MLoc = VL.Locs[LocIdx];
 insertTransferDebugPair(MI, OpenRanges, Transfers, VarLocIDs, Idx, TKind,
- Reg);
+ MLoc, Reg);
 // FIXME: A comment should explain why it's correct to return early here,
 // if that is in fact correct.
 return;
@@ -1504,17 +1731,16 @@ void VarLocBasedLDV::transferRegisterCopy(MachineInstr &MI,
 for (uint64_t ID : OpenRanges.getEntryValueBackupVarLocs()) {
 LocIndex Idx = LocIndex::fromRawInteger(ID);
 const VarLoc &VL = VarLocIDs[Idx];
- if (VL.getEntryValueBackupReg() == SrcReg) {
+ if (VL.isEntryValueBackupReg(SrcReg)) {
 LLVM_DEBUG(dbgs() << "Copy of the entry value: "; MI.dump(););
 VarLoc EntryValLocCopyBackup =
 VarLoc::CreateEntryCopyBackupLoc(VL.MI, LS, VL.Expr, DestReg);
-
 // Stop tracking the original entry value.
 OpenRanges.erase(VL);
 // Start tracking the entry value copy.
- LocIndex EntryValCopyLocID = VarLocIDs.insert(EntryValLocCopyBackup); - OpenRanges.insert(EntryValCopyLocID, EntryValLocCopyBackup); + LocIndices EntryValCopyLocIDs = VarLocIDs.insert(EntryValLocCopyBackup); + OpenRanges.insert(EntryValCopyLocIDs, EntryValLocCopyBackup); break; } } @@ -1525,9 +1751,12 @@ void VarLocBasedLDV::transferRegisterCopy(MachineInstr &MI, for (uint64_t ID : OpenRanges.getRegisterVarLocs(SrcReg)) { LocIndex Idx = LocIndex::fromRawInteger(ID); - assert(VarLocIDs[Idx].isDescribedByReg() == SrcReg && "Broken VarLocSet?"); + assert(VarLocIDs[Idx].usesReg(SrcReg) && "Broken VarLocSet?"); + VarLoc::MachineLocValue Loc; + Loc.RegNo = SrcReg; + VarLoc::MachineLoc MLoc{VarLoc::MachineLocKind::RegisterKind, Loc}; insertTransferDebugPair(MI, OpenRanges, Transfers, VarLocIDs, Idx, - TransferKind::TransferCopy, DestReg); + TransferKind::TransferCopy, MLoc, DestReg); // FIXME: A comment should explain why it's correct to return early here, // if that is in fact correct. return; @@ -1540,12 +1769,14 @@ bool VarLocBasedLDV::transferTerminator(MachineBasicBlock *CurMBB, VarLocInMBB &OutLocs, const VarLocMap &VarLocIDs) { bool Changed = false; - - LLVM_DEBUG(for (uint64_t ID - : OpenRanges.getVarLocs()) { - // Copy OpenRanges to OutLocs, if not already present. - dbgs() << "Add to OutLocs in MBB #" << CurMBB->getNumber() << ": "; - VarLocIDs[LocIndex::fromRawInteger(ID)].dump(TRI); + LLVM_DEBUG({ + VarVec VarLocs; + OpenRanges.getUniqueVarLocs(VarLocs, VarLocIDs); + for (VarLoc &VL : VarLocs) { + // Copy OpenRanges to OutLocs, if not already present. + dbgs() << "Add to OutLocs in MBB #" << CurMBB->getNumber() << ": "; + VL.dump(TRI); + } }); VarLocSet &VLS = getVarLocsInMBB(CurMBB, OutLocs); Changed = VLS != OpenRanges.getVarLocs(); @@ -1668,12 +1899,11 @@ bool VarLocBasedLDV::join( LLVM_DEBUG({ if (!InLocsT.empty()) { - for (uint64_t ID : InLocsT) + VarVec VarLocs; + collectAllVarLocs(VarLocs, InLocsT, VarLocIDs); + for (const VarLoc &VL : VarLocs) dbgs() << " gathered candidate incoming var: " - << VarLocIDs[LocIndex::fromRawInteger(ID)] - .Var.getVariable() - ->getName() - << "\n"; + << VL.Var.getVariable()->getName() << "\n"; } }); @@ -1722,10 +1952,12 @@ void VarLocBasedLDV::flushPendingLocs(VarLocInMBB &PendingInLocs, auto &MBB = const_cast<MachineBasicBlock &>(*Iter.first); VarLocSet &Pending = *Iter.second.get(); - for (uint64_t ID : Pending) { + SmallVector<VarLoc, 32> VarLocs; + collectAllVarLocs(VarLocs, Pending, VarLocIDs); + + for (VarLoc DiffIt : VarLocs) { // The ID location is live-in to MBB -- work out what kind of machine // location it is and create a DBG_VALUE. 
- const VarLoc &DiffIt = VarLocIDs[LocIndex::fromRawInteger(ID)]; if (DiffIt.isEntryBackupLoc()) continue; MachineInstr *MI = DiffIt.BuildDbgValue(*MBB.getParent()); @@ -1810,8 +2042,8 @@ void VarLocBasedLDV::recordEntryValue(const MachineInstr &MI, DIExpression *NewExpr = DIExpression::prepend(MI.getDebugExpression(), DIExpression::EntryValue); VarLoc EntryValLocAsBackup = VarLoc::CreateEntryBackupLoc(MI, LS, NewExpr); - LocIndex EntryValLocID = VarLocIDs.insert(EntryValLocAsBackup); - OpenRanges.insert(EntryValLocID, EntryValLocAsBackup); + LocIndices EntryValLocIDs = VarLocIDs.insert(EntryValLocAsBackup); + OpenRanges.insert(EntryValLocIDs, EntryValLocAsBackup); } /// Calculate the liveness information for the given machine function and @@ -1896,9 +2128,9 @@ bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) { ReversePostOrderTraversal<MachineFunction *> RPOT(&MF); unsigned int RPONumber = 0; - for (auto RI = RPOT.begin(), RE = RPOT.end(); RI != RE; ++RI) { - OrderToBB[RPONumber] = *RI; - BBToOrder[*RI] = RPONumber; + for (MachineBasicBlock *MBB : RPOT) { + OrderToBB[RPONumber] = MBB; + BBToOrder[MBB] = RPONumber; Worklist.push(RPONumber); ++RPONumber; } diff --git a/llvm/lib/CodeGen/LiveDebugVariables.cpp b/llvm/lib/CodeGen/LiveDebugVariables.cpp index 2325341070a3..54058a547928 100644 --- a/llvm/lib/CodeGen/LiveDebugVariables.cpp +++ b/llvm/lib/CodeGen/LiveDebugVariables.cpp @@ -38,9 +38,11 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/VirtRegMap.h" @@ -56,6 +58,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" #include <algorithm> #include <cassert> #include <iterator> @@ -100,30 +103,134 @@ namespace { /// with some flags about the original usage of the location. class DbgVariableValue { public: - DbgVariableValue(unsigned LocNo, bool WasIndirect, - const DIExpression &Expression) - : LocNo(LocNo), WasIndirect(WasIndirect), Expression(&Expression) { - assert(getLocNo() == LocNo && "location truncation"); + DbgVariableValue(ArrayRef<unsigned> NewLocs, bool WasIndirect, bool WasList, + const DIExpression &Expr) + : WasIndirect(WasIndirect), WasList(WasList), Expression(&Expr) { + assert(!(WasIndirect && WasList) && + "DBG_VALUE_LISTs should not be indirect."); + SmallVector<unsigned> LocNoVec; + for (unsigned LocNo : NewLocs) { + auto It = find(LocNoVec, LocNo); + if (It == LocNoVec.end()) + LocNoVec.push_back(LocNo); + else { + // Loc duplicates an element in LocNos; replace references to Op + // with references to the duplicating element. + unsigned OpIdx = LocNoVec.size(); + unsigned DuplicatingIdx = std::distance(LocNoVec.begin(), It); + Expression = + DIExpression::replaceArg(Expression, OpIdx, DuplicatingIdx); + } + } + // FIXME: Debug values referencing 64+ unique machine locations are rare and + // currently unsupported for performance reasons. If we can verify that + // performance is acceptable for such debug values, we can increase the + // bit-width of LocNoCount to 14 to enable up to 16384 unique machine + // locations. 
We will also need to verify that this does not cause issues
+ // with LiveDebugVariables' use of IntervalMap.
+ if (LocNoVec.size() < 64) {
+ LocNoCount = LocNoVec.size();
+ if (LocNoCount > 0) {
+ LocNos = std::make_unique<unsigned[]>(LocNoCount);
+ std::copy(LocNoVec.begin(), LocNoVec.end(), loc_nos_begin());
+ }
+ } else {
+ LLVM_DEBUG(dbgs() << "Found debug value with 64+ unique machine "
+ "locations, dropping...\n");
+ LocNoCount = 1;
+ // Turn this into an undef debug value list; right now, the simplest form
+ // of this is an expression with one arg, and an undef debug operand.
+ Expression =
+ DIExpression::get(Expr.getContext(), {dwarf::DW_OP_LLVM_arg, 0,
+ dwarf::DW_OP_stack_value});
+ if (auto FragmentInfoOpt = Expr.getFragmentInfo())
+ Expression = *DIExpression::createFragmentExpression(
+ Expression, FragmentInfoOpt->OffsetInBits,
+ FragmentInfoOpt->SizeInBits);
+ LocNos = std::make_unique<unsigned[]>(LocNoCount);
+ LocNos[0] = UndefLocNo;
+ }
 }
- DbgVariableValue() : LocNo(0), WasIndirect(0) {}
+ DbgVariableValue() : LocNoCount(0), WasIndirect(0), WasList(0) {}
+ DbgVariableValue(const DbgVariableValue &Other)
+ : LocNoCount(Other.LocNoCount), WasIndirect(Other.getWasIndirect()),
+ WasList(Other.getWasList()), Expression(Other.getExpression()) {
+ if (Other.getLocNoCount()) {
+ LocNos.reset(new unsigned[Other.getLocNoCount()]);
+ std::copy(Other.loc_nos_begin(), Other.loc_nos_end(), loc_nos_begin());
+ }
+ }
+
+ DbgVariableValue &operator=(const DbgVariableValue &Other) {
+ if (this == &Other)
+ return *this;
+ if (Other.getLocNoCount()) {
+ LocNos.reset(new unsigned[Other.getLocNoCount()]);
+ std::copy(Other.loc_nos_begin(), Other.loc_nos_end(), loc_nos_begin());
+ } else {
+ LocNos.reset(); // reset, not release(): release() would leak the array
+ }
+ LocNoCount = Other.getLocNoCount();
+ WasIndirect = Other.getWasIndirect();
+ WasList = Other.getWasList();
+ Expression = Other.getExpression();
+ return *this;
+ }
 const DIExpression *getExpression() const { return Expression; }
- unsigned getLocNo() const {
- // Fix up the undef location number, which gets truncated.
- return LocNo == INT_MAX ? UndefLocNo : LocNo;
+ uint8_t getLocNoCount() const { return LocNoCount; }
+ bool containsLocNo(unsigned LocNo) const {
+ return is_contained(loc_nos(), LocNo);
 }
 bool getWasIndirect() const { return WasIndirect; }
- bool isUndef() const { return getLocNo() == UndefLocNo; }
+ bool getWasList() const { return WasList; }
+ bool isUndef() const { return LocNoCount == 0 || containsLocNo(UndefLocNo); }
+
+ DbgVariableValue decrementLocNosAfterPivot(unsigned Pivot) const {
+ SmallVector<unsigned, 4> NewLocNos;
+ for (unsigned LocNo : loc_nos())
+ NewLocNos.push_back(LocNo != UndefLocNo && LocNo > Pivot ? LocNo - 1
+ : LocNo);
+ return DbgVariableValue(NewLocNos, WasIndirect, WasList, *Expression);
+ }
- DbgVariableValue changeLocNo(unsigned NewLocNo) const {
- return DbgVariableValue(NewLocNo, WasIndirect, *Expression);
+ DbgVariableValue remapLocNos(ArrayRef<unsigned> LocNoMap) const {
+ SmallVector<unsigned> NewLocNos;
+ for (unsigned LocNo : loc_nos())
+ // Undef values don't exist in locations (and thus not in LocNoMap
+ // either) so skip over them. See getLocationNo().
+ NewLocNos.push_back(LocNo == UndefLocNo ?
UndefLocNo : LocNoMap[LocNo]); + return DbgVariableValue(NewLocNos, WasIndirect, WasList, *Expression); + } + + DbgVariableValue changeLocNo(unsigned OldLocNo, unsigned NewLocNo) const { + SmallVector<unsigned> NewLocNos; + NewLocNos.assign(loc_nos_begin(), loc_nos_end()); + auto OldLocIt = find(NewLocNos, OldLocNo); + assert(OldLocIt != NewLocNos.end() && "Old location must be present."); + *OldLocIt = NewLocNo; + return DbgVariableValue(NewLocNos, WasIndirect, WasList, *Expression); + } + + bool hasLocNoGreaterThan(unsigned LocNo) const { + return any_of(loc_nos(), + [LocNo](unsigned ThisLocNo) { return ThisLocNo > LocNo; }); + } + + void printLocNos(llvm::raw_ostream &OS) const { + for (const unsigned &Loc : loc_nos()) + OS << (&Loc == loc_nos_begin() ? " " : ", ") << Loc; } friend inline bool operator==(const DbgVariableValue &LHS, const DbgVariableValue &RHS) { - return LHS.LocNo == RHS.LocNo && LHS.WasIndirect == RHS.WasIndirect && - LHS.Expression == RHS.Expression; + if (std::tie(LHS.LocNoCount, LHS.WasIndirect, LHS.WasList, + LHS.Expression) != + std::tie(RHS.LocNoCount, RHS.WasIndirect, RHS.WasList, RHS.Expression)) + return false; + return std::equal(LHS.loc_nos_begin(), LHS.loc_nos_end(), + RHS.loc_nos_begin()); } friend inline bool operator!=(const DbgVariableValue &LHS, @@ -131,9 +238,24 @@ public: return !(LHS == RHS); } + unsigned *loc_nos_begin() { return LocNos.get(); } + const unsigned *loc_nos_begin() const { return LocNos.get(); } + unsigned *loc_nos_end() { return LocNos.get() + LocNoCount; } + const unsigned *loc_nos_end() const { return LocNos.get() + LocNoCount; } + ArrayRef<unsigned> loc_nos() const { + return ArrayRef<unsigned>(LocNos.get(), LocNoCount); + } + private: - unsigned LocNo : 31; - unsigned WasIndirect : 1; + // IntervalMap requires the value object to be very small, to the extent + // that we do not have enough room for an std::vector. Using a C-style array + // (with a unique_ptr wrapper for convenience) allows us to optimize for this + // specific case by packing the array size into only 6 bits (it is highly + // unlikely that any debug value will need 64+ locations). + std::unique_ptr<unsigned[]> LocNos; + uint8_t LocNoCount : 6; + bool WasIndirect : 1; + bool WasList : 1; const DIExpression *Expression = nullptr; }; } // namespace @@ -145,6 +267,14 @@ using LocMap = IntervalMap<SlotIndex, DbgVariableValue, 4>; /// Non-spilled locations are not added to the map. using SpillOffsetMap = DenseMap<unsigned, unsigned>; +/// Cache to save the location where it can be used as the starting +/// position as input for calling MachineBasicBlock::SkipPHIsLabelsAndDebug. +/// This is to prevent MachineBasicBlock::SkipPHIsLabelsAndDebug from +/// repeatedly searching the same set of PHIs/Labels/Debug instructions +/// if it is called many times for the same block. +using BlockSkipInstsMap = + DenseMap<MachineBasicBlock *, MachineBasicBlock::iterator>; + namespace { class LDVImpl; @@ -179,9 +309,11 @@ class UserValue { /// Insert a DBG_VALUE into MBB at Idx for DbgValue. void insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx, SlotIndex StopIdx, DbgVariableValue DbgValue, - bool Spilled, unsigned SpillOffset, LiveIntervals &LIS, + ArrayRef<bool> LocSpills, + ArrayRef<unsigned> SpillOffsets, LiveIntervals &LIS, const TargetInstrInfo &TII, - const TargetRegisterInfo &TRI); + const TargetRegisterInfo &TRI, + BlockSkipInstsMap &BBSkipInstsMap); /// Replace OldLocNo ranges with NewRegs ranges where NewRegs /// is live. Returns true if any changes were made. 
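// [Illustrative sketch, not part of the upstream patch. The tight layout of
// DbgVariableValue above exists because IntervalMap inlines its value type
// into fixed-size tree nodes; hence the unique_ptr-owned array plus a 6-bit
// count and two flag bits. A stand-in showing the same packing; the 24-byte
// figure assumes a typical 64-bit host ABI and is an assumption, not
// something the patch asserts:]
#include <cstdint>
#include <memory>

struct PackedValue {
  std::unique_ptr<unsigned[]> LocNos; // 8 bytes: heap-owned location array
  uint8_t LocNoCount : 6;             // up to 63 locations
  bool WasIndirect : 1;
  bool WasList : 1;
  const void *Expression;             // stand-in for const DIExpression *
};

static_assert(sizeof(PackedValue) <= 24, "stays small enough for the map");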
@@ -264,17 +396,17 @@ public:
 void removeLocationIfUnused(unsigned LocNo) {
 // Bail out if LocNo still is used.
 for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I) {
- DbgVariableValue DbgValue = I.value();
- if (DbgValue.getLocNo() == LocNo)
+ const DbgVariableValue &DbgValue = I.value();
+ if (DbgValue.containsLocNo(LocNo))
 return;
 }
 // Remove the entry in the locations vector, and adjust all references to
 // location numbers above the removed entry.
 locations.erase(locations.begin() + LocNo);
 for (LocMap::iterator I = locInts.begin(); I.valid(); ++I) {
- DbgVariableValue DbgValue = I.value();
- if (!DbgValue.isUndef() && DbgValue.getLocNo() > LocNo)
- I.setValueUnchecked(DbgValue.changeLocNo(DbgValue.getLocNo() - 1));
+ const DbgVariableValue &DbgValue = I.value();
+ if (DbgValue.hasLocNoGreaterThan(LocNo))
+ I.setValueUnchecked(DbgValue.decrementLocNosAfterPivot(LocNo));
 }
 }
@@ -282,16 +414,19 @@ public:
 void mapVirtRegs(LDVImpl *LDV);
 /// Add a definition point to this user value.
- void addDef(SlotIndex Idx, const MachineOperand &LocMO, bool IsIndirect,
- const DIExpression &Expr) {
- DbgVariableValue DbgValue(getLocationNo(LocMO), IsIndirect, Expr);
+ void addDef(SlotIndex Idx, ArrayRef<MachineOperand> LocMOs, bool IsIndirect,
+ bool IsList, const DIExpression &Expr) {
+ SmallVector<unsigned> Locs;
+ for (MachineOperand Op : LocMOs)
+ Locs.push_back(getLocationNo(Op));
+ DbgVariableValue DbgValue(Locs, IsIndirect, IsList, Expr);
 // Add a singular (Idx,Idx) -> value mapping.
 LocMap::iterator I = locInts.find(Idx);
 if (!I.valid() || I.start() != Idx)
- I.insert(Idx, Idx.getNextSlot(), DbgValue);
+ I.insert(Idx, Idx.getNextSlot(), std::move(DbgValue));
 else
 // A later DBG_VALUE at the same SlotIndex overrides the old location.
- I.setValue(DbgValue);
+ I.setValue(std::move(DbgValue));
 }
 /// Extend the current definition as far as possible down.
@@ -304,25 +439,30 @@ public:
 ///
 /// \param Idx Starting point for the definition.
 /// \param DbgValue value to propagate.
- /// \param LR Restrict liveness to where LR has the value VNI. May be null.
- /// \param VNI When LR is not null, this is the value to restrict to.
+ /// \param LiveIntervalInfo For each location number key in this map,
+ /// restricts liveness to where the LiveRange has the value equal to the
+ /// VNInfo.
 /// \param [out] Kills Append end points of VNI's live range to Kills.
 /// \param LIS Live intervals analysis.
- void extendDef(SlotIndex Idx, DbgVariableValue DbgValue, LiveRange *LR,
- const VNInfo *VNI, SmallVectorImpl<SlotIndex> *Kills,
+ void extendDef(SlotIndex Idx, DbgVariableValue DbgValue,
+ SmallDenseMap<unsigned, std::pair<LiveRange *, const VNInfo *>>
+ &LiveIntervalInfo,
+ Optional<std::pair<SlotIndex, SmallVector<unsigned>>> &Kills,
 LiveIntervals &LIS);
 /// The value in LI may be copied to other registers. Determine if
 /// any of the copies are available at the kill points, and add defs if
 /// possible.
 ///
- /// \param LI Scan for copies of the value in LI->reg.
 /// \param DbgValue Location number of LI->reg, and DIExpression.
- /// \param Kills Points where the range of DbgValue could be extended.
+ /// \param LocIntervals Scan for copies of the value for each location in the
+ /// corresponding LiveInterval->reg.
+ /// \param KilledAt The point where the range of DbgValue could be extended.
 /// \param [in,out] NewDefs Append (Idx, DbgValue) of inserted defs here.
void addDefsFromCopies(
- LiveInterval *LI, DbgVariableValue DbgValue,
- const SmallVectorImpl<SlotIndex> &Kills,
+ DbgVariableValue DbgValue,
+ SmallVectorImpl<std::pair<unsigned, LiveInterval *>> &LocIntervals,
+ SlotIndex KilledAt,
 SmallVectorImpl<std::pair<SlotIndex, DbgVariableValue>> &NewDefs,
 MachineRegisterInfo &MRI, LiveIntervals &LIS);
@@ -348,10 +488,11 @@ public:
 void emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
 const TargetInstrInfo &TII,
 const TargetRegisterInfo &TRI,
- const SpillOffsetMap &SpillOffsets);
+ const SpillOffsetMap &SpillOffsets,
+ BlockSkipInstsMap &BBSkipInstsMap);
 /// Return DebugLoc of this UserValue.
- DebugLoc getDebugLoc() { return dl;}
+ const DebugLoc &getDebugLoc() { return dl; }
 void print(raw_ostream &, const TargetRegisterInfo *);
 };
@@ -365,7 +506,8 @@ class UserLabel {
 /// Insert a DBG_LABEL into MBB at Idx.
 void insertDebugLabel(MachineBasicBlock *MBB, SlotIndex Idx,
- LiveIntervals &LIS, const TargetInstrInfo &TII);
+ LiveIntervals &LIS, const TargetInstrInfo &TII,
+ BlockSkipInstsMap &BBSkipInstsMap);
 public:
 /// Create a new UserLabel.
@@ -379,10 +521,11 @@ public:
 }
 /// Recreate DBG_LABEL instruction from data structures.
- void emitDebugLabel(LiveIntervals &LIS, const TargetInstrInfo &TII);
+ void emitDebugLabel(LiveIntervals &LIS, const TargetInstrInfo &TII,
+ BlockSkipInstsMap &BBSkipInstsMap);
 /// Return DebugLoc of this UserLabel.
- DebugLoc getDebugLoc() { return dl; }
+ const DebugLoc &getDebugLoc() { return dl; }
 void print(raw_ostream &, const TargetRegisterInfo *);
 };
@@ -395,10 +538,31 @@ class LDVImpl {
 LiveIntervals *LIS;
 const TargetRegisterInfo *TRI;
- using StashedInstrRef =
- std::tuple<unsigned, unsigned, const DILocalVariable *,
- const DIExpression *, DebugLoc>;
- std::map<SlotIndex, std::vector<StashedInstrRef>> StashedInstrReferences;
+ /// Position and VReg of a PHI instruction during register allocation.
+ struct PHIValPos {
+ SlotIndex SI; /// Slot where this PHI occurs.
+ Register Reg; /// VReg this PHI occurs in.
+ unsigned SubReg; /// Qualifying subregister for Reg.
+ };
+
+ /// Map from debug instruction number to PHI position during allocation.
+ std::map<unsigned, PHIValPos> PHIValToPos;
+ /// For each VReg, an index of which debug instruction numbers and
+ /// corresponding PHIs are sensitive to splitting. Each VReg may have
+ /// multiple PHI defs, at different positions.
+ DenseMap<Register, std::vector<unsigned>> RegToPHIIdx;
+
+ /// Record for any debug instructions unlinked from their blocks during
+ /// regalloc. Stores the instr and its location, so that they can be
+ /// re-inserted after regalloc is over.
+ struct InstrPos {
+ MachineInstr *MI; ///< Debug instruction, unlinked from its block.
+ SlotIndex Idx; ///< Slot position where MI should be re-inserted.
+ MachineBasicBlock *MBB; ///< Block that MI was in.
+ };
+
+ /// Collection of stored debug instructions, preserved until after regalloc.
+ SmallVector<InstrPos, 32> StashedDebugInstrs;
 /// Whether emitDebugValues is called.
 bool EmitDone = false;
@@ -436,15 +600,18 @@ class LDVImpl {
 /// \returns True if the DBG_VALUE instruction should be deleted.
 bool handleDebugValue(MachineInstr &MI, SlotIndex Idx);
- /// Track a DBG_INSTR_REF. This needs to be removed from the MachineFunction
- /// during regalloc -- but there's no need to maintain live ranges, as we
- /// refer to a value rather than a location.
+ /// Track variable location debug instructions while using the instruction
+ /// referencing implementation. Such debug instructions do not need to be
+ /// updated during regalloc because they identify instructions rather than
+ /// register locations. However, they need to be removed from the
+ /// MachineFunction during regalloc, then re-inserted later, to avoid
+ /// disrupting the allocator.
 ///
- /// \param MI DBG_INSTR_REF instruction
+ /// \param MI Any DBG_VALUE / DBG_INSTR_REF / DBG_PHI instruction
 /// \param Idx Last valid SlotIndex before instruction
 ///
- /// \returns True if the DBG_VALUE instruction should be deleted.
- bool handleDebugInstrRef(MachineInstr &MI, SlotIndex Idx);
+ /// \returns Iterator to continue processing from after unlinking.
+ MachineBasicBlock::iterator handleDebugInstr(MachineInstr &MI, SlotIndex Idx);
 /// Add DBG_LABEL instruction to UserLabel.
 ///
@@ -458,9 +625,11 @@ class LDVImpl {
 /// for each instruction.
 ///
 /// \param mf MachineFunction to be scanned.
+ /// \param InstrRef Whether to operate in instruction referencing mode. If
+ /// true, most of LiveDebugVariables doesn't run.
 ///
 /// \returns True if any debug values were found.
- bool collectDebugValues(MachineFunction &mf);
+ bool collectDebugValues(MachineFunction &mf, bool InstrRef);
 /// Compute the live intervals of all user values after collecting all
 /// their def points.
@@ -469,12 +638,14 @@ public:
 LDVImpl(LiveDebugVariables *ps) : pass(*ps) {}
- bool runOnMachineFunction(MachineFunction &mf);
+ bool runOnMachineFunction(MachineFunction &mf, bool InstrRef);
 /// Release all memory.
 void clear() {
 MF = nullptr;
- StashedInstrReferences.clear();
+ PHIValToPos.clear();
+ RegToPHIIdx.clear();
+ StashedDebugInstrs.clear();
 userValues.clear();
 userLabels.clear();
 virtRegToEqClass.clear();
@@ -489,6 +660,10 @@ public:
 /// Map virtual register to an equivalence class.
 void mapVirtReg(Register VirtReg, UserValue *EC);
+ /// Replace any PHI referring to OldReg with its corresponding NewReg, if
+ /// present.
+ void splitPHIRegister(Register OldReg, ArrayRef<Register> NewRegs);
+
 /// Replace all references to OldReg with NewRegs.
 void splitRegister(Register OldReg, ArrayRef<Register> NewRegs);
@@ -555,11 +730,13 @@ void UserValue::print(raw_ostream &OS, const TargetRegisterInfo *TRI) {
 for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I) {
 OS << " [" << I.start() << ';' << I.stop() << "):";
 if (I.value().isUndef())
- OS << "undef";
+ OS << " undef";
 else {
- OS << I.value().getLocNo();
+ I.value().printLocNos(OS);
 if (I.value().getWasIndirect())
 OS << " ind";
+ else if (I.value().getWasList())
+ OS << " list";
 }
 }
 for (unsigned i = 0, e = locations.size(); i != e; ++i) {
@@ -623,11 +800,21 @@ UserValue *LDVImpl::lookupVirtReg(Register VirtReg) {
 }
 bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) {
- // DBG_VALUE loc, offset, variable
- if (MI.getNumOperands() != 4 ||
- !(MI.getDebugOffset().isReg() || MI.getDebugOffset().isImm()) ||
- !MI.getDebugVariableOp().isMetadata()) {
- LLVM_DEBUG(dbgs() << "Can't handle " << MI);
+ // DBG_VALUE loc, offset, variable, expr
+ // DBG_VALUE_LIST variable, expr, locs...
+ if (!MI.isDebugValue()) { + LLVM_DEBUG(dbgs() << "Can't handle non-DBG_VALUE*: " << MI); + return false; + } + if (!MI.getDebugVariableOp().isMetadata()) { + LLVM_DEBUG(dbgs() << "Can't handle DBG_VALUE* with invalid variable: " + << MI); + return false; + } + if (MI.isNonListDebugValue() && + (MI.getNumOperands() != 4 || + !(MI.getDebugOffset().isImm() || MI.getDebugOffset().isReg()))) { + LLVM_DEBUG(dbgs() << "Can't handle malformed DBG_VALUE: " << MI); return false; } @@ -639,27 +826,28 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) { // (and if the machine verifier is improved to catch this), then these checks // could be removed or replaced by asserts. bool Discard = false; - if (MI.getDebugOperand(0).isReg() && - Register::isVirtualRegister(MI.getDebugOperand(0).getReg())) { - const Register Reg = MI.getDebugOperand(0).getReg(); - if (!LIS->hasInterval(Reg)) { - // The DBG_VALUE is described by a virtual register that does not have a - // live interval. Discard the DBG_VALUE. - Discard = true; - LLVM_DEBUG(dbgs() << "Discarding debug info (no LIS interval): " << Idx - << " " << MI); - } else { - // The DBG_VALUE is only valid if either Reg is live out from Idx, or Reg - // is defined dead at Idx (where Idx is the slot index for the instruction - // preceding the DBG_VALUE). - const LiveInterval &LI = LIS->getInterval(Reg); - LiveQueryResult LRQ = LI.Query(Idx); - if (!LRQ.valueOutOrDead()) { - // We have found a DBG_VALUE with the value in a virtual register that - // is not live. Discard the DBG_VALUE. + for (const MachineOperand &Op : MI.debug_operands()) { + if (Op.isReg() && Register::isVirtualRegister(Op.getReg())) { + const Register Reg = Op.getReg(); + if (!LIS->hasInterval(Reg)) { + // The DBG_VALUE is described by a virtual register that does not have a + // live interval. Discard the DBG_VALUE. Discard = true; - LLVM_DEBUG(dbgs() << "Discarding debug info (reg not live): " << Idx + LLVM_DEBUG(dbgs() << "Discarding debug info (no LIS interval): " << Idx << " " << MI); + } else { + // The DBG_VALUE is only valid if either Reg is live out from Idx, or + // Reg is defined dead at Idx (where Idx is the slot index for the + // instruction preceding the DBG_VALUE). + const LiveInterval &LI = LIS->getInterval(Reg); + LiveQueryResult LRQ = LI.Query(Idx); + if (!LRQ.valueOutOrDead()) { + // We have found a DBG_VALUE with the value in a virtual register that + // is not live. Discard the DBG_VALUE. + Discard = true; + LLVM_DEBUG(dbgs() << "Discarding debug info (reg not live): " << Idx + << " " << MI); + } } } } @@ -669,30 +857,42 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) { if (IsIndirect) assert(MI.getDebugOffset().getImm() == 0 && "DBG_VALUE with nonzero offset"); + bool IsList = MI.isDebugValueList(); const DILocalVariable *Var = MI.getDebugVariable(); const DIExpression *Expr = MI.getDebugExpression(); UserValue *UV = getUserValue(Var, Expr->getFragmentInfo(), MI.getDebugLoc()); if (!Discard) - UV->addDef(Idx, MI.getDebugOperand(0), IsIndirect, *Expr); + UV->addDef(Idx, + ArrayRef<MachineOperand>(MI.debug_operands().begin(), + MI.debug_operands().end()), + IsIndirect, IsList, *Expr); else { MachineOperand MO = MachineOperand::CreateReg(0U, false); MO.setIsDebug(); - UV->addDef(Idx, MO, false, *Expr); + // We should still pass a list the same size as MI.debug_operands() even if + // all MOs are undef, so that DbgVariableValue can correctly adjust the + // expression while removing the duplicated undefs. 
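+    // E.g. (sketch): a DBG_VALUE_LIST with three dead operands still gets
+    // three $noreg entries here rather than one, so that any DW_OP_LLVM_arg
+    // indices in the expression keep referring to the intended operand slots.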
+ SmallVector<MachineOperand, 4> UndefMOs(MI.getNumDebugOperands(), MO); + UV->addDef(Idx, UndefMOs, false, IsList, *Expr); } return true; } -bool LDVImpl::handleDebugInstrRef(MachineInstr &MI, SlotIndex Idx) { - assert(MI.isDebugRef()); - unsigned InstrNum = MI.getOperand(0).getImm(); - unsigned OperandNum = MI.getOperand(1).getImm(); - auto *Var = MI.getDebugVariable(); - auto *Expr = MI.getDebugExpression(); - auto &DL = MI.getDebugLoc(); - StashedInstrRef Stashed = - std::make_tuple(InstrNum, OperandNum, Var, Expr, DL); - StashedInstrReferences[Idx].push_back(Stashed); - return true; +MachineBasicBlock::iterator LDVImpl::handleDebugInstr(MachineInstr &MI, + SlotIndex Idx) { + assert(MI.isDebugValue() || MI.isDebugRef() || MI.isDebugPHI()); + + // In instruction referencing mode, there should be no DBG_VALUE instructions + // that refer to virtual registers. They might still refer to constants. + if (MI.isDebugValue()) + assert(!MI.getOperand(0).isReg() || !MI.getOperand(0).getReg().isVirtual()); + + // Unlink the instruction, store it in the debug instructions collection. + auto NextInst = std::next(MI.getIterator()); + auto *MBB = MI.getParent(); + MI.removeFromParent(); + StashedDebugInstrs.push_back({&MI, Idx, MBB}); + return NextInst; } bool LDVImpl::handleDebugLabel(MachineInstr &MI, SlotIndex Idx) { @@ -718,62 +918,71 @@ return true; } -bool LDVImpl::collectDebugValues(MachineFunction &mf) { +bool LDVImpl::collectDebugValues(MachineFunction &mf, bool InstrRef) { bool Changed = false; - for (MachineFunction::iterator MFI = mf.begin(), MFE = mf.end(); MFI != MFE; - ++MFI) { - MachineBasicBlock *MBB = &*MFI; - for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end(); + for (MachineBasicBlock &MBB : mf) { + for (MachineBasicBlock::iterator MBBI = MBB.begin(), MBBE = MBB.end(); MBBI != MBBE;) { // Use the first debug instruction in the sequence to get a SlotIndex // for following consecutive debug instructions. - if (!MBBI->isDebugInstr()) { + if (!MBBI->isDebugOrPseudoInstr()) { ++MBBI; continue; } // Debug instructions have no slot index. Use the previous // non-debug instruction's SlotIndex as its SlotIndex. SlotIndex Idx = - MBBI == MBB->begin() - ? LIS->getMBBStartIdx(MBB) + MBBI == MBB.begin() + ? LIS->getMBBStartIdx(&MBB) : LIS->getInstructionIndex(*std::prev(MBBI)).getRegSlot(); // Handle consecutive debug instructions with the same slot index. do { - // Only handle DBG_VALUE in handleDebugValue(). Skip all other - // kinds of debug instructions. - if ((MBBI->isDebugValue() && handleDebugValue(*MBBI, Idx)) || - (MBBI->isDebugRef() && handleDebugInstrRef(*MBBI, Idx)) || - (MBBI->isDebugLabel() && handleDebugLabel(*MBBI, Idx))) { - MBBI = MBB->erase(MBBI); + // In instruction referencing mode, pass each instr to handleDebugInstr + // to be unlinked. Ignore DBG_VALUE_LISTs -- they refer to vregs, and + // need to go through the normal live interval splitting process. + if (InstrRef && (MBBI->isNonListDebugValue() || MBBI->isDebugPHI() || + MBBI->isDebugRef())) { + MBBI = handleDebugInstr(*MBBI, Idx); + Changed = true; + // In normal debug mode, use the dedicated DBG_VALUE / DBG_LABEL handler + // to track things through register allocation, and erase the instr.
+ } else if ((MBBI->isDebugValue() && handleDebugValue(*MBBI, Idx)) || + (MBBI->isDebugLabel() && handleDebugLabel(*MBBI, Idx))) { + MBBI = MBB.erase(MBBI); Changed = true; } else ++MBBI; - } while (MBBI != MBBE && MBBI->isDebugInstr()); + } while (MBBI != MBBE && MBBI->isDebugOrPseudoInstr()); } } return Changed; } -void UserValue::extendDef(SlotIndex Idx, DbgVariableValue DbgValue, LiveRange *LR, - const VNInfo *VNI, SmallVectorImpl<SlotIndex> *Kills, - LiveIntervals &LIS) { +void UserValue::extendDef( + SlotIndex Idx, DbgVariableValue DbgValue, + SmallDenseMap<unsigned, std::pair<LiveRange *, const VNInfo *>> + &LiveIntervalInfo, + Optional<std::pair<SlotIndex, SmallVector<unsigned>>> &Kills, + LiveIntervals &LIS) { SlotIndex Start = Idx; MachineBasicBlock *MBB = LIS.getMBBFromIndex(Start); SlotIndex Stop = LIS.getMBBEndIdx(MBB); LocMap::iterator I = locInts.find(Start); - // Limit to VNI's live range. - bool ToEnd = true; - if (LR && VNI) { + // Limit to the intersection of the VNIs' live ranges. + for (auto &LII : LiveIntervalInfo) { + LiveRange *LR = LII.second.first; + assert(LR && LII.second.second && "Missing range info for Idx."); LiveInterval::Segment *Segment = LR->getSegmentContaining(Start); - if (!Segment || Segment->valno != VNI) { - if (Kills) - Kills->push_back(Start); - return; - } + assert(Segment && Segment->valno == LII.second.second && + "Invalid VNInfo for Idx given?"); if (Segment->end < Stop) { Stop = Segment->end; - ToEnd = false; + Kills = {Stop, {LII.first}}; + } else if (Segment->end == Stop && Kills.hasValue()) { + // If multiple locations end at the same place, track all of them in + // Kills. + Kills->second.push_back(LII.first); } } @@ -781,94 +990,116 @@ void UserValue::extendDef(SlotIndex Idx, DbgVariableValue DbgValue, LiveRange *L if (I.valid() && I.start() <= Start) { // Stop when meeting a different location or an already extended interval. Start = Start.getNextSlot(); - if (I.value() != DbgValue || I.stop() != Start) + if (I.value() != DbgValue || I.stop() != Start) { + // Clear `Kills`, as we have a new def available. + Kills = None; return; + } // This is a one-slot placeholder. Just skip it. ++I; } // Limited by the next def. - if (I.valid() && I.start() < Stop) + if (I.valid() && I.start() < Stop) { Stop = I.start(); - // Limited by VNI's live range. - else if (!ToEnd && Kills) - Kills->push_back(Stop); + // Clear `Kills`, as we have a new def available. + Kills = None; + } - if (Start < Stop) - I.insert(Start, Stop, DbgValue); + if (Start < Stop) { + DbgVariableValue ExtDbgValue(DbgValue); + I.insert(Start, Stop, std::move(ExtDbgValue)); + } } void UserValue::addDefsFromCopies( - LiveInterval *LI, DbgVariableValue DbgValue, - const SmallVectorImpl<SlotIndex> &Kills, + DbgVariableValue DbgValue, + SmallVectorImpl<std::pair<unsigned, LiveInterval *>> &LocIntervals, + SlotIndex KilledAt, SmallVectorImpl<std::pair<SlotIndex, DbgVariableValue>> &NewDefs, MachineRegisterInfo &MRI, LiveIntervals &LIS) { - if (Kills.empty()) - return; // Don't track copies from physregs, there are too many uses. - if (!Register::isVirtualRegister(LI->reg())) + if (any_of(LocIntervals, [](auto LocI) { + return !Register::isVirtualRegister(LocI.second->reg()); + })) return; // Collect all the (vreg, valno) pairs that are copies of LI. - SmallVector<std::pair<LiveInterval*, const VNInfo*>, 8> CopyValues; - for (MachineOperand &MO : MRI.use_nodbg_operands(LI->reg())) { - MachineInstr *MI = MO.getParent(); - // Copies of the full value. 
- if (MO.getSubReg() || !MI->isCopy()) - continue; - Register DstReg = MI->getOperand(0).getReg(); + SmallDenseMap<unsigned, + SmallVector<std::pair<LiveInterval *, const VNInfo *>, 4>> + CopyValues; + for (auto &LocInterval : LocIntervals) { + unsigned LocNo = LocInterval.first; + LiveInterval *LI = LocInterval.second; + for (MachineOperand &MO : MRI.use_nodbg_operands(LI->reg())) { + MachineInstr *MI = MO.getParent(); + // Copies of the full value. + if (MO.getSubReg() || !MI->isCopy()) + continue; + Register DstReg = MI->getOperand(0).getReg(); - // Don't follow copies to physregs. These are usually setting up call - // arguments, and the argument registers are always call clobbered. We are - // better off in the source register which could be a callee-saved register, - // or it could be spilled. - if (!Register::isVirtualRegister(DstReg)) - continue; + // Don't follow copies to physregs. These are usually setting up call + // arguments, and the argument registers are always call clobbered. We are + // better off in the source register which could be a callee-saved + // register, or it could be spilled. + if (!Register::isVirtualRegister(DstReg)) + continue; - // Is the value extended to reach this copy? If not, another def may be - // blocking it, or we are looking at a wrong value of LI. - SlotIndex Idx = LIS.getInstructionIndex(*MI); - LocMap::iterator I = locInts.find(Idx.getRegSlot(true)); - if (!I.valid() || I.value() != DbgValue) - continue; + // Is the value extended to reach this copy? If not, another def may be + // blocking it, or we are looking at a wrong value of LI. + SlotIndex Idx = LIS.getInstructionIndex(*MI); + LocMap::iterator I = locInts.find(Idx.getRegSlot(true)); + if (!I.valid() || I.value() != DbgValue) + continue; - if (!LIS.hasInterval(DstReg)) - continue; - LiveInterval *DstLI = &LIS.getInterval(DstReg); - const VNInfo *DstVNI = DstLI->getVNInfoAt(Idx.getRegSlot()); - assert(DstVNI && DstVNI->def == Idx.getRegSlot() && "Bad copy value"); - CopyValues.push_back(std::make_pair(DstLI, DstVNI)); + if (!LIS.hasInterval(DstReg)) + continue; + LiveInterval *DstLI = &LIS.getInterval(DstReg); + const VNInfo *DstVNI = DstLI->getVNInfoAt(Idx.getRegSlot()); + assert(DstVNI && DstVNI->def == Idx.getRegSlot() && "Bad copy value"); + CopyValues[LocNo].push_back(std::make_pair(DstLI, DstVNI)); + } } if (CopyValues.empty()) return; - LLVM_DEBUG(dbgs() << "Got " << CopyValues.size() << " copies of " << *LI - << '\n'); +#if !defined(NDEBUG) + for (auto &LocInterval : LocIntervals) + LLVM_DEBUG(dbgs() << "Got " << CopyValues[LocInterval.first].size() + << " copies of " << *LocInterval.second << '\n'); +#endif - // Try to add defs of the copied values for each kill point. - for (unsigned i = 0, e = Kills.size(); i != e; ++i) { - SlotIndex Idx = Kills[i]; - for (unsigned j = 0, e = CopyValues.size(); j != e; ++j) { - LiveInterval *DstLI = CopyValues[j].first; - const VNInfo *DstVNI = CopyValues[j].second; - if (DstLI->getVNInfoAt(Idx) != DstVNI) - continue; - // Check that there isn't already a def at Idx - LocMap::iterator I = locInts.find(Idx); - if (I.valid() && I.start() <= Idx) + // Try to add defs of the copied values for the kill point. Check that there + // isn't already a def at Idx. 
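+  // Concretely (an invented example): if the variable's location %0 dies at
+  // KilledAt but a "%1 = COPY %0" reaches that point, the code below switches
+  // the location to %1 so the variable value can be extended past the kill.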
+ LocMap::iterator I = locInts.find(KilledAt); + if (I.valid() && I.start() <= KilledAt) + return; + DbgVariableValue NewValue(DbgValue); + for (auto &LocInterval : LocIntervals) { + unsigned LocNo = LocInterval.first; + bool FoundCopy = false; + for (auto &LIAndVNI : CopyValues[LocNo]) { + LiveInterval *DstLI = LIAndVNI.first; + const VNInfo *DstVNI = LIAndVNI.second; + if (DstLI->getVNInfoAt(KilledAt) != DstVNI) continue; - LLVM_DEBUG(dbgs() << "Kill at " << Idx << " covered by valno #" + LLVM_DEBUG(dbgs() << "Kill at " << KilledAt << " covered by valno #" << DstVNI->id << " in " << *DstLI << '\n'); MachineInstr *CopyMI = LIS.getInstructionFromIndex(DstVNI->def); assert(CopyMI && CopyMI->isCopy() && "Bad copy value"); - unsigned LocNo = getLocationNo(CopyMI->getOperand(0)); - DbgVariableValue NewValue = DbgValue.changeLocNo(LocNo); - I.insert(Idx, Idx.getNextSlot(), NewValue); - NewDefs.push_back(std::make_pair(Idx, NewValue)); + unsigned NewLocNo = getLocationNo(CopyMI->getOperand(0)); + NewValue = NewValue.changeLocNo(LocNo, NewLocNo); + FoundCopy = true; break; } + // If there are any killed locations we can't find a copy for, we can't + // extend the variable value. + if (!FoundCopy) + return; } + I.insert(KilledAt, KilledAt.getNextSlot(), NewValue); + NewDefs.push_back(std::make_pair(KilledAt, NewValue)); } void UserValue::computeIntervals(MachineRegisterInfo &MRI, @@ -885,34 +1116,54 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI, for (unsigned i = 0; i != Defs.size(); ++i) { SlotIndex Idx = Defs[i].first; DbgVariableValue DbgValue = Defs[i].second; - const MachineOperand &LocMO = locations[DbgValue.getLocNo()]; - - if (!LocMO.isReg()) { - extendDef(Idx, DbgValue, nullptr, nullptr, nullptr, LIS); - continue; - } - - // Register locations are constrained to where the register value is live. - if (Register::isVirtualRegister(LocMO.getReg())) { + SmallDenseMap<unsigned, std::pair<LiveRange *, const VNInfo *>> LIs; + SmallVector<const VNInfo *, 4> VNIs; + bool ShouldExtendDef = false; + for (unsigned LocNo : DbgValue.loc_nos()) { + const MachineOperand &LocMO = locations[LocNo]; + if (!LocMO.isReg() || !Register::isVirtualRegister(LocMO.getReg())) { + ShouldExtendDef |= !LocMO.isReg(); + continue; + } + ShouldExtendDef = true; LiveInterval *LI = nullptr; const VNInfo *VNI = nullptr; if (LIS.hasInterval(LocMO.getReg())) { LI = &LIS.getInterval(LocMO.getReg()); VNI = LI->getVNInfoAt(Idx); } - SmallVector<SlotIndex, 16> Kills; - extendDef(Idx, DbgValue, LI, VNI, &Kills, LIS); - // FIXME: Handle sub-registers in addDefsFromCopies. The problem is that - // if the original location for example is %vreg0:sub_hi, and we find a - // full register copy in addDefsFromCopies (at the moment it only handles - // full register copies), then we must add the sub1 sub-register index to - // the new location. However, that is only possible if the new virtual - // register is of the same regclass (or if there is an equivalent - // sub-register in that regclass). For now, simply skip handling copies if - // a sub-register is involved. 
- if (LI && !LocMO.getSubReg()) - addDefsFromCopies(LI, DbgValue, Kills, Defs, MRI, LIS); - continue; + if (LI && VNI) + LIs[LocNo] = {LI, VNI}; + } + if (ShouldExtendDef) { + Optional<std::pair<SlotIndex, SmallVector<unsigned>>> Kills; + extendDef(Idx, DbgValue, LIs, Kills, LIS); + + if (Kills) { + SmallVector<std::pair<unsigned, LiveInterval *>, 2> KilledLocIntervals; + bool AnySubreg = false; + for (unsigned LocNo : Kills->second) { + const MachineOperand &LocMO = this->locations[LocNo]; + if (LocMO.getSubReg()) { + AnySubreg = true; + break; + } + LiveInterval *LI = &LIS.getInterval(LocMO.getReg()); + KilledLocIntervals.push_back({LocNo, LI}); + } + + // FIXME: Handle sub-registers in addDefsFromCopies. The problem is that + // if the original location for example is %vreg0:sub_hi, and we find a + // full register copy in addDefsFromCopies (at the moment it only + // handles full register copies), then we must add the sub1 sub-register + // index to the new location. However, that is only possible if the new + // virtual register is of the same regclass (or if there is an + // equivalent sub-register in that regclass). For now, simply skip + // handling copies if a sub-register is involved. + if (!AnySubreg) + addDefsFromCopies(DbgValue, KilledLocIntervals, Kills->first, Defs, + MRI, LIS); + } } // For physregs, we only mark the start slot idx. DwarfDebug will see it @@ -927,7 +1178,11 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI, // location's lexical scope. In this case, splitting of an interval // can result in an interval outside of the scope being created, // causing extra unnecessary DBG_VALUEs to be emitted. To prevent - // this, trim the intervals to the lexical scope. + // this, trim the intervals to the lexical scope in the case of inlined + // variables, since heavy inlining may otherwise generate a dramatically + // large number of DBG_VALUEs. + if (!dl.getInlinedAt()) + return; LexicalScope *Scope = LS.findLexicalScope(dl); if (!Scope) @@ -1007,7 +1262,7 @@ void LDVImpl::computeIntervals() { } } -bool LDVImpl::runOnMachineFunction(MachineFunction &mf) { +bool LDVImpl::runOnMachineFunction(MachineFunction &mf, bool InstrRef) { clear(); MF = &mf; LIS = &pass.getAnalysis<LiveIntervals>(); @@ -1015,9 +1270,24 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf) { LLVM_DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: " << mf.getName() << " **********\n"); - bool Changed = collectDebugValues(mf); + bool Changed = collectDebugValues(mf, InstrRef); computeIntervals(); LLVM_DEBUG(print(dbgs())); + + // Collect the set of VReg / SlotIndexes where PHIs occur; index the sensitive + // VRegs too, for when we're notified of a range split. + SlotIndexes *Slots = LIS->getSlotIndexes(); + for (const auto &PHIIt : MF->DebugPHIPositions) { + const MachineFunction::DebugPHIRegallocPos &Position = PHIIt.second; + MachineBasicBlock *MBB = Position.MBB; + Register Reg = Position.Reg; + unsigned SubReg = Position.SubReg; + SlotIndex SI = Slots->getMBBStartIdx(MBB); + PHIValPos VP = {SI, Reg, SubReg}; + PHIValToPos.insert(std::make_pair(PHIIt.first, VP)); + RegToPHIIdx[Reg].push_back(PHIIt.first); + } + ModifiedMF = Changed; return Changed; } @@ -1041,9 +1311,19 @@ bool LiveDebugVariables::runOnMachineFunction(MachineFunction &mf) { removeDebugInstrs(mf); return false; } + + // Have we been asked to track variable locations using instruction + // referencing?
+ bool InstrRef = false; + auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); + if (TPC) { + auto &TM = TPC->getTM<TargetMachine>(); + InstrRef = TM.Options.ValueTrackingVariableLocations; + } + if (!pImpl) pImpl = new LDVImpl(this); - return static_cast<LDVImpl*>(pImpl)->runOnMachineFunction(mf); + return static_cast<LDVImpl *>(pImpl)->runOnMachineFunction(mf, InstrRef); } void LiveDebugVariables::releaseMemory() { @@ -1091,7 +1371,7 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<Register> NewRegs, break; // Now LII->end > LocMapI.start(). Do we have an overlap? - if (LocMapI.value().getLocNo() == OldLocNo && + if (LocMapI.value().containsLocNo(OldLocNo) && LII->start < LocMapI.stop()) { // Overlapping correct location. Allocate NewLocNo now. if (NewLocNo == UndefLocNo) { @@ -1112,7 +1392,7 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<Register> NewRegs, LocMapI.setStopUnchecked(LII->end); // Change the value in the overlap. This may trigger coalescing. - LocMapI.setValue(OldDbgValue.changeLocNo(NewLocNo)); + LocMapI.setValue(OldDbgValue.changeLocNo(OldLocNo, NewLocNo)); // Re-insert any removed OldDbgValue ranges. if (LStart < LocMapI.start()) { @@ -1176,7 +1456,50 @@ UserValue::splitRegister(Register OldReg, ArrayRef<Register> NewRegs, return DidChange; } +void LDVImpl::splitPHIRegister(Register OldReg, ArrayRef<Register> NewRegs) { + auto RegIt = RegToPHIIdx.find(OldReg); + if (RegIt == RegToPHIIdx.end()) + return; + + std::vector<std::pair<Register, unsigned>> NewRegIdxes; + // Iterate over all the debug instruction numbers affected by this split. + for (unsigned InstrID : RegIt->second) { + auto PHIIt = PHIValToPos.find(InstrID); + assert(PHIIt != PHIValToPos.end()); + const SlotIndex &Slot = PHIIt->second.SI; + assert(OldReg == PHIIt->second.Reg); + + // Find the new register that covers this position. + for (auto NewReg : NewRegs) { + const LiveInterval &LI = LIS->getInterval(NewReg); + auto LII = LI.find(Slot); + if (LII != LI.end() && LII->start <= Slot) { + // This new register covers this PHI position, record this for indexing. + NewRegIdxes.push_back(std::make_pair(NewReg, InstrID)); + // Record that this value lives in a different VReg now. + PHIIt->second.Reg = NewReg; + break; + } + } + + // If we do not find a new register covering this PHI, then register + // allocation has dropped its location, for example because it's not live. + // The old VReg will not be mapped to a physreg, and the instruction + // number will have been optimized out. + } + + // Re-create register index using the new register numbers. + RegToPHIIdx.erase(RegIt); + for (auto &RegAndInstr : NewRegIdxes) + RegToPHIIdx[RegAndInstr.first].push_back(RegAndInstr.second); +} + void LDVImpl::splitRegister(Register OldReg, ArrayRef<Register> NewRegs) { + // Consider whether this split range affects any PHI locations. + splitPHIRegister(OldReg, NewRegs); + + // Check whether any intervals mapped by a DBG_VALUE were split and need + // updating. bool DidChange = false; for (UserValue *UV = lookupVirtReg(OldReg); UV; UV = UV->getNext()) DidChange |= UV->splitRegister(OldReg, NewRegs, *LIS); @@ -1269,21 +1592,15 @@ void UserValue::rewriteLocations(VirtRegMap &VRM, const MachineFunction &MF, // DBG_VALUE intervals with different vregs that were allocated to the same // physical register. for (LocMap::iterator I = locInts.begin(); I.valid(); ++I) { - DbgVariableValue DbgValue = I.value(); - // Undef values don't exist in locations (and thus not in LocNoMap either) - // so skip over them. 
See getLocationNo(). - continue; - unsigned NewLocNo = LocNoMap[DbgValue.getLocNo()]; - I.setValueUnchecked(DbgValue.changeLocNo(NewLocNo)); + I.setValueUnchecked(I.value().remapLocNos(LocNoMap)); I.setStart(I.start()); } } /// Find an iterator for inserting a DBG_VALUE instruction. static MachineBasicBlock::iterator -findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx, - LiveIntervals &LIS) { +findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx, LiveIntervals &LIS, + BlockSkipInstsMap &BBSkipInstsMap) { SlotIndex Start = LIS.getMBBStartIdx(MBB); Idx = Idx.getBaseIndex(); @@ -1292,7 +1609,29 @@ findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx, while (!(MI = LIS.getInstructionFromIndex(Idx))) { // We've reached the beginning of MBB. if (Idx == Start) { - MachineBasicBlock::iterator I = MBB->SkipPHIsLabelsAndDebug(MBB->begin()); + // Retrieve the last PHI/Label/Debug location found when calling + // SkipPHIsLabelsAndDebug last time. Start searching from there. + // + // Note that the iterator kept in BBSkipInstsMap is one step behind the + // iterator returned by SkipPHIsLabelsAndDebug last time. + // One exception is when SkipPHIsLabelsAndDebug returns MBB->begin(); + // in that case BBSkipInstsMap won't save it. This handles the case that + // new instructions may be inserted at the beginning of MBB after the + // last call of SkipPHIsLabelsAndDebug. If we save MBB->begin() in + // BBSkipInstsMap, after new non-phi/non-label/non-debug instructions + // are inserted at the beginning of the MBB, the iterator in + // BBSkipInstsMap won't point to the beginning of the MBB anymore. + // Therefore, the next search in SkipPHIsLabelsAndDebug would skip those + // newly added instructions, which is unwanted. + MachineBasicBlock::iterator BeginIt; + auto MapIt = BBSkipInstsMap.find(MBB); + if (MapIt == BBSkipInstsMap.end()) + BeginIt = MBB->begin(); + else + BeginIt = std::next(MapIt->second); + auto I = MBB->SkipPHIsLabelsAndDebug(BeginIt); + if (I != BeginIt) + BBSkipInstsMap[MBB] = std::prev(I); return I; } Idx = Idx.getPrevIndex(); @@ -1306,21 +1645,24 @@ /// Find an iterator for inserting the next DBG_VALUE instruction /// (or end if no more insert locations found). static MachineBasicBlock::iterator -findNextInsertLocation(MachineBasicBlock *MBB, - MachineBasicBlock::iterator I, - SlotIndex StopIdx, MachineOperand &LocMO, - LiveIntervals &LIS, - const TargetRegisterInfo &TRI) { - if (!LocMO.isReg()) +findNextInsertLocation(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, + SlotIndex StopIdx, ArrayRef<MachineOperand> LocMOs, + LiveIntervals &LIS, const TargetRegisterInfo &TRI) { + SmallVector<Register, 4> Regs; + for (const MachineOperand &LocMO : LocMOs) + if (LocMO.isReg()) + Regs.push_back(LocMO.getReg()); + if (Regs.empty()) return MBB->instr_end(); - Register Reg = LocMO.getReg(); // Find the next instruction in the MBB that defines one of the registers. while (I != MBB->end() && !I->isTerminator()) { if (!LIS.isNotInMIMap(*I) && SlotIndex::isEarlierEqualInstr(StopIdx, LIS.getInstructionIndex(*I))) break; - if (I->definesRegister(Reg, &TRI)) + if (any_of(Regs, [&I, &TRI](Register &Reg) { + return I->definesRegister(Reg, &TRI); + })) // The insert location is directly after the instruction/bundle.
return std::next(I); ++I; @@ -1330,23 +1672,30 @@ findNextInsertLocation(MachineBasicBlock *MBB, void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx, SlotIndex StopIdx, DbgVariableValue DbgValue, - bool Spilled, unsigned SpillOffset, + ArrayRef<bool> LocSpills, + ArrayRef<unsigned> SpillOffsets, LiveIntervals &LIS, const TargetInstrInfo &TII, - const TargetRegisterInfo &TRI) { + const TargetRegisterInfo &TRI, + BlockSkipInstsMap &BBSkipInstsMap) { SlotIndex MBBEndIdx = LIS.getMBBEndIdx(&*MBB); // Only search within the current MBB. StopIdx = (MBBEndIdx < StopIdx) ? MBBEndIdx : StopIdx; - MachineBasicBlock::iterator I = findInsertLocation(MBB, StartIdx, LIS); + MachineBasicBlock::iterator I = + findInsertLocation(MBB, StartIdx, LIS, BBSkipInstsMap); // Undef values don't exist in locations so create new "noreg" register MOs // for them. See getLocationNo(). - MachineOperand MO = - !DbgValue.isUndef() - ? locations[DbgValue.getLocNo()] - : MachineOperand::CreateReg( - /* Reg */ 0, /* isDef */ false, /* isImp */ false, - /* isKill */ false, /* isDead */ false, - /* isUndef */ false, /* isEarlyClobber */ false, - /* SubReg */ 0, /* isDebug */ true); + SmallVector<MachineOperand, 8> MOs; + if (DbgValue.isUndef()) { + MOs.assign(DbgValue.loc_nos().size(), + MachineOperand::CreateReg( + /* Reg */ 0, /* isDef */ false, /* isImp */ false, + /* isKill */ false, /* isDead */ false, + /* isUndef */ false, /* isEarlyClobber */ false, + /* SubReg */ 0, /* isDebug */ true)); + } else { + for (unsigned LocNo : DbgValue.loc_nos()) + MOs.push_back(locations[LocNo]); + } ++NumInsertedDebugValues; @@ -1359,32 +1708,45 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx, // that the original virtual register was a pointer. Also, add the stack slot // offset for the spilled register to the expression. const DIExpression *Expr = DbgValue.getExpression(); - uint8_t DIExprFlags = DIExpression::ApplyOffset; bool IsIndirect = DbgValue.getWasIndirect(); - if (Spilled) { - if (IsIndirect) - DIExprFlags |= DIExpression::DerefAfter; - Expr = - DIExpression::prepend(Expr, DIExprFlags, SpillOffset); - IsIndirect = true; - } + bool IsList = DbgValue.getWasList(); + for (unsigned I = 0, E = LocSpills.size(); I != E; ++I) { + if (LocSpills[I]) { + if (!IsList) { + uint8_t DIExprFlags = DIExpression::ApplyOffset; + if (IsIndirect) + DIExprFlags |= DIExpression::DerefAfter; + Expr = DIExpression::prepend(Expr, DIExprFlags, SpillOffsets[I]); + IsIndirect = true; + } else { + SmallVector<uint64_t, 4> Ops; + DIExpression::appendOffset(Ops, SpillOffsets[I]); + Ops.push_back(dwarf::DW_OP_deref); + Expr = DIExpression::appendOpsToArg(Expr, Ops, I); + } + } - assert((!Spilled || MO.isFI()) && "a spilled location must be a frame index"); + assert((!LocSpills[I] || MOs[I].isFI()) && + "a spilled location must be a frame index"); + } + unsigned DbgValueOpcode = + IsList ? 
TargetOpcode::DBG_VALUE_LIST : TargetOpcode::DBG_VALUE; do { - BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE), - IsIndirect, MO, Variable, Expr); + BuildMI(*MBB, I, getDebugLoc(), TII.get(DbgValueOpcode), IsIndirect, MOs, + Variable, Expr); - // Continue and insert DBG_VALUES after every redefinition of register + // Continue and insert DBG_VALUES after every redefinition of a register // associated with the debug value within the range - I = findNextInsertLocation(MBB, I, StopIdx, MO, LIS, TRI); + I = findNextInsertLocation(MBB, I, StopIdx, MOs, LIS, TRI); } while (I != MBB->end()); } void UserLabel::insertDebugLabel(MachineBasicBlock *MBB, SlotIndex Idx, - LiveIntervals &LIS, - const TargetInstrInfo &TII) { - MachineBasicBlock::iterator I = findInsertLocation(MBB, Idx, LIS); + LiveIntervals &LIS, const TargetInstrInfo &TII, + BlockSkipInstsMap &BBSkipInstsMap) { + MachineBasicBlock::iterator I = + findInsertLocation(MBB, Idx, LIS, BBSkipInstsMap); ++NumInsertedDebugLabels; BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_LABEL)) .addMetadata(Label); @@ -1393,17 +1755,24 @@ void UserLabel::insertDebugLabel(MachineBasicBlock *MBB, SlotIndex Idx, void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, - const SpillOffsetMap &SpillOffsets) { + const SpillOffsetMap &SpillOffsets, + BlockSkipInstsMap &BBSkipInstsMap) { MachineFunction::iterator MFEnd = VRM->getMachineFunction().end(); for (LocMap::const_iterator I = locInts.begin(); I.valid();) { SlotIndex Start = I.start(); SlotIndex Stop = I.stop(); DbgVariableValue DbgValue = I.value(); - auto SpillIt = !DbgValue.isUndef() ? SpillOffsets.find(DbgValue.getLocNo()) - : SpillOffsets.end(); - bool Spilled = SpillIt != SpillOffsets.end(); - unsigned SpillOffset = Spilled ? SpillIt->second : 0; + + SmallVector<bool> SpilledLocs; + SmallVector<unsigned> LocSpillOffsets; + for (unsigned LocNo : DbgValue.loc_nos()) { + auto SpillIt = + !DbgValue.isUndef() ? SpillOffsets.find(LocNo) : SpillOffsets.end(); + bool Spilled = SpillIt != SpillOffsets.end(); + SpilledLocs.push_back(Spilled); + LocSpillOffsets.push_back(Spilled ? SpillIt->second : 0); + } // If the interval start was trimmed to the lexical scope insert the // DBG_VALUE at the previous index (otherwise it appears after the @@ -1411,14 +1780,14 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, if (trimmedDefs.count(Start)) Start = Start.getPrevIndex(); - LLVM_DEBUG(dbgs() << "\t[" << Start << ';' << Stop - << "):" << DbgValue.getLocNo()); + LLVM_DEBUG(auto &dbg = dbgs(); dbg << "\t[" << Start << ';' << Stop << "):"; + DbgValue.printLocNos(dbg)); MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start)->getIterator(); SlotIndex MBBEnd = LIS.getMBBEndIdx(&*MBB); LLVM_DEBUG(dbgs() << ' ' << printMBBReference(*MBB) << '-' << MBBEnd); - insertDebugValue(&*MBB, Start, Stop, DbgValue, Spilled, SpillOffset, LIS, - TII, TRI); + insertDebugValue(&*MBB, Start, Stop, DbgValue, SpilledLocs, LocSpillOffsets, + LIS, TII, TRI, BBSkipInstsMap); // This interval may span multiple basic blocks. // Insert a DBG_VALUE into each one. 
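  // Sketch of the effect (block numbers invented): for an interval covering
  // bb.1 through bb.3, the loop below adds one DBG_VALUE at the start of each
  // of bb.2 and bb.3, in addition to the insertion already done above.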
while (Stop > MBBEnd) { @@ -1428,8 +1797,8 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, break; MBBEnd = LIS.getMBBEndIdx(&*MBB); LLVM_DEBUG(dbgs() << ' ' << printMBBReference(*MBB) << '-' << MBBEnd); - insertDebugValue(&*MBB, Start, Stop, DbgValue, Spilled, SpillOffset, LIS, - TII, TRI); + insertDebugValue(&*MBB, Start, Stop, DbgValue, SpilledLocs, + LocSpillOffsets, LIS, TII, TRI, BBSkipInstsMap); } LLVM_DEBUG(dbgs() << '\n'); if (MBB == MFEnd) @@ -1439,12 +1808,13 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, } } -void UserLabel::emitDebugLabel(LiveIntervals &LIS, const TargetInstrInfo &TII) { +void UserLabel::emitDebugLabel(LiveIntervals &LIS, const TargetInstrInfo &TII, + BlockSkipInstsMap &BBSkipInstsMap) { LLVM_DEBUG(dbgs() << "\t" << loc); MachineFunction::iterator MBB = LIS.getMBBFromIndex(loc)->getIterator(); LLVM_DEBUG(dbgs() << ' ' << printMBBReference(*MBB)); - insertDebugLabel(&*MBB, loc, LIS, TII); + insertDebugLabel(&*MBB, loc, LIS, TII, BBSkipInstsMap); LLVM_DEBUG(dbgs() << '\n'); } @@ -1453,41 +1823,111 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) { LLVM_DEBUG(dbgs() << "********** EMITTING LIVE DEBUG VARIABLES **********\n"); if (!MF) return; + + BlockSkipInstsMap BBSkipInstsMap; const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); SpillOffsetMap SpillOffsets; for (auto &userValue : userValues) { LLVM_DEBUG(userValue->print(dbgs(), TRI)); userValue->rewriteLocations(*VRM, *MF, *TII, *TRI, SpillOffsets); - userValue->emitDebugValues(VRM, *LIS, *TII, *TRI, SpillOffsets); + userValue->emitDebugValues(VRM, *LIS, *TII, *TRI, SpillOffsets, + BBSkipInstsMap); } LLVM_DEBUG(dbgs() << "********** EMITTING LIVE DEBUG LABELS **********\n"); for (auto &userLabel : userLabels) { LLVM_DEBUG(userLabel->print(dbgs(), TRI)); - userLabel->emitDebugLabel(*LIS, *TII); + userLabel->emitDebugLabel(*LIS, *TII, BBSkipInstsMap); } - LLVM_DEBUG(dbgs() << "********** EMITTING INSTR REFERENCES **********\n"); + LLVM_DEBUG(dbgs() << "********** EMITTING DEBUG PHIS **********\n"); - // Re-insert any DBG_INSTR_REFs back in the position they were. Ordering - // is preserved by vector. auto Slots = LIS->getSlotIndexes(); - const MCInstrDesc &RefII = TII->get(TargetOpcode::DBG_INSTR_REF); - for (auto &P : StashedInstrReferences) { - const SlotIndex &Idx = P.first; - auto *MBB = Slots->getMBBFromIndex(Idx); - MachineBasicBlock::iterator insertPos = findInsertLocation(MBB, Idx, *LIS); - for (auto &Stashed : P.second) { - auto MIB = BuildMI(*MF, std::get<4>(Stashed), RefII); - MIB.addImm(std::get<0>(Stashed)); - MIB.addImm(std::get<1>(Stashed)); - MIB.addMetadata(std::get<2>(Stashed)); - MIB.addMetadata(std::get<3>(Stashed)); - MachineInstr *New = MIB; - MBB->insert(insertPos, New); + for (auto &It : PHIValToPos) { + // For each ex-PHI, identify its physreg location or stack slot, and emit + // a DBG_PHI for it. 
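+    // Illustrative output (registers, slots and instruction numbers are
+    // invented):
+    //   DBG_PHI $rax, 1       ; value number 1 was assigned a physreg
+    //   DBG_PHI %stack.0, 2   ; value number 2 was spilled to a stack slot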
+ unsigned InstNum = It.first; + auto Slot = It.second.SI; + Register Reg = It.second.Reg; + unsigned SubReg = It.second.SubReg; + + MachineBasicBlock *OrigMBB = Slots->getMBBFromIndex(Slot); + if (VRM->isAssignedReg(Reg) && + Register::isPhysicalRegister(VRM->getPhys(Reg))) { + unsigned PhysReg = VRM->getPhys(Reg); + if (SubReg != 0) + PhysReg = TRI->getSubReg(PhysReg, SubReg); + + auto Builder = BuildMI(*OrigMBB, OrigMBB->begin(), DebugLoc(), + TII->get(TargetOpcode::DBG_PHI)); + Builder.addReg(PhysReg); + Builder.addImm(InstNum); + } else if (VRM->getStackSlot(Reg) != VirtRegMap::NO_STACK_SLOT) { + const MachineRegisterInfo &MRI = MF->getRegInfo(); + const TargetRegisterClass *TRC = MRI.getRegClass(Reg); + unsigned SpillSize, SpillOffset; + + // Test whether this location is legal with the given subreg. + bool Success = + TII->getStackSlotRange(TRC, SubReg, SpillSize, SpillOffset, *MF); + + if (Success) { + auto Builder = BuildMI(*OrigMBB, OrigMBB->begin(), DebugLoc(), + TII->get(TargetOpcode::DBG_PHI)); + Builder.addFrameIndex(VRM->getStackSlot(Reg)); + Builder.addImm(InstNum); + } + } + // If there was no mapping for a value ID, it's optimized out. Create no + // DBG_PHI, and any variables using this value will become optimized out. + } + MF->DebugPHIPositions.clear(); + + LLVM_DEBUG(dbgs() << "********** EMITTING INSTR REFERENCES **********\n"); + + // Re-insert any debug instrs back in the position they were. Ordering + // is preserved by vector. We must re-insert in the same order to ensure that + // debug instructions don't swap, which could re-order assignments. + for (auto &P : StashedDebugInstrs) { + SlotIndex Idx = P.Idx; + + // Start block index: find the first non-debug instr in the block, and + // insert before it. + if (Idx == Slots->getMBBStartIdx(P.MBB)) { + MachineBasicBlock::iterator InsertPos = + findInsertLocation(P.MBB, Idx, *LIS, BBSkipInstsMap); + P.MBB->insert(InsertPos, P.MI); + continue; + } + + if (MachineInstr *Pos = Slots->getInstructionFromIndex(Idx)) { + // Insert at the end of any debug instructions. + auto PostDebug = std::next(Pos->getIterator()); + PostDebug = skipDebugInstructionsForward(PostDebug, P.MBB->instr_end()); + P.MBB->insert(PostDebug, P.MI); + } else { + // Insert position disappeared; walk forwards through slots until we + // find a new one. + SlotIndex End = Slots->getMBBEndIdx(P.MBB); + for (; Idx < End; Idx = Slots->getNextNonNullIndex(Idx)) { + Pos = Slots->getInstructionFromIndex(Idx); + if (Pos) { + P.MBB->insert(Pos->getIterator(), P.MI); + break; + } + } + + // We have reached the end of the block and didn't find anywhere to + // insert! It's not safe to discard any debug instructions; place them + // in front of the first terminator, or in front of end(). + if (Idx >= End) { + auto TermIt = P.MBB->getFirstTerminator(); + P.MBB->insert(TermIt, P.MI); + } } } EmitDone = true; + BBSkipInstsMap.clear(); } void LiveDebugVariables::emitDebugValues(VirtRegMap *VRM) { diff --git a/llvm/lib/CodeGen/LiveInterval.cpp b/llvm/lib/CodeGen/LiveInterval.cpp index ce0e58772068..1eed0ec5bbbe 100644 --- a/llvm/lib/CodeGen/LiveInterval.cpp +++ b/llvm/lib/CodeGen/LiveInterval.cpp @@ -487,7 +487,7 @@ bool LiveRange::overlaps(const LiveRange &Other, const CoalescerPair &CP, /// by [Start, End). 
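/// For example (hypothetical segments): a range holding [4r,8r) and [12r,16r)
/// overlaps [6r,10r), but not [8r,12r) -- segment ends are exclusive, so
/// merely touching at 8r does not count as an overlap.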
bool LiveRange::overlaps(SlotIndex Start, SlotIndex End) const { assert(Start < End && "Invalid range"); - const_iterator I = std::lower_bound(begin(), end(), End); + const_iterator I = lower_bound(*this, End); return I != begin() && (--I)->end > Start; } @@ -1336,9 +1336,8 @@ unsigned ConnectedVNInfoEqClasses::Classify(const LiveRange &LR) { const MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def); assert(MBB && "Phi-def has no defining MBB"); // Connect to values live out of predecessors. - for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) - if (const VNInfo *PVNI = LR.getVNInfoBefore(LIS.getMBBEndIdx(*PI))) + for (MachineBasicBlock *Pred : MBB->predecessors()) + if (const VNInfo *PVNI = LR.getVNInfoBefore(LIS.getMBBEndIdx(Pred))) EqClass.join(VNI->id, PVNI->id); } else { // Normal value defined by an instruction. Check for two-addr redef. @@ -1361,12 +1360,9 @@ unsigned ConnectedVNInfoEqClasses::Classify(const LiveRange &LR) { void ConnectedVNInfoEqClasses::Distribute(LiveInterval &LI, LiveInterval *LIV[], MachineRegisterInfo &MRI) { // Rewrite instructions. - for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LI.reg()), - RE = MRI.reg_end(); - RI != RE;) { - MachineOperand &MO = *RI; - MachineInstr *MI = RI->getParent(); - ++RI; + for (MachineOperand &MO : + llvm::make_early_inc_range(MRI.reg_operands(LI.reg()))) { + MachineInstr *MI = MO.getParent(); const VNInfo *VNI; if (MI->isDebugValue()) { // DBG_VALUE instructions don't have slot indexes, so get the index of diff --git a/llvm/lib/CodeGen/LiveIntervalUnion.cpp b/llvm/lib/CodeGen/LiveIntervalUnion.cpp index 7ccb8df4bc05..dfa523d4bf41 100644 --- a/llvm/lib/CodeGen/LiveIntervalUnion.cpp +++ b/llvm/lib/CodeGen/LiveIntervalUnion.cpp @@ -112,7 +112,7 @@ LiveInterval *LiveIntervalUnion::getOneVReg() const { // Scan the vector of interfering virtual registers in this union. Assume it's // quite small. bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const { - return is_contained(InterferingVRegs, VirtReg); + return is_contained(*InterferingVRegs, VirtReg); } // Collect virtual registers in this union that interfere with this @@ -126,9 +126,12 @@ bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const { // unsigned LiveIntervalUnion::Query:: collectInterferingVRegs(unsigned MaxInterferingRegs) { + if (!InterferingVRegs) + InterferingVRegs.emplace(); + // Fast path return if we already have the desired information. - if (SeenAllInterferences || InterferingVRegs.size() >= MaxInterferingRegs) - return InterferingVRegs.size(); + if (SeenAllInterferences || InterferingVRegs->size() >= MaxInterferingRegs) + return InterferingVRegs->size(); // Set up iterators on the first call. if (!CheckedFirstInterference) { @@ -157,14 +160,14 @@ collectInterferingVRegs(unsigned MaxInterferingRegs) { LiveInterval *VReg = LiveUnionI.value(); if (VReg != RecentReg && !isSeenInterference(VReg)) { RecentReg = VReg; - InterferingVRegs.push_back(VReg); - if (InterferingVRegs.size() >= MaxInterferingRegs) - return InterferingVRegs.size(); + InterferingVRegs->push_back(VReg); + if (InterferingVRegs->size() >= MaxInterferingRegs) + return InterferingVRegs->size(); } // This LiveUnion segment is no longer interesting. 
if (!(++LiveUnionI).valid()) { SeenAllInterferences = true; - return InterferingVRegs.size(); + return InterferingVRegs->size(); } } @@ -185,7 +188,7 @@ collectInterferingVRegs(unsigned MaxInterferingRegs) { LiveUnionI.advanceTo(LRI->start); } SeenAllInterferences = true; - return InterferingVRegs.size(); + return InterferingVRegs->size(); } void LiveIntervalUnion::Array::init(LiveIntervalUnion::Allocator &Alloc, diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp index a32b486240c8..23036c2b115f 100644 --- a/llvm/lib/CodeGen/LiveIntervals.cpp +++ b/llvm/lib/CodeGen/LiveIntervals.cpp @@ -38,6 +38,7 @@ #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Statepoint.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" @@ -47,6 +48,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/CodeGen/StackMaps.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -473,7 +475,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, // Visit all instructions reading li->reg(). Register Reg = li->reg(); for (MachineInstr &UseMI : MRI->reg_instructions(Reg)) { - if (UseMI.isDebugValue() || !UseMI.readsVirtualRegister(Reg)) + if (UseMI.isDebugInstr() || !UseMI.readsVirtualRegister(Reg)) continue; SlotIndex Idx = getInstructionIndex(UseMI).getRegSlot(); LiveQueryResult LRQ = li->Query(Idx); @@ -702,9 +704,6 @@ void LiveIntervals::pruneValue(LiveRange &LR, SlotIndex Kill, void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { // Keep track of regunit ranges. SmallVector<std::pair<const LiveRange*, LiveRange::const_iterator>, 8> RU; - // Keep track of subregister ranges. - SmallVector<std::pair<const LiveInterval::SubRange*, - LiveRange::const_iterator>, 4> SRs; for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { Register Reg = Register::index2VirtReg(i); @@ -714,24 +713,21 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { if (LI.empty()) continue; + // Target may have not allocated this yet. + Register PhysReg = VRM->getPhys(Reg); + if (!PhysReg) + continue; + // Find the regunit intervals for the assigned register. They may overlap // the virtual register live range, cancelling any kills. RU.clear(); - for (MCRegUnitIterator Unit(VRM->getPhys(Reg), TRI); Unit.isValid(); + for (MCRegUnitIterator Unit(PhysReg, TRI); Unit.isValid(); ++Unit) { const LiveRange &RURange = getRegUnit(*Unit); if (RURange.empty()) continue; RU.push_back(std::make_pair(&RURange, RURange.find(LI.begin()->end))); } - - if (MRI->subRegLivenessEnabled()) { - SRs.clear(); - for (const LiveInterval::SubRange &SR : LI.subranges()) { - SRs.push_back(std::make_pair(&SR, SR.find(LI.begin()->end))); - } - } - // Every instruction that kills Reg corresponds to a segment range end // point. for (LiveInterval::const_iterator RI = LI.begin(), RE = LI.end(); RI != RE; @@ -776,20 +772,18 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { // are actually never written by %2. After assignment the <kill> // flag at the read instruction is invalid. LaneBitmask DefinedLanesMask; - if (!SRs.empty()) { + if (LI.hasSubRanges()) { // Compute a mask of lanes that are defined. 
DefinedLanesMask = LaneBitmask::getNone(); - for (auto &SRP : SRs) { - const LiveInterval::SubRange &SR = *SRP.first; - LiveRange::const_iterator &I = SRP.second; - if (I == SR.end()) - continue; - I = SR.advanceTo(I, RI->end); - if (I == SR.end() || I->start >= RI->end) - continue; - // I is overlapping RI - DefinedLanesMask |= SR.LaneMask; - } + for (const LiveInterval::SubRange &SR : LI.subranges()) + for (const LiveRange::Segment &Segment : SR.segments) { + if (Segment.start >= RI->end) + break; + if (Segment.end == RI->end) { + DefinedLanesMask |= SR.LaneMask; + break; + } + } } else DefinedLanesMask = LaneBitmask::getAll(); @@ -799,7 +793,9 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { continue; if (MO.isUse()) { // Reading any undefined lanes? - LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg()); + unsigned SubReg = MO.getSubReg(); + LaneBitmask UseMask = SubReg ? TRI->getSubRegIndexLaneMask(SubReg) + : MRI->getMaxLaneMaskForVReg(Reg); if ((UseMask & ~DefinedLanesMask).any()) goto CancelKill; } else if (MO.getSubReg() == 0) { @@ -897,6 +893,23 @@ LiveIntervals::addSegmentToEndOfBlock(Register Reg, MachineInstr &startInst) { //===----------------------------------------------------------------------===// // Register mask functions //===----------------------------------------------------------------------===// +/// Check whether a use of Reg in MI is live-through. Live-through means that +/// the value is alive on exit from the machine instruction. An example of +/// such a use is a deopt value in a statepoint instruction. +static bool hasLiveThroughUse(const MachineInstr *MI, Register Reg) { + if (MI->getOpcode() != TargetOpcode::STATEPOINT) + return false; + StatepointOpers SO(MI); + if (SO.getFlags() & (uint64_t)StatepointFlags::DeoptLiveIn) + return false; + for (unsigned Idx = SO.getNumDeoptArgsIdx(), E = SO.getNumGCPtrIdx(); Idx < E; + ++Idx) { + const MachineOperand &MO = MI->getOperand(Idx); + if (MO.isReg() && MO.getReg() == Reg) + return true; + } + return false; +} bool LiveIntervals::checkRegMaskInterference(LiveInterval &LI, BitVector &UsableRegs) { @@ -925,11 +938,8 @@ bool LiveIntervals::checkRegMaskInterference(LiveInterval &LI, return false; bool Found = false; - while (true) { - assert(*SlotI >= LiveI->start); - // Loop over all slots overlapping this segment. - while (*SlotI < LiveI->end) { - // *SlotI overlaps LI. Collect mask bits. + // Utility to union regmasks. + auto unionBitMask = [&](unsigned Idx) { if (!Found) { // This is the first overlap. Initialize UsableRegs to all ones. UsableRegs.clear(); @@ -937,14 +947,28 @@ bool LiveIntervals::checkRegMaskInterference(LiveInterval &LI, Found = true; } // Remove usable registers clobbered by this mask. - UsableRegs.clearBitsNotInMask(Bits[SlotI-Slots.begin()]); + UsableRegs.clearBitsNotInMask(Bits[Idx]); + }; + while (true) { + assert(*SlotI >= LiveI->start); + // Loop over all slots overlapping this segment. + while (*SlotI < LiveI->end) { + // *SlotI overlaps LI. Collect mask bits. + unionBitMask(SlotI - Slots.begin()); if (++SlotI == SlotE) return Found; } + // If a segment ends with a live-through use, we need to collect its + // regmask. + if (*SlotI == LiveI->end) + if (MachineInstr *MI = getInstructionFromIndex(*SlotI)) + if (hasLiveThroughUse(MI, LI.reg())) + unionBitMask(SlotI++ - Slots.begin()); // *SlotI is beyond the current LI segment. - LiveI = LI.advanceTo(LiveI, *SlotI); - if (LiveI == LiveE) + // Special advance implementation to not miss next LiveI->end.
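+    // Sketch of the case this guards against (indexes invented): a segment
+    // that ends exactly at a STATEPOINT's slot, where a deopt operand is
+    // live-through; advancing straight past the segment would drop that
+    // instruction's regmask even though unionBitMask still needs to see it.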
+ if (++LiveI == LiveE || SlotI == SlotE || *SlotI > LI.endIndex()) return Found; + while (LiveI->end < *SlotI) + ++LiveI; // Advance SlotI until it overlaps. while (*SlotI < LiveI->start) if (++SlotI == SlotE) @@ -1465,7 +1489,7 @@ private: MachineBasicBlock::iterator Begin = MBB->begin(); while (MII != Begin) { - if ((--MII)->isDebugInstr()) + if ((--MII)->isDebugOrPseudoInstr()) continue; SlotIndex Idx = Indexes->getInstructionIndex(*MII); @@ -1560,7 +1584,7 @@ void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin, for (MachineBasicBlock::iterator I = End; I != Begin;) { --I; MachineInstr &MI = *I; - if (MI.isDebugInstr()) + if (MI.isDebugOrPseudoInstr()) continue; SlotIndex instrIdx = getInstructionIndex(MI); @@ -1657,7 +1681,7 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, for (MachineBasicBlock::iterator I = End; I != Begin;) { --I; MachineInstr &MI = *I; - if (MI.isDebugInstr()) + if (MI.isDebugOrPseudoInstr()) continue; for (MachineInstr::const_mop_iterator MOI = MI.operands_begin(), MOE = MI.operands_end(); diff --git a/llvm/lib/CodeGen/LivePhysRegs.cpp b/llvm/lib/CodeGen/LivePhysRegs.cpp index 547970e7ab5d..c0c7848139e4 100644 --- a/llvm/lib/CodeGen/LivePhysRegs.cpp +++ b/llvm/lib/CodeGen/LivePhysRegs.cpp @@ -125,8 +125,8 @@ void LivePhysRegs::print(raw_ostream &OS) const { return; } - for (const_iterator I = begin(), E = end(); I != E; ++I) - OS << " " << printReg(*I, TRI); + for (MCPhysReg R : *this) + OS << " " << printReg(R, TRI); OS << "\n"; } @@ -239,6 +239,10 @@ void LivePhysRegs::addLiveIns(const MachineBasicBlock &MBB) { addBlockLiveIns(MBB); } +void LivePhysRegs::addLiveInsNoPristines(const MachineBasicBlock &MBB) { + addBlockLiveIns(MBB); +} + void llvm::computeLiveIns(LivePhysRegs &LiveRegs, const MachineBasicBlock &MBB) { const MachineFunction &MF = *MBB.getParent(); diff --git a/llvm/lib/CodeGen/LiveRangeCalc.cpp b/llvm/lib/CodeGen/LiveRangeCalc.cpp index e9c9b70d29a9..3ef28042acb0 100644 --- a/llvm/lib/CodeGen/LiveRangeCalc.cpp +++ b/llvm/lib/CodeGen/LiveRangeCalc.cpp @@ -158,8 +158,7 @@ bool LiveRangeCalc::isDefOnEntry(LiveRange &LR, ArrayRef<SlotIndex> Undefs, // If LR has a segment S that starts at the next block, i.e. [End, ...), // std::upper_bound will return the segment following S. Instead, // S should be treated as the first segment that does not overlap B. - LiveRange::iterator UB = std::upper_bound(LR.begin(), LR.end(), - End.getPrevSlot()); + LiveRange::iterator UB = upper_bound(LR, End.getPrevSlot()); if (UB != LR.begin()) { LiveRange::Segment &Seg = *std::prev(UB); if (Seg.end > Begin) { diff --git a/llvm/lib/CodeGen/LiveRangeEdit.cpp b/llvm/lib/CodeGen/LiveRangeEdit.cpp index 037cb5426235..64a2dd275643 100644 --- a/llvm/lib/CodeGen/LiveRangeEdit.cpp +++ b/llvm/lib/CodeGen/LiveRangeEdit.cpp @@ -113,9 +113,10 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI, if (!MO.isReg() || !MO.getReg() || !MO.readsReg()) continue; - // We can't remat physreg uses, unless it is a constant. + // We can't remat physreg uses, unless it is a constant or target wants + // to ignore this use. 
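+      // (Illustrative, not exhaustive: a constant physreg is something like a
+      // zero register whose value never changes, while isIgnorableUse lets a
+      // target treat certain always-live operands as irrelevant to remat.)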
if (Register::isPhysicalRegister(MO.getReg())) { - if (MRI.isConstantPhysReg(MO.getReg())) + if (MRI.isConstantPhysReg(MO.getReg()) || TII.isIgnorableUse(MO)) continue; return false; } @@ -458,11 +459,8 @@ LiveRangeEdit::MRI_NoteNewVirtualRegister(Register VReg) { NewRegs.push_back(VReg); } -void -LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF, - const MachineLoopInfo &Loops, - const MachineBlockFrequencyInfo &MBFI) { - VirtRegAuxInfo VRAI(MF, LIS, *VRM, Loops, MBFI); +void LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF, + VirtRegAuxInfo &VRAI) { for (unsigned I = 0, Size = size(); I < Size; ++I) { LiveInterval &LI = LIS.getInterval(get(I)); if (MRI.recomputeRegClass(LI.reg())) diff --git a/llvm/lib/CodeGen/LiveRangeShrink.cpp b/llvm/lib/CodeGen/LiveRangeShrink.cpp index 26439a656917..054f4370b609 100644 --- a/llvm/lib/CodeGen/LiveRangeShrink.cpp +++ b/llvm/lib/CodeGen/LiveRangeShrink.cpp @@ -130,7 +130,7 @@ bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) { for (MachineBasicBlock::iterator Next = MBB.begin(); Next != MBB.end();) { MachineInstr &MI = *Next; ++Next; - if (MI.isPHI() || MI.isDebugInstr()) + if (MI.isPHI() || MI.isDebugOrPseudoInstr()) continue; if (MI.mayStore()) SawStore = true; @@ -156,7 +156,8 @@ bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) { // If MI has side effects, it should become a barrier for code motion. // IOM is rebuild from the next instruction to prevent later // instructions from being moved before this MI. - if (MI.hasUnmodeledSideEffects() && Next != MBB.end()) { + if (MI.hasUnmodeledSideEffects() && !MI.isPseudoProbe() && + Next != MBB.end()) { BuildInstOrderMap(Next, IOM); SawStore = false; } @@ -218,7 +219,7 @@ bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) { if (DefMO && Insert && NumEligibleUse > 1 && Barrier <= IOM[Insert]) { MachineBasicBlock::iterator I = std::next(Insert->getIterator()); // Skip all the PHI and debug instructions. - while (I != MBB.end() && (I->isPHI() || I->isDebugInstr())) + while (I != MBB.end() && (I->isPHI() || I->isDebugOrPseudoInstr())) I = std::next(I); if (I == MI.getIterator()) continue; @@ -234,7 +235,7 @@ bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) { MachineBasicBlock::iterator EndIter = std::next(MI.getIterator()); if (MI.getOperand(0).isReg()) for (; EndIter != MBB.end() && EndIter->isDebugValue() && - EndIter->getDebugOperandForReg(MI.getOperand(0).getReg()); + EndIter->hasDebugOperandForReg(MI.getOperand(0).getReg()); ++EndIter, ++Next) IOM[&*EndIter] = NewOrder; MBB.splice(I, &MBB, MI.getIterator(), EndIter); diff --git a/llvm/lib/CodeGen/LiveRangeUtils.h b/llvm/lib/CodeGen/LiveRangeUtils.h index 0e6bfeb0d4a5..dace05f1ad95 100644 --- a/llvm/lib/CodeGen/LiveRangeUtils.h +++ b/llvm/lib/CodeGen/LiveRangeUtils.h @@ -5,9 +5,9 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// +/// \file /// This file contains helper functions to modify live ranges. 
-// +/// //===----------------------------------------------------------------------===// #ifndef LLVM_LIB_CODEGEN_LIVERANGEUTILS_H diff --git a/llvm/lib/CodeGen/LiveRegMatrix.cpp b/llvm/lib/CodeGen/LiveRegMatrix.cpp index a69aa6557e46..4c0172a930b5 100644 --- a/llvm/lib/CodeGen/LiveRegMatrix.cpp +++ b/llvm/lib/CodeGen/LiveRegMatrix.cpp @@ -216,7 +216,21 @@ bool LiveRegMatrix::checkInterference(SlotIndex Start, SlotIndex End, // Check for interference with that segment for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { - if (query(LR, *Units).checkInterference()) + // LR is stack-allocated. LiveRegMatrix caches queries by a key that + // includes the address of the live range. If (for the same reg unit) this + // checkInterference overload is called twice, without any other query() + // calls in between (on heap-allocated LiveRanges) - which would invalidate + // the cached query - the LR address seen the second time may well be the + // same as that seen the first time, while the Start/End/valno may not - yet + // the same cached result would be fetched. To avoid that, we don't cache + // this query. + // + // FIXME: the usability of the Query API needs to be improved to avoid + // subtle bugs due to query identity. Avoiding caching, for example, would + // greatly simplify things. + LiveIntervalUnion::Query Q; + Q.reset(UserTag, LR, Matrix[*Units]); + if (Q.checkInterference()) return true; } return false; diff --git a/llvm/lib/CodeGen/LiveRegUnits.cpp b/llvm/lib/CodeGen/LiveRegUnits.cpp index ea2075bc139d..d8d8bd5d61a2 100644 --- a/llvm/lib/CodeGen/LiveRegUnits.cpp +++ b/llvm/lib/CodeGen/LiveRegUnits.cpp @@ -81,8 +81,17 @@ static void addBlockLiveIns(LiveRegUnits &LiveUnits, static void addCalleeSavedRegs(LiveRegUnits &LiveUnits, const MachineFunction &MF) { const MachineRegisterInfo &MRI = MF.getRegInfo(); - for (const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); CSR && *CSR; ++CSR) - LiveUnits.addReg(*CSR); + const MachineFrameInfo &MFI = MF.getFrameInfo(); + for (const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); CSR && *CSR; ++CSR) { + const unsigned N = *CSR; + + const auto &CSI = MFI.getCalleeSavedInfo(); + auto Info = + llvm::find_if(CSI, [N](auto Info) { return Info.getReg() == N; }); + // If we have no info for this callee-saved register, assume it is liveout + if (Info == CSI.end() || Info->isRestored()) + LiveUnits.addReg(N); + } } void LiveRegUnits::addPristines(const MachineFunction &MF) { diff --git a/llvm/lib/CodeGen/LiveVariables.cpp b/llvm/lib/CodeGen/LiveVariables.cpp index 49b880c30936..7181dbc9c870 100644 --- a/llvm/lib/CodeGen/LiveVariables.cpp +++ b/llvm/lib/CodeGen/LiveVariables.cpp @@ -67,9 +67,8 @@ LiveVariables::VarInfo::findKill(const MachineBasicBlock *MBB) const { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void LiveVariables::VarInfo::dump() const { dbgs() << " Alive in blocks: "; - for (SparseBitVector<>::iterator I = AliveBlocks.begin(), - E = AliveBlocks.end(); I != E; ++I) - dbgs() << *I << ", "; + for (unsigned AB : AliveBlocks) + dbgs() << AB << ", "; dbgs() << "\n Killed by:"; if (Kills.empty()) dbgs() << " No instructions.\n"; @@ -173,9 +172,8 @@ void LiveVariables::HandleVirtRegUse(Register Reg, MachineBasicBlock *MBB, VRInfo.Kills.push_back(&MI); // Update all dominating blocks to mark them as "known live". 
- for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), - E = MBB->pred_end(); PI != E; ++PI) - MarkVirtRegAliveInBlock(VRInfo, MRI->getVRegDef(Reg)->getParent(), *PI); + for (MachineBasicBlock *Pred : MBB->predecessors()) + MarkVirtRegAliveInBlock(VRInfo, MRI->getVRegDef(Reg)->getParent(), Pred); } void LiveVariables::HandleVirtRegDef(Register Reg, MachineInstr &MI) { @@ -499,7 +497,7 @@ void LiveVariables::UpdatePhysRegDefs(MachineInstr &MI, void LiveVariables::runOnInstr(MachineInstr &MI, SmallVectorImpl<unsigned> &Defs) { - assert(!MI.isDebugInstr()); + assert(!MI.isDebugOrPseudoInstr()); // Process all of the operands of the instruction... unsigned NumOperandsToProcess = MI.getNumOperands(); @@ -574,7 +572,7 @@ void LiveVariables::runOnBlock(MachineBasicBlock *MBB, const unsigned NumRegs) { DistanceMap.clear(); unsigned Dist = 0; for (MachineInstr &MI : *MBB) { - if (MI.isDebugInstr()) + if (MI.isDebugOrPseudoInstr()) continue; DistanceMap.insert(std::make_pair(&MI, Dist++)); @@ -588,19 +586,16 @@ void LiveVariables::runOnBlock(MachineBasicBlock *MBB, const unsigned NumRegs) { if (!PHIVarInfo[MBB->getNumber()].empty()) { SmallVectorImpl<unsigned> &VarInfoVec = PHIVarInfo[MBB->getNumber()]; - for (SmallVectorImpl<unsigned>::iterator I = VarInfoVec.begin(), - E = VarInfoVec.end(); I != E; ++I) + for (unsigned I : VarInfoVec) // Mark it alive only in the block we are representing. - MarkVirtRegAliveInBlock(getVarInfo(*I),MRI->getVRegDef(*I)->getParent(), + MarkVirtRegAliveInBlock(getVarInfo(I), MRI->getVRegDef(I)->getParent(), MBB); } // MachineCSE may CSE instructions which write to non-allocatable physical // registers across MBBs. Remember if any reserved register is liveout. SmallSet<unsigned, 4> LiveOuts; - for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) { - MachineBasicBlock *SuccMBB = *SI; + for (const MachineBasicBlock *SuccMBB : MBB->successors()) { if (SuccMBB->isEHPad()) continue; for (const auto &LI : SuccMBB->liveins()) { @@ -665,8 +660,8 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { // function. If so, it is due to a bug in the instruction selector or some // other part of the code generator if this happens. #ifndef NDEBUG - for(MachineFunction::iterator i = MF->begin(), e = MF->end(); i != e; ++i) - assert(Visited.contains(&*i) && "unreachable basic block found"); + for (const MachineBasicBlock &MBB : *MF) + assert(Visited.contains(&MBB) && "unreachable basic block found"); #endif PhysRegDef.clear(); @@ -779,13 +774,12 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB, // Record all vreg defs and kills of all instructions in SuccBB. 
for (; BBI != BBE; ++BBI) { - for (MachineInstr::mop_iterator I = BBI->operands_begin(), - E = BBI->operands_end(); I != E; ++I) { - if (I->isReg() && Register::isVirtualRegister(I->getReg())) { - if (I->isDef()) - Defs.insert(I->getReg()); - else if (I->isKill()) - Kills.insert(I->getReg()); + for (const MachineOperand &Op : BBI->operands()) { + if (Op.isReg() && Register::isVirtualRegister(Op.getReg())) { + if (Op.isDef()) + Defs.insert(Op.getReg()); + else if (Op.isKill()) + Kills.insert(Op.getReg()); } } } @@ -817,8 +811,8 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB, const unsigned NumNew = BB->getNumber(); SparseBitVector<> &BV = LiveInSets[SuccBB->getNumber()]; - for (auto R = BV.begin(), E = BV.end(); R != E; R++) { - Register VirtReg = Register::index2VirtReg(*R); + for (unsigned R : BV) { + Register VirtReg = Register::index2VirtReg(R); LiveVariables::VarInfo &VI = getVarInfo(VirtReg); VI.AliveBlocks.set(NumNew); } diff --git a/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp index ec6e693e8a46..2e99c8595cbd 100644 --- a/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -176,9 +176,7 @@ void LocalStackSlotPass::AssignProtectedObjSet( const StackObjSet &UnassignedObjs, SmallSet<int, 16> &ProtectedObjs, MachineFrameInfo &MFI, bool StackGrowsDown, int64_t &Offset, Align &MaxAlign) { - for (StackObjSet::const_iterator I = UnassignedObjs.begin(), - E = UnassignedObjs.end(); I != E; ++I) { - int i = *I; + for (int i : UnassignedObjs) { AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign); ProtectedObjs.insert(i); } diff --git a/llvm/lib/CodeGen/LowLevelType.cpp b/llvm/lib/CodeGen/LowLevelType.cpp index 2bda586db8c7..62e9c6b629d3 100644 --- a/llvm/lib/CodeGen/LowLevelType.cpp +++ b/llvm/lib/CodeGen/LowLevelType.cpp @@ -20,11 +20,11 @@ using namespace llvm; LLT llvm::getLLTForType(Type &Ty, const DataLayout &DL) { if (auto VTy = dyn_cast<VectorType>(&Ty)) { - auto NumElements = cast<FixedVectorType>(VTy)->getNumElements(); + auto EC = VTy->getElementCount(); LLT ScalarTy = getLLTForType(*VTy->getElementType(), DL); - if (NumElements == 1) + if (EC.isScalar()) return ScalarTy; - return LLT::vector(NumElements, ScalarTy); + return LLT::vector(EC, ScalarTy); } if (auto PTy = dyn_cast<PointerType>(&Ty)) { @@ -56,8 +56,8 @@ LLT llvm::getLLTForMVT(MVT Ty) { if (!Ty.isVector()) return LLT::scalar(Ty.getSizeInBits()); - return LLT::vector(Ty.getVectorNumElements(), - Ty.getVectorElementType().getSizeInBits()); + return LLT::scalarOrVector(Ty.getVectorElementCount(), + Ty.getVectorElementType().getSizeInBits()); } const llvm::fltSemantics &llvm::getFltSemanticForLLT(LLT Ty) { diff --git a/llvm/lib/CodeGen/MBFIWrapper.cpp b/llvm/lib/CodeGen/MBFIWrapper.cpp index 4755defec793..efebb18c9908 100644 --- a/llvm/lib/CodeGen/MBFIWrapper.cpp +++ b/llvm/lib/CodeGen/MBFIWrapper.cpp @@ -11,8 +11,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/MBFIWrapper.h" +#include "llvm/ADT/Optional.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MBFIWrapper.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/MIRFSDiscriminator.cpp b/llvm/lib/CodeGen/MIRFSDiscriminator.cpp new file mode 100644 index 000000000000..bf78594e9b23 --- /dev/null +++ b/llvm/lib/CodeGen/MIRFSDiscriminator.cpp @@ -0,0 +1,137 @@ +//===-------- MIRFSDiscriminator.cpp: Flow Sensitive Discriminator --------===// +// +// Part of the LLVM 
Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides the implementation of a machine pass that adds the flow +// sensitive discriminator to the instruction debug information. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MIRFSDiscriminator.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/Analysis/BlockFrequencyInfoImpl.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h" +#include <unordered_map> + +using namespace llvm; +using namespace sampleprof; +using namespace sampleprofutil; + +#define DEBUG_TYPE "mirfs-discriminators" + +char MIRAddFSDiscriminators::ID = 0; + +INITIALIZE_PASS(MIRAddFSDiscriminators, DEBUG_TYPE, + "Add MIR Flow Sensitive Discriminators", + /* cfg = */ false, /* is_analysis = */ false) + +char &llvm::MIRAddFSDiscriminatorsID = MIRAddFSDiscriminators::ID; + +FunctionPass *llvm::createMIRAddFSDiscriminatorsPass(FSDiscriminatorPass P) { + return new MIRAddFSDiscriminators(P); +} + +// Compute a hash value using debug line number, and the line numbers from the +// inline stack. +static uint64_t getCallStackHash(const MachineBasicBlock &BB, + const MachineInstr &MI, + const DILocation *DIL) { + auto updateHash = [](const StringRef &Str) -> uint64_t { + if (Str.empty()) + return 0; + return MD5Hash(Str); + }; + uint64_t Ret = updateHash(std::to_string(DIL->getLine())); + Ret ^= updateHash(BB.getName()); + Ret ^= updateHash(DIL->getScope()->getSubprogram()->getLinkageName()); + for (DIL = DIL->getInlinedAt(); DIL; DIL = DIL->getInlinedAt()) { + Ret ^= updateHash(std::to_string(DIL->getLine())); + Ret ^= updateHash(DIL->getScope()->getSubprogram()->getLinkageName()); + } + return Ret; +} + +// Traverse the CFG and assign FS discriminators. If two instructions +// have the same line number and discriminator but reside in different BBs, +// the latter instruction will get a new discriminator value. The new +// discriminator keeps the existing discriminator value but sets new bits +// between LowBit and HighBit. +bool MIRAddFSDiscriminators::runOnMachineFunction(MachineFunction &MF) { + if (!EnableFSDiscriminator) + return false; + + bool Changed = false; + using LocationDiscriminator = std::tuple<StringRef, unsigned, unsigned>; + using BBSet = DenseSet<const MachineBasicBlock *>; + using LocationDiscriminatorBBMap = DenseMap<LocationDiscriminator, BBSet>; + using LocationDiscriminatorCurrPassMap = + DenseMap<LocationDiscriminator, unsigned>; + + LocationDiscriminatorBBMap LDBM; + LocationDiscriminatorCurrPassMap LDCM; + + // Mask of discriminators before this pass. + unsigned BitMaskBefore = getN1Bits(LowBit); + // Mask of discriminators including this pass. + unsigned BitMaskNow = getN1Bits(HighBit); + // Mask of discriminators for bits specific to this pass.
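// (Worked example with invented values: if LowBit = 7 and HighBit = 14,
// getN1Bits gives BitMaskBefore = 0x7f and BitMaskNow = 0x3fff, so the
// XOR below leaves BitMaskThisPass = 0x3f80 -- exactly bits 7..13, the
// only bits this pass is allowed to set.)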
+ unsigned BitMaskThisPass = BitMaskNow ^ BitMaskBefore; + unsigned NumNewD = 0; + + LLVM_DEBUG(dbgs() << "MIRAddFSDiscriminators working on Func: " + << MF.getFunction().getName() << "\n"); + for (MachineBasicBlock &BB : MF) { + for (MachineInstr &I : BB) { + const DILocation *DIL = I.getDebugLoc().get(); + if (!DIL) + continue; + unsigned LineNo = DIL->getLine(); + if (LineNo == 0) + continue; + unsigned Discriminator = DIL->getDiscriminator(); + LocationDiscriminator LD{DIL->getFilename(), LineNo, Discriminator}; + auto &BBMap = LDBM[LD]; + auto R = BBMap.insert(&BB); + if (BBMap.size() == 1) + continue; + + unsigned DiscriminatorCurrPass; + DiscriminatorCurrPass = R.second ? ++LDCM[LD] : LDCM[LD]; + DiscriminatorCurrPass = DiscriminatorCurrPass << LowBit; + DiscriminatorCurrPass += getCallStackHash(BB, I, DIL); + DiscriminatorCurrPass &= BitMaskThisPass; + unsigned NewD = Discriminator | DiscriminatorCurrPass; + const auto *const NewDIL = DIL->cloneWithDiscriminator(NewD); + if (!NewDIL) { + LLVM_DEBUG(dbgs() << "Could not encode discriminator: " + << DIL->getFilename() << ":" << DIL->getLine() << ":" + << DIL->getColumn() << ":" << Discriminator << " " + << I << "\n"); + continue; + } + + I.setDebugLoc(NewDIL); + NumNewD++; + LLVM_DEBUG(dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":" + << DIL->getColumn() << ": add FS discriminator, from " + << Discriminator << " -> " << NewD << "\n"); + Changed = true; + } + } + + if (Changed) { + createFSDiscriminatorVariable(MF.getFunction().getParent()); + LLVM_DEBUG(dbgs() << "Num of FS Discriminators: " << NumNewD << "\n"); + } + + return Changed; +} diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/llvm/lib/CodeGen/MIRParser/MILexer.cpp index b86fd6b41318..87fde7d39a60 100644 --- a/llvm/lib/CodeGen/MIRParser/MILexer.cpp +++ b/llvm/lib/CodeGen/MIRParser/MILexer.cpp @@ -226,6 +226,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("adjust_cfa_offset", MIToken::kw_cfi_adjust_cfa_offset) .Case("escape", MIToken::kw_cfi_escape) .Case("def_cfa", MIToken::kw_cfi_def_cfa) + .Case("llvm_def_aspace_cfa", MIToken::kw_cfi_llvm_def_aspace_cfa) .Case("remember_state", MIToken::kw_cfi_remember_state) .Case("restore", MIToken::kw_cfi_restore) .Case("restore_state", MIToken::kw_cfi_restore_state) @@ -271,6 +272,8 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("heap-alloc-marker", MIToken::kw_heap_alloc_marker) .Case("bbsections", MIToken::kw_bbsections) .Case("unknown-size", MIToken::kw_unknown_size) + .Case("unknown-address", MIToken::kw_unknown_address) + .Case("distinct", MIToken::kw_distinct) .Default(MIToken::Identifier); } diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.h b/llvm/lib/CodeGen/MIRParser/MILexer.h index 452eda721331..68425b41c3fb 100644 --- a/llvm/lib/CodeGen/MIRParser/MILexer.h +++ b/llvm/lib/CodeGen/MIRParser/MILexer.h @@ -83,6 +83,7 @@ struct MIToken { kw_cfi_adjust_cfa_offset, kw_cfi_escape, kw_cfi_def_cfa, + kw_cfi_llvm_def_aspace_cfa, kw_cfi_register, kw_cfi_remember_state, kw_cfi_restore, @@ -126,6 +127,10 @@ struct MIToken { kw_heap_alloc_marker, kw_bbsections, kw_unknown_size, + kw_unknown_address, + + // Metadata types. 
+ kw_distinct, // Named metadata keywords md_tbaa, diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp index fe979b981886..34e1f9225d42 100644 --- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -395,6 +395,7 @@ class MIParser { MachineFunction &MF; SMDiagnostic &Error; StringRef Source, CurrentSource; + SMRange SourceRange; MIToken Token; PerFunctionMIParsingState &PFS; /// Maps from slot numbers to function's unnamed basic blocks. @@ -403,6 +404,8 @@ class MIParser { public: MIParser(PerFunctionMIParsingState &PFS, SMDiagnostic &Error, StringRef Source); + MIParser(PerFunctionMIParsingState &PFS, SMDiagnostic &Error, + StringRef Source, SMRange SourceRange); /// \p SkipChar gives the number of characters to skip before looking /// for the next token. @@ -428,6 +431,10 @@ public: bool parseStandaloneRegister(Register &Reg); bool parseStandaloneStackObject(int &FI); bool parseStandaloneMDNode(MDNode *&Node); + bool parseMachineMetadata(); + bool parseMDTuple(MDNode *&MD, bool IsDistinct); + bool parseMDNodeVector(SmallVectorImpl<Metadata *> &Elts); + bool parseMetadata(Metadata *&MD); bool parseBasicBlockDefinition(DenseMap<unsigned, MachineBasicBlock *> &MBBSlots); @@ -472,6 +479,7 @@ public: bool parseMetadataOperand(MachineOperand &Dest); bool parseCFIOffset(int &Offset); bool parseCFIRegister(Register &Reg); + bool parseCFIAddressSpace(unsigned &AddressSpace); bool parseCFIEscapeValues(std::string& Values); bool parseCFIOperand(MachineOperand &Dest); bool parseIRBlock(BasicBlock *&BB, const Function &F); @@ -549,6 +557,10 @@ private: /// parseStringConstant /// ::= StringConstant bool parseStringConstant(std::string &Result); + + /// Map the location in the MI string to the corresponding location specified + /// in `SourceRange`. 
+ SMLoc mapSMLoc(StringRef::iterator Loc); }; } // end anonymous namespace @@ -558,6 +570,11 @@ MIParser::MIParser(PerFunctionMIParsingState &PFS, SMDiagnostic &Error, : MF(PFS.MF), Error(Error), Source(Source), CurrentSource(Source), PFS(PFS) {} +MIParser::MIParser(PerFunctionMIParsingState &PFS, SMDiagnostic &Error, + StringRef Source, SMRange SourceRange) + : MF(PFS.MF), Error(Error), Source(Source), CurrentSource(Source), + SourceRange(SourceRange), PFS(PFS) {} + void MIParser::lex(unsigned SkipChar) { CurrentSource = lexMIToken( CurrentSource.slice(SkipChar, StringRef::npos), Token, @@ -583,6 +600,13 @@ bool MIParser::error(StringRef::iterator Loc, const Twine &Msg) { return true; } +SMLoc MIParser::mapSMLoc(StringRef::iterator Loc) { + assert(SourceRange.isValid() && "Invalid source range"); + assert(Loc >= Source.data() && Loc <= (Source.data() + Source.size())); + return SMLoc::getFromPointer(SourceRange.Start.getPointer() + + (Loc - Source.data())); +} + typedef function_ref<bool(StringRef::iterator Loc, const Twine &)> ErrorCallbackType; @@ -987,7 +1011,9 @@ bool MIParser::parse(MachineInstr *&MI) { Optional<unsigned> TiedDefIdx; if (parseMachineOperandAndTargetFlags(OpCode, Operands.size(), MO, TiedDefIdx)) return true; - if (OpCode == TargetOpcode::DBG_VALUE && MO.isReg()) + if ((OpCode == TargetOpcode::DBG_VALUE || + OpCode == TargetOpcode::DBG_VALUE_LIST) && + MO.isReg()) MO.setIsDebug(); Operands.push_back( ParsedMachineOperand(MO, Loc, Token.location(), TiedDefIdx)); @@ -1168,6 +1194,130 @@ bool MIParser::parseStandaloneMDNode(MDNode *&Node) { return false; } +bool MIParser::parseMachineMetadata() { + lex(); + if (Token.isNot(MIToken::exclaim)) + return error("expected a metadata node"); + + lex(); + if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isSigned()) + return error("expected metadata id after '!'"); + unsigned ID = 0; + if (getUnsigned(ID)) + return true; + lex(); + if (expectAndConsume(MIToken::equal)) + return true; + bool IsDistinct = Token.is(MIToken::kw_distinct); + if (IsDistinct) + lex(); + if (Token.isNot(MIToken::exclaim)) + return error("expected a metadata node"); + lex(); + + MDNode *MD; + if (parseMDTuple(MD, IsDistinct)) + return true; + + auto FI = PFS.MachineForwardRefMDNodes.find(ID); + if (FI != PFS.MachineForwardRefMDNodes.end()) { + FI->second.first->replaceAllUsesWith(MD); + PFS.MachineForwardRefMDNodes.erase(FI); + + assert(PFS.MachineMetadataNodes[ID] == MD && "Tracking VH didn't work"); + } else { + if (PFS.MachineMetadataNodes.count(ID)) + return error("Metadata id is already used"); + PFS.MachineMetadataNodes[ID].reset(MD); + } + + return false; +} + +bool MIParser::parseMDTuple(MDNode *&MD, bool IsDistinct) { + SmallVector<Metadata *, 16> Elts; + if (parseMDNodeVector(Elts)) + return true; + MD = (IsDistinct ? 
MDTuple::getDistinct + : MDTuple::get)(MF.getFunction().getContext(), Elts); + return false; +} + +bool MIParser::parseMDNodeVector(SmallVectorImpl<Metadata *> &Elts) { + if (Token.isNot(MIToken::lbrace)) + return error("expected '{' here"); + lex(); + + if (Token.is(MIToken::rbrace)) { + lex(); + return false; + } + + do { + Metadata *MD; + if (parseMetadata(MD)) + return true; + + Elts.push_back(MD); + + if (Token.isNot(MIToken::comma)) + break; + lex(); + } while (true); + + if (Token.isNot(MIToken::rbrace)) + return error("expected end of metadata node"); + lex(); + + return false; +} + +// ::= !42 +// ::= !"string" +bool MIParser::parseMetadata(Metadata *&MD) { + if (Token.isNot(MIToken::exclaim)) + return error("expected '!' here"); + lex(); + + if (Token.is(MIToken::StringConstant)) { + std::string Str; + if (parseStringConstant(Str)) + return true; + MD = MDString::get(MF.getFunction().getContext(), Str); + return false; + } + + if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isSigned()) + return error("expected metadata id after '!'"); + + SMLoc Loc = mapSMLoc(Token.location()); + + unsigned ID = 0; + if (getUnsigned(ID)) + return true; + lex(); + + auto NodeInfo = PFS.IRSlots.MetadataNodes.find(ID); + if (NodeInfo != PFS.IRSlots.MetadataNodes.end()) { + MD = NodeInfo->second.get(); + return false; + } + // Check machine metadata. + NodeInfo = PFS.MachineMetadataNodes.find(ID); + if (NodeInfo != PFS.MachineMetadataNodes.end()) { + MD = NodeInfo->second.get(); + return false; + } + // Forward reference. + auto &FwdRef = PFS.MachineForwardRefMDNodes[ID]; + FwdRef = std::make_pair( + MDTuple::getTemporary(MF.getFunction().getContext(), None), Loc); + PFS.MachineMetadataNodes[ID].reset(FwdRef.first.get()); + MD = FwdRef.first.get(); + + return false; +} + static const char *printImplicitRegisterFlag(const MachineOperand &MO) { assert(MO.isImplicit()); return MO.isDef() ? "implicit-def" : "implicit"; @@ -1726,7 +1876,7 @@ bool MIParser::parseLowLevelType(StringRef::iterator Loc, LLT &Ty) { return error(Loc, "expected <M x sN> or <M x pA> for vector type"); lex(); - Ty = LLT::vector(NumElements, Ty); + Ty = LLT::fixed_vector(NumElements, Ty); return false; } @@ -2010,8 +2160,11 @@ bool MIParser::parseMDNode(MDNode *&Node) { if (getUnsigned(ID)) return true; auto NodeInfo = PFS.IRSlots.MetadataNodes.find(ID); - if (NodeInfo == PFS.IRSlots.MetadataNodes.end()) - return error(Loc, "use of undefined metadata '!" + Twine(ID) + "'"); + if (NodeInfo == PFS.IRSlots.MetadataNodes.end()) { + NodeInfo = PFS.MachineMetadataNodes.find(ID); + if (NodeInfo == PFS.MachineMetadataNodes.end()) + return error(Loc, "use of undefined metadata '!" 
+ Twine(ID) + "'"); + } lex(); Node = NodeInfo->second.get(); return false; @@ -2205,6 +2358,16 @@ bool MIParser::parseCFIRegister(Register &Reg) { return false; } +bool MIParser::parseCFIAddressSpace(unsigned &AddressSpace) { + if (Token.isNot(MIToken::IntegerLiteral)) + return error("expected a cfi address space literal"); + if (Token.integerValue().isSigned()) + return error("expected an unsigned integer (cfi address space)"); + AddressSpace = Token.integerValue().getZExtValue(); + lex(); + return false; +} + bool MIParser::parseCFIEscapeValues(std::string &Values) { do { if (Token.isNot(MIToken::HexLiteral)) @@ -2225,6 +2388,7 @@ bool MIParser::parseCFIOperand(MachineOperand &Dest) { lex(); int Offset; Register Reg; + unsigned AddressSpace; unsigned CFIIndex; switch (Kind) { case MIToken::kw_cfi_same_value: @@ -2271,6 +2435,14 @@ bool MIParser::parseCFIOperand(MachineOperand &Dest) { CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(nullptr, Reg, Offset)); break; + case MIToken::kw_cfi_llvm_def_aspace_cfa: + if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) || + parseCFIOffset(Offset) || expectAndConsume(MIToken::comma) || + parseCFIAddressSpace(AddressSpace)) + return true; + CFIIndex = MF.addFrameInst(MCCFIInstruction::createLLVMDefAspaceCfa( + nullptr, Reg, Offset, AddressSpace)); + break; case MIToken::kw_cfi_remember_state: CFIIndex = MF.addFrameInst(MCCFIInstruction::createRememberState(nullptr)); break; @@ -2618,6 +2790,7 @@ bool MIParser::parseMachineOperand(const unsigned OpCode, const unsigned OpIdx, case MIToken::kw_cfi_adjust_cfa_offset: case MIToken::kw_cfi_escape: case MIToken::kw_cfi_def_cfa: + case MIToken::kw_cfi_llvm_def_aspace_cfa: case MIToken::kw_cfi_register: case MIToken::kw_cfi_remember_state: case MIToken::kw_cfi_restore: @@ -2788,6 +2961,9 @@ static bool parseIRValue(const MIToken &Token, PerFunctionMIParsingState &PFS, V = C; break; } + case MIToken::kw_unknown_address: + V = nullptr; + return false; default: llvm_unreachable("The current token should be an IR block reference"); } @@ -2948,12 +3124,13 @@ bool MIParser::parseMachinePointerInfo(MachinePointerInfo &Dest) { if (Token.isNot(MIToken::NamedIRValue) && Token.isNot(MIToken::IRValue) && Token.isNot(MIToken::GlobalValue) && Token.isNot(MIToken::NamedGlobalValue) && - Token.isNot(MIToken::QuotedIRValue)) + Token.isNot(MIToken::QuotedIRValue) && + Token.isNot(MIToken::kw_unknown_address)) return error("expected an IR value reference"); const Value *V = nullptr; if (parseIRValue(V)) return true; - if (!V->getType()->isPointerTy()) + if (V && !V->getType()->isPointerTy()) return error("expected a pointer IR value"); lex(); int64_t Offset = 0; @@ -3041,18 +3218,34 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) { if (parseOptionalAtomicOrdering(FailureOrder)) return true; + LLT MemoryType; if (Token.isNot(MIToken::IntegerLiteral) && - Token.isNot(MIToken::kw_unknown_size)) - return error("expected the size integer literal or 'unknown-size' after " + Token.isNot(MIToken::kw_unknown_size) && + Token.isNot(MIToken::lparen)) + return error("expected memory LLT, the size integer literal or 'unknown-size' after " "memory operation"); - uint64_t Size; + + uint64_t Size = MemoryLocation::UnknownSize; if (Token.is(MIToken::IntegerLiteral)) { if (getUint64(Size)) return true; + + // Convert from bytes to bits for storage. 
+ MemoryType = LLT::scalar(8 * Size); + lex(); } else if (Token.is(MIToken::kw_unknown_size)) { Size = MemoryLocation::UnknownSize; + lex(); + } else { + if (expectAndConsume(MIToken::lparen)) + return true; + if (parseLowLevelType(Token.location(), MemoryType)) + return true; + if (expectAndConsume(MIToken::rparen)) + return true; + + Size = MemoryType.getSizeInBytes(); } - lex(); MachinePointerInfo Ptr = MachinePointerInfo(); if (Token.is(MIToken::Identifier)) { @@ -3068,7 +3261,8 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) { if (parseMachinePointerInfo(Ptr)) return true; } - unsigned BaseAlignment = (Size != MemoryLocation::UnknownSize ? Size : 1); + unsigned BaseAlignment = + (Size != MemoryLocation::UnknownSize ? PowerOf2Ceil(Size) : 1); AAMDNodes AAInfo; MDNode *Range = nullptr; while (consumeIfPresent(MIToken::comma)) { @@ -3115,8 +3309,8 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) { } if (expectAndConsume(MIToken::rparen)) return true; - Dest = MF.getMachineMemOperand(Ptr, Flags, Size, Align(BaseAlignment), AAInfo, - Range, SSID, Order, FailureOrder); + Dest = MF.getMachineMemOperand(Ptr, Flags, MemoryType, Align(BaseAlignment), + AAInfo, Range, SSID, Order, FailureOrder); return false; } @@ -3252,6 +3446,11 @@ bool llvm::parseMDNode(PerFunctionMIParsingState &PFS, return MIParser(PFS, Error, Src).parseStandaloneMDNode(Node); } +bool llvm::parseMachineMetadata(PerFunctionMIParsingState &PFS, StringRef Src, + SMRange SrcRange, SMDiagnostic &Error) { + return MIParser(PFS, Error, Src, SrcRange).parseMachineMetadata(); +} + bool MIRFormatter::parseIRValue(StringRef Src, MachineFunction &MF, PerFunctionMIParsingState &PFS, const Value *&V, ErrorCallbackType ErrorCallback) { diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp index ffa9aeb21edb..d77104752880 100644 --- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp @@ -51,9 +51,9 @@ namespace llvm { /// file. class MIRParserImpl { SourceMgr SM; + LLVMContext &Context; yaml::Input In; StringRef Filename; - LLVMContext &Context; SlotMapping IRSlots; std::unique_ptr<PerTargetMIParsingState> Target; @@ -143,6 +143,10 @@ public: bool initializeJumpTableInfo(PerFunctionMIParsingState &PFS, const yaml::MachineJumpTable &YamlJTI); + bool parseMachineMetadataNodes(PerFunctionMIParsingState &PFS, + MachineFunction &MF, + const yaml::MachineFunction &YMF); + private: bool parseMDNode(PerFunctionMIParsingState &PFS, MDNode *&Node, const yaml::StringValue &Source); @@ -151,6 +155,9 @@ private: MachineBasicBlock *&MBB, const yaml::StringValue &Source); + bool parseMachineMetadata(PerFunctionMIParsingState &PFS, + const yaml::StringValue &Source); + /// Return a MIR diagnostic converted from an MI string diagnostic. SMDiagnostic diagFromMIStringDiag(const SMDiagnostic &Error, SMRange SourceRange); @@ -176,10 +183,11 @@ MIRParserImpl::MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents, StringRef Filename, LLVMContext &Context, std::function<void(Function &)> Callback) : SM(), + Context(Context), In(SM.getMemoryBuffer(SM.AddNewSourceBuffer(std::move(Contents), SMLoc())) ->getBuffer(), nullptr, handleYAMLDiag, this), - Filename(Filename), Context(Context), ProcessIRFunction(Callback) { + Filename(Filename), ProcessIRFunction(Callback) { In.setContext(&In); } @@ -417,8 +425,8 @@ void MIRParserImpl::setupDebugValueTracking( // Load any substitutions. 
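Each substitution now carries a subregister qualifier as well. A minimal sketch of recording one directly, with invented instruction numbers and subreg index:

    // Debug users of (instr #4, operand 0) should follow (instr #7,
    // operand 0), read through subregister index 3 (values hypothetical).
    MF.makeDebugValueSubstitution({4u, 0u}, {7u, 0u}, /*Subreg=*/3);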
for (auto &Sub : YamlMF.DebugValueSubstitutions) { - MF.makeDebugValueSubstitution(std::make_pair(Sub.SrcInst, Sub.SrcOp), - std::make_pair(Sub.DstInst, Sub.DstOp)); + MF.makeDebugValueSubstitution({Sub.SrcInst, Sub.SrcOp}, + {Sub.DstInst, Sub.DstOp}, Sub.Subreg); } } @@ -456,6 +464,9 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF, if (initializeConstantPool(PFS, *ConstantPool, YamlMF)) return true; } + if (!YamlMF.MachineMetadataNodes.empty() && + parseMachineMetadataNodes(PFS, MF, YamlMF)) + return true; StringRef BlockStr = YamlMF.Body.Value.Value; SMDiagnostic Error; @@ -646,10 +657,9 @@ bool MIRParserImpl::setupRegisterInfo(const PerFunctionMIParsingState &PFS, } }; - for (auto I = PFS.VRegInfosNamed.begin(), E = PFS.VRegInfosNamed.end(); - I != E; I++) { - const VRegInfo &Info = *I->second; - populateVRegInfo(Info, Twine(I->first())); + for (const auto &P : PFS.VRegInfosNamed) { + const VRegInfo &Info = *P.second; + populateVRegInfo(Info, Twine(P.first())); } for (auto P : PFS.VRegInfos) { @@ -700,6 +710,7 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS, MFI.setHasOpaqueSPAdjustment(YamlMFI.HasOpaqueSPAdjustment); MFI.setHasVAStart(YamlMFI.HasVAStart); MFI.setHasMustTailInVarArgFunc(YamlMFI.HasMustTailInVarArgFunc); + MFI.setHasTailCall(YamlMFI.HasTailCall); MFI.setLocalFrameSize(YamlMFI.LocalFrameSize); if (!YamlMFI.SavePoint.Value.empty()) { MachineBasicBlock *MBB = nullptr; @@ -919,6 +930,29 @@ bool MIRParserImpl::parseMBBReference(PerFunctionMIParsingState &PFS, return false; } +bool MIRParserImpl::parseMachineMetadata(PerFunctionMIParsingState &PFS, + const yaml::StringValue &Source) { + SMDiagnostic Error; + if (llvm::parseMachineMetadata(PFS, Source.Value, Source.SourceRange, Error)) + return error(Error, Source.SourceRange); + return false; +} + +bool MIRParserImpl::parseMachineMetadataNodes( + PerFunctionMIParsingState &PFS, MachineFunction &MF, + const yaml::MachineFunction &YMF) { + for (auto &MDS : YMF.MachineMetadataNodes) { + if (parseMachineMetadata(PFS, MDS)) + return true; + } + // Report missing definitions from forward referenced nodes. + if (!PFS.MachineForwardRefMDNodes.empty()) + return error(PFS.MachineForwardRefMDNodes.begin()->second.second, + "use of undefined metadata '!" 
+ + Twine(PFS.MachineForwardRefMDNodes.begin()->first) + "'"); + return false; +} + SMDiagnostic MIRParserImpl::diagFromMIStringDiag(const SMDiagnostic &Error, SMRange SourceRange) { assert(SourceRange.isValid() && "Invalid source range"); @@ -983,7 +1017,7 @@ bool MIRParser::parseMachineFunctions(Module &M, MachineModuleInfo &MMI) { std::unique_ptr<MIRParser> llvm::createMIRParserFromFile( StringRef Filename, SMDiagnostic &Error, LLVMContext &Context, std::function<void(Function &)> ProcessIRFunction) { - auto FileOrErr = MemoryBuffer::getFileOrSTDIN(Filename); + auto FileOrErr = MemoryBuffer::getFileOrSTDIN(Filename, /*IsText=*/true); if (std::error_code EC = FileOrErr.getError()) { Error = SMDiagnostic(Filename, SourceMgr::DK_Error, "Could not open input file: " + EC.message()); diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp index eae174019b56..2a78bb62762a 100644 --- a/llvm/lib/CodeGen/MIRPrinter.cpp +++ b/llvm/lib/CodeGen/MIRPrinter.cpp @@ -29,13 +29,14 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineModuleSlotTracker.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" -#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" @@ -135,6 +136,9 @@ public: void convertCallSiteObjects(yaml::MachineFunction &YMF, const MachineFunction &MF, ModuleSlotTracker &MST); + void convertMachineMetadataNodes(yaml::MachineFunction &YMF, + const MachineFunction &MF, + MachineModuleSlotTracker &MST); private: void initRegisterMaskIds(const MachineFunction &MF); @@ -215,15 +219,19 @@ void MIRPrinter::print(const MachineFunction &MF) { MachineFunctionProperties::Property::FailedISel); convert(YamlMF, MF.getRegInfo(), MF.getSubtarget().getRegisterInfo()); - ModuleSlotTracker MST(MF.getFunction().getParent()); + MachineModuleSlotTracker MST(&MF); MST.incorporateFunction(MF.getFunction()); convert(MST, YamlMF.FrameInfo, MF.getFrameInfo()); convertStackObjects(YamlMF, MF, MST); convertCallSiteObjects(YamlMF, MF, MST); - for (auto &Sub : MF.DebugValueSubstitutions) - YamlMF.DebugValueSubstitutions.push_back({Sub.first.first, Sub.first.second, - Sub.second.first, - Sub.second.second}); + for (const auto &Sub : MF.DebugValueSubstitutions) { + const auto &SubSrc = Sub.Src; + const auto &SubDest = Sub.Dest; + YamlMF.DebugValueSubstitutions.push_back({SubSrc.first, SubSrc.second, + SubDest.first, + SubDest.second, + Sub.Subreg}); + } if (const auto *ConstantPool = MF.getConstantPool()) convert(YamlMF, *ConstantPool); if (const auto *JumpTableInfo = MF.getJumpTableInfo()) @@ -243,6 +251,10 @@ void MIRPrinter::print(const MachineFunction &MF) { IsNewlineNeeded = true; } StrOS.flush(); + // Convert machine metadata collected during the print of the machine + // function. 
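In the printed MIR this surfaces as a per-function list of node strings, roughly of this shape (key name inferred from the MachineMetadataNodes field, node contents invented):

    machineMetadataNodes:
      - '!10 = distinct !{!10}'
      - '!11 = !{!10}'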
+ convertMachineMetadataNodes(YamlMF, MF, MST); + yaml::Output Out(OS); if (!SimplifyMIR) Out.setWriteDefaultValues(true); @@ -351,6 +363,7 @@ void MIRPrinter::convert(ModuleSlotTracker &MST, YamlMFI.HasOpaqueSPAdjustment = MFI.hasOpaqueSPAdjustment(); YamlMFI.HasVAStart = MFI.hasVAStart(); YamlMFI.HasMustTailInVarArgFunc = MFI.hasMustTailInVarArgFunc(); + YamlMFI.HasTailCall = MFI.hasTailCall(); YamlMFI.LocalFrameSize = MFI.getLocalFrameSize(); if (MFI.getSavePoint()) { raw_string_ostream StrOS(YamlMFI.SavePoint.Value); @@ -524,6 +537,19 @@ void MIRPrinter::convertCallSiteObjects(yaml::MachineFunction &YMF, }); } +void MIRPrinter::convertMachineMetadataNodes(yaml::MachineFunction &YMF, + const MachineFunction &MF, + MachineModuleSlotTracker &MST) { + MachineModuleSlotTracker::MachineMDNodeListType MDList; + MST.collectMachineMDNodes(MDList); + for (auto &MD : MDList) { + std::string NS; + raw_string_ostream StrOS(NS); + MD.second->print(StrOS, MST, MF.getFunction().getParent()); + YMF.MachineMetadataNodes.push_back(StrOS.str()); + } +} + void MIRPrinter::convert(yaml::MachineFunction &MF, const MachineConstantPool &ConstantPool) { unsigned ID = 0; diff --git a/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp b/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp index 3d4f66f31174..5862504109f0 100644 --- a/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp +++ b/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp @@ -125,7 +125,7 @@ std::string VRegRenamer::getInstructionOpcodeHash(MachineInstr &MI) { MIOperands.push_back((unsigned)Op->getSize()); MIOperands.push_back((unsigned)Op->getFlags()); MIOperands.push_back((unsigned)Op->getOffset()); - MIOperands.push_back((unsigned)Op->getOrdering()); + MIOperands.push_back((unsigned)Op->getSuccessOrdering()); MIOperands.push_back((unsigned)Op->getAddrSpace()); MIOperands.push_back((unsigned)Op->getSyncScopeID()); MIOperands.push_back((unsigned)Op->getBaseAlign().value()); diff --git a/llvm/lib/CodeGen/MIRYamlMapping.cpp b/llvm/lib/CodeGen/MIRYamlMapping.cpp new file mode 100644 index 000000000000..b1a538cad8a0 --- /dev/null +++ b/llvm/lib/CodeGen/MIRYamlMapping.cpp @@ -0,0 +1,43 @@ +//===- MIRYamlMapping.cpp - Describe mapping between MIR and YAML ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the mapping between various MIR data structures and +// their corresponding YAML representation. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MIRYamlMapping.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/FormatVariadic.h" + +using namespace llvm; +using namespace llvm::yaml; + +FrameIndex::FrameIndex(int FI, const llvm::MachineFrameInfo &MFI) { + IsFixed = MFI.isFixedObjectIndex(FI); + if (IsFixed) + FI -= MFI.getObjectIndexBegin(); + this->FI = FI; +} + +// Returns the value and if the frame index is fixed or not. 
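// (Worked example with an invented frame layout: if MFI has two fixed
// objects, getObjectIndexBegin() == -2, so fixed index -2 is serialized
// as FI = 0 with IsFixed set; getFI() below adds getObjectIndexBegin()
// back to recover -2, and out-of-range values yield a StringError.)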
+Expected<int> FrameIndex::getFI(const llvm::MachineFrameInfo &MFI) const { + int FI = this->FI; + if (IsFixed) { + if (unsigned(FI) >= MFI.getNumFixedObjects()) + return make_error<StringError>( + formatv("invalid fixed frame index {0}", FI).str(), + inconvertibleErrorCode()); + FI += MFI.getObjectIndexBegin(); + } + if (unsigned(FI + MFI.getNumFixedObjects()) >= MFI.getNumObjects()) + return make_error<StringError>(formatv("invalid frame index {0}", FI).str(), + inconvertibleErrorCode()); + return FI; +} diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp index b4187af02975..c6914dcd0e54 100644 --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Config/llvm-config.h" @@ -87,6 +88,17 @@ MCSymbol *MachineBasicBlock::getSymbol() const { return CachedMCSymbol; } +MCSymbol *MachineBasicBlock::getEHCatchretSymbol() const { + if (!CachedEHCatchretMCSymbol) { + const MachineFunction *MF = getParent(); + SmallString<128> SymbolName; + raw_svector_ostream(SymbolName) + << "$ehgcr_" << MF->getFunctionNumber() << '_' << getNumber(); + CachedEHCatchretMCSymbol = MF->getContext().getOrCreateSymbol(SymbolName); + } + return CachedEHCatchretMCSymbol; +} + MCSymbol *MachineBasicBlock::getEndSymbol() const { if (!CachedEndMCSymbol) { const MachineFunction *MF = getParent(); @@ -210,11 +222,13 @@ MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) { } MachineBasicBlock::iterator -MachineBasicBlock::SkipPHIsLabelsAndDebug(MachineBasicBlock::iterator I) { +MachineBasicBlock::SkipPHIsLabelsAndDebug(MachineBasicBlock::iterator I, + bool SkipPseudoOp) { const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo(); iterator E = end(); while (I != E && (I->isPHI() || I->isPosition() || I->isDebugInstr() || + (SkipPseudoOp && I->isPseudoProbe()) || TII->isBasicBlockPrologue(*I))) ++I; // FIXME: This needs to change if we wish to bundle labels / dbg_values @@ -243,12 +257,14 @@ MachineBasicBlock::instr_iterator MachineBasicBlock::getFirstInstrTerminator() { return I; } -MachineBasicBlock::iterator MachineBasicBlock::getFirstNonDebugInstr() { +MachineBasicBlock::iterator +MachineBasicBlock::getFirstNonDebugInstr(bool SkipPseudoOp) { // Skip over begin-of-block dbg_value instructions. - return skipDebugInstructionsForward(begin(), end()); + return skipDebugInstructionsForward(begin(), end(), SkipPseudoOp); } -MachineBasicBlock::iterator MachineBasicBlock::getLastNonDebugInstr() { +MachineBasicBlock::iterator +MachineBasicBlock::getLastNonDebugInstr(bool SkipPseudoOp) { // Skip over end-of-block dbg_value instructions. instr_iterator B = instr_begin(), I = instr_end(); while (I != B) { @@ -256,6 +272,8 @@ MachineBasicBlock::iterator MachineBasicBlock::getLastNonDebugInstr() { // Return instruction that starts a bundle. if (I->isDebugInstr() || I->isInsideBundle()) continue; + if (SkipPseudoOp && I->isPseudoProbe()) + continue; return I; } // The block is all debug values. 
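The SkipPseudoOp parameter threaded through these helpers lets callers treat pseudo probes like debug instructions. A minimal usage sketch, assuming some populated MachineBasicBlock MBB:

    // Find the first instruction that is neither a debug instruction nor
    // a pseudo probe (sketch only).
    MachineBasicBlock::iterator It =
        MBB.getFirstNonDebugInstr(/*SkipPseudoOp=*/true);
    if (It != MBB.end())
      assert(!It->isDebugInstr() && !It->isPseudoProbe());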
@@ -1075,10 +1093,9 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge( I != E; ++I) NewTerminators.push_back(&*I); - for (SmallVectorImpl<MachineInstr*>::iterator I = Terminators.begin(), - E = Terminators.end(); I != E; ++I) { - if (!is_contained(NewTerminators, *I)) - Indexes->removeMachineInstrFromMaps(**I); + for (MachineInstr *Terminator : Terminators) { + if (!is_contained(NewTerminators, Terminator)) + Indexes->removeMachineInstrFromMaps(*Terminator); } } @@ -1361,6 +1378,14 @@ MachineBasicBlock::findDebugLoc(instr_iterator MBBI) { return {}; } +DebugLoc MachineBasicBlock::rfindDebugLoc(reverse_instr_iterator MBBI) { + // Skip debug declarations; we don't want a DebugLoc from them. + MBBI = skipDebugInstructionsBackward(MBBI, instr_rbegin()); + if (!MBBI->isDebugInstr()) + return MBBI->getDebugLoc(); + return {}; +} + /// Find the previous valid DebugLoc preceding MBBI, skipping any DBG_VALUE /// instructions. Return UnknownLoc if there is none. DebugLoc MachineBasicBlock::findPrevDebugLoc(instr_iterator MBBI) { @@ -1371,6 +1396,16 @@ DebugLoc MachineBasicBlock::findPrevDebugLoc(instr_iterator MBBI) { return {}; } +DebugLoc MachineBasicBlock::rfindPrevDebugLoc(reverse_instr_iterator MBBI) { + if (MBBI == instr_rend()) + return {}; + // Skip debug declarations; we don't want a DebugLoc from them. + MBBI = next_nodbg(MBBI, instr_rend()); + if (MBBI != instr_rend()) + return MBBI->getDebugLoc(); + return {}; +} + /// Find and return the merged DebugLoc of the branch instructions of the block. /// Return UnknownLoc if there is none. DebugLoc @@ -1455,7 +1490,7 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI, // Try searching forwards from Before, looking for reads or defs. const_iterator I(Before); for (; I != end() && N > 0; ++I) { - if (I->isDebugInstr()) + if (I->isDebugOrPseudoInstr()) continue; --N; @@ -1493,7 +1528,7 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI, do { --I; - if (I->isDebugInstr()) + if (I->isDebugOrPseudoInstr()) continue; --N; @@ -1527,7 +1562,7 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI, // If all the instructions before this in the block are debug instructions, // skip over them. - while (I != begin() && std::prev(I)->isDebugInstr()) + while (I != begin() && std::prev(I)->isDebugOrPseudoInstr()) --I; // Did we get to the start of the block?
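A usage sketch for the new reverse-direction helper, with an invented insertion scenario (TII and SuccMBB assumed to be in scope):

    // Borrow a DebugLoc from the end of a non-empty block, skipping any
    // trailing debug instructions, when appending a new terminator.
    DebugLoc DL = MBB.rfindDebugLoc(MBB.instr_rbegin());
    BuildMI(&MBB, DL, TII->get(TargetOpcode::G_BR)).addMBB(SuccMBB);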
@@ -1569,6 +1604,23 @@ MachineBasicBlock::livein_iterator MachineBasicBlock::livein_begin() const { return LiveIns.begin(); } +MachineBasicBlock::liveout_iterator MachineBasicBlock::liveout_begin() const { + const MachineFunction &MF = *getParent(); + assert(MF.getProperties().hasProperty( + MachineFunctionProperties::Property::TracksLiveness) && + "Liveness information is accurate"); + + const TargetLowering &TLI = *MF.getSubtarget().getTargetLowering(); + MCPhysReg ExceptionPointer = 0, ExceptionSelector = 0; + if (MF.getFunction().hasPersonalityFn()) { + auto PersonalityFn = MF.getFunction().getPersonalityFn(); + ExceptionPointer = TLI.getExceptionPointerRegister(PersonalityFn); + ExceptionSelector = TLI.getExceptionSelectorRegister(PersonalityFn); + } + + return liveout_iterator(*this, ExceptionPointer, ExceptionSelector, false); +} + const MBBSectionID MBBSectionID::ColdSectionID(MBBSectionID::SectionType::Cold); const MBBSectionID MBBSectionID::ExceptionSectionID(MBBSectionID::SectionType::Exception); diff --git a/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp index 54e0a14e0555..c569f0350366 100644 --- a/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp +++ b/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp @@ -29,6 +29,7 @@ using namespace llvm; #define DEBUG_TYPE "machine-block-freq" +namespace llvm { static cl::opt<GVDAGType> ViewMachineBlockFreqPropagationDAG( "view-machine-block-freq-propagation-dags", cl::Hidden, cl::desc("Pop up a window to show a dag displaying how machine block " @@ -75,6 +76,7 @@ static cl::opt<bool> PrintMachineBlockFreq( // Command line option to specify the name of the function for block frequency // dump. Defined in Analysis/BlockFrequencyInfo.cpp. extern cl::opt<std::string> PrintBlockFreqFuncName; +} // namespace llvm static GVDAGType getGVDT() { if (ViewBlockLayoutWithBFI != GVDT_None) @@ -231,14 +233,20 @@ MachineBlockFrequencyInfo::getBlockFreq(const MachineBasicBlock *MBB) const { Optional<uint64_t> MachineBlockFrequencyInfo::getBlockProfileCount( const MachineBasicBlock *MBB) const { + if (!MBFI) + return None; + const Function &F = MBFI->getFunction()->getFunction(); - return MBFI ? MBFI->getBlockProfileCount(F, MBB) : None; + return MBFI->getBlockProfileCount(F, MBB); } Optional<uint64_t> MachineBlockFrequencyInfo::getProfileCountFromFreq(uint64_t Freq) const { + if (!MBFI) + return None; + const Function &F = MBFI->getFunction()->getFunction(); - return MBFI ? 
MBFI->getProfileCountFromFreq(F, Freq) : None; + return MBFI->getProfileCountFromFreq(F, Freq); } bool MachineBlockFrequencyInfo::isIrrLoopHeader( diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp index 048baa460e49..f61142d202eb 100644 --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -193,6 +193,7 @@ static cl::opt<unsigned> TriangleChainCount( cl::init(2), cl::Hidden); +namespace llvm { extern cl::opt<unsigned> StaticLikelyProb; extern cl::opt<unsigned> ProfileLikelyProb; @@ -204,6 +205,7 @@ extern cl::opt<GVDAGType> ViewBlockLayoutWithBFI; // Command line option to specify the name of the function for CFG dump // Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name= extern cl::opt<std::string> ViewBlockFreqFuncName; +} // namespace llvm namespace { @@ -3337,6 +3339,13 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { TailDupSize = TailDupPlacementAggressiveThreshold; } + // If there's no threshold provided through options, query the target + // information for a threshold instead. + if (TailDupPlacementThreshold.getNumOccurrences() == 0 && + (PassConfig->getOptLevel() < CodeGenOpt::Aggressive || + TailDupPlacementAggressiveThreshold.getNumOccurrences() == 0)) + TailDupSize = TII->getTailDuplicateSize(PassConfig->getOptLevel()); + if (allowTailDupPlacement()) { MPDT = &getAnalysis<MachinePostDominatorTree>(); bool OptForSize = MF.getFunction().hasOptSize() || diff --git a/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp index f1d68c79a212..c9f762f9a6e7 100644 --- a/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp +++ b/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp @@ -25,6 +25,7 @@ INITIALIZE_PASS_BEGIN(MachineBranchProbabilityInfo, "machine-branch-prob", INITIALIZE_PASS_END(MachineBranchProbabilityInfo, "machine-branch-prob", "Machine Branch Probability Analysis", false, true) +namespace llvm { cl::opt<unsigned> StaticLikelyProb("static-likely-prob", cl::desc("branch probability threshold in percentage" @@ -36,6 +37,7 @@ cl::opt<unsigned> ProfileLikelyProb( cl::desc("branch probability threshold in percentage to be considered" " very likely when profile is available"), cl::init(51), cl::Hidden); +} // namespace llvm char MachineBranchProbabilityInfo::ID = 0; @@ -66,26 +68,6 @@ bool MachineBranchProbabilityInfo::isEdgeHot( return getEdgeProbability(Src, Dst) > HotProb; } -MachineBasicBlock * -MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const { - auto MaxProb = BranchProbability::getZero(); - MachineBasicBlock *MaxSucc = nullptr; - for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), - E = MBB->succ_end(); I != E; ++I) { - auto Prob = getEdgeProbability(MBB, I); - if (Prob > MaxProb) { - MaxProb = Prob; - MaxSucc = *I; - } - } - - BranchProbability HotProb(StaticLikelyProb, 100); - if (getEdgeProbability(MBB, MaxSucc) >= HotProb) - return MaxSucc; - - return nullptr; -} - raw_ostream &MachineBranchProbabilityInfo::printEdgeProbability( raw_ostream &OS, const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const { diff --git a/llvm/lib/CodeGen/MachineCSE.cpp b/llvm/lib/CodeGen/MachineCSE.cpp index 199fe2dc6454..cb2e18e8c813 100644 --- a/llvm/lib/CodeGen/MachineCSE.cpp +++ b/llvm/lib/CodeGen/MachineCSE.cpp @@ -588,6 +588,23 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) { LLVM_DEBUG(dbgs() << "Examining: " << *MI); LLVM_DEBUG(dbgs() << "*** 
Found a common subexpression: " << *CSMI); + // Prevent CSE-ing non-local convergent instructions. + // LLVM's current definition of `isConvergent` does not necessarily prove + // that non-local CSE is illegal. The following check extends the definition + // of `isConvergent` to assume a convergent instruction is dependent not + // only on additional conditions, but also on fewer conditions. LLVM does + // not have a MachineInstr attribute which expresses this extended + // definition, so it's necessary to use `isConvergent` to prevent illegally + // CSE-ing the subset of `isConvergent` instructions which do fall into this + // extended definition. + if (MI->isConvergent() && MI->getParent() != CSMI->getParent()) { + LLVM_DEBUG(dbgs() << "*** Convergent MI and subexpression exist in " + "different BBs, avoid CSE!\n"); + VNT.insert(MI, CurrVN++); + Exps.push_back(MI); + continue; + } + // Check if it's profitable to perform this CSE. bool DoCSE = true; unsigned NumDefs = MI->getNumDefs(); @@ -820,6 +837,15 @@ bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT, if (BB != nullptr && BB1 != nullptr && (isPotentiallyReachable(BB1, BB) || isPotentiallyReachable(BB, BB1))) { + // The following check extends the definition of `isConvergent` to + // assume a convergent instruction is dependent not only on additional + // conditions, but also on fewer conditions. LLVM does not have a + // MachineInstr attribute which expresses this extended definition, so + // it's necessary to use `isConvergent` to prevent illegally PRE-ing the + // subset of `isConvergent` instructions which do fall into this + // extended definition. + if (MI->isConvergent() && CMBB != MBB) + continue; assert(MI->getOperand(0).isDef() && "First operand of instr with one explicit def must be this def"); diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp index d8659c1c7853..10b74f5f47f5 100644 --- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -294,7 +294,7 @@ private: SmallSetVector<MachineInstr *, 8> MaybeDeadCopies; /// Multimap tracking debug users in current BB - DenseMap<MachineInstr*, SmallVector<MachineInstr*, 2>> CopyDbgUsers; + DenseMap<MachineInstr *, SmallSet<MachineInstr *, 2>> CopyDbgUsers; CopyTracker Tracker; @@ -321,7 +321,7 @@ void MachineCopyPropagation::ReadRegister(MCRegister Reg, MachineInstr &Reader, LLVM_DEBUG(dbgs() << "MCP: Copy is used - not dead: "; Copy->dump()); MaybeDeadCopies.remove(Copy); } else { - CopyDbgUsers[Copy].push_back(&Reader); + CopyDbgUsers[Copy].insert(&Reader); } } } @@ -734,7 +734,11 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) { // Update matching debug values, if any. 
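// (For instance, with invented registers: when the dead copy
// '$x1 = COPY $x0' is erased below, a DBG_VALUE that tracked $x1 must be
// rewritten to follow $x0, which still holds the value; that is the
// retargeting updateDbgUsersToReg performs for the collected debug users.)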
assert(MaybeDead->isCopy()); Register SrcReg = MaybeDead->getOperand(1).getReg(); - MRI->updateDbgUsersToReg(SrcReg, CopyDbgUsers[MaybeDead]); + Register DestReg = MaybeDead->getOperand(0).getReg(); + SmallVector<MachineInstr *> MaybeDeadDbgUsers( + CopyDbgUsers[MaybeDead].begin(), CopyDbgUsers[MaybeDead].end()); + MRI->updateDbgUsersToReg(DestReg.asMCReg(), SrcReg.asMCReg(), + MaybeDeadDbgUsers); MaybeDead->eraseFromParent(); Changed = true; @@ -866,12 +870,32 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock( if (MO.isDef()) Tracker.invalidateRegister(MO.getReg().asMCReg(), *TRI); - if (MO.readsReg()) - Tracker.invalidateRegister(MO.getReg().asMCReg(), *TRI); + if (MO.readsReg()) { + if (MO.isDebug()) { + // Check if the register in the debug instruction is utilized + // in a copy instruction, so we can update the debug info if the + // register is changed. + for (MCRegUnitIterator RUI(MO.getReg().asMCReg(), TRI); RUI.isValid(); + ++RUI) { + if (auto *Copy = Tracker.findCopyDefViaUnit(*RUI, *TRI)) { + CopyDbgUsers[Copy].insert(MI); + } + } + } else { + Tracker.invalidateRegister(MO.getReg().asMCReg(), *TRI); + } + } } } for (auto *Copy : MaybeDeadCopies) { + + Register Src = Copy->getOperand(1).getReg(); + Register Def = Copy->getOperand(0).getReg(); + SmallVector<MachineInstr *> MaybeDeadDbgUsers(CopyDbgUsers[Copy].begin(), + CopyDbgUsers[Copy].end()); + + MRI->updateDbgUsersToReg(Src.asMCReg(), Def.asMCReg(), MaybeDeadDbgUsers); Copy->eraseFromParent(); ++NumDeletes; } diff --git a/llvm/lib/CodeGen/MachineFrameInfo.cpp b/llvm/lib/CodeGen/MachineFrameInfo.cpp index 7ba27ff1c856..ca5936a14779 100644 --- a/llvm/lib/CodeGen/MachineFrameInfo.cpp +++ b/llvm/lib/CodeGen/MachineFrameInfo.cpp @@ -173,7 +173,7 @@ uint64_t MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const { // value. 
Align StackAlign; if (adjustsStack() || hasVarSizedObjects() || - (RegInfo->needsStackRealignment(MF) && getObjectIndexEnd() != 0)) + (RegInfo->hasStackRealignment(MF) && getObjectIndexEnd() != 0)) StackAlign = TFI->getStackAlign(); else StackAlign = TFI->getTransientStackAlign(); diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp index 3f44578b1a2c..0a454b68aca3 100644 --- a/llvm/lib/CodeGen/MachineFunction.cpp +++ b/llvm/lib/CodeGen/MachineFunction.cpp @@ -438,15 +438,34 @@ MachineMemOperand *MachineFunction::getMachineMemOperand( } MachineMemOperand *MachineFunction::getMachineMemOperand( - const MachineMemOperand *MMO, MachinePointerInfo &PtrInfo, uint64_t Size) { - return new (Allocator) MachineMemOperand( - PtrInfo, MMO->getFlags(), Size, MMO->getBaseAlign(), AAMDNodes(), nullptr, - MMO->getSyncScopeID(), MMO->getOrdering(), MMO->getFailureOrdering()); + MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, + Align base_alignment, const AAMDNodes &AAInfo, const MDNode *Ranges, + SyncScope::ID SSID, AtomicOrdering Ordering, + AtomicOrdering FailureOrdering) { + return new (Allocator) + MachineMemOperand(PtrInfo, f, MemTy, base_alignment, AAInfo, Ranges, SSID, + Ordering, FailureOrdering); +} + +MachineMemOperand *MachineFunction::getMachineMemOperand( + const MachineMemOperand *MMO, const MachinePointerInfo &PtrInfo, uint64_t Size) { + return new (Allocator) + MachineMemOperand(PtrInfo, MMO->getFlags(), Size, MMO->getBaseAlign(), + AAMDNodes(), nullptr, MMO->getSyncScopeID(), + MMO->getSuccessOrdering(), MMO->getFailureOrdering()); +} + +MachineMemOperand *MachineFunction::getMachineMemOperand( + const MachineMemOperand *MMO, const MachinePointerInfo &PtrInfo, LLT Ty) { + return new (Allocator) + MachineMemOperand(PtrInfo, MMO->getFlags(), Ty, MMO->getBaseAlign(), + AAMDNodes(), nullptr, MMO->getSyncScopeID(), + MMO->getSuccessOrdering(), MMO->getFailureOrdering()); } MachineMemOperand * MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO, - int64_t Offset, uint64_t Size) { + int64_t Offset, LLT Ty) { const MachinePointerInfo &PtrInfo = MMO->getPointerInfo(); // If there is no pointer value, the offset isn't tracked so we need to adjust @@ -457,10 +476,10 @@ MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO, // Do not preserve ranges, since we don't necessarily know what the high bits // are anymore. 
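The offset-based clone now takes a memory type instead of a raw byte size. A caller-side sketch with invented operands (MF and MMO assumed to be in scope):

    // Clone an existing MMO at +8 bytes as a 4-byte (s32) access using
    // the new LLT-based overload.
    MachineMemOperand *HiMMO =
        MF.getMachineMemOperand(MMO, /*Offset=*/8, LLT::scalar(32));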
- return new (Allocator) - MachineMemOperand(PtrInfo.getWithOffset(Offset), MMO->getFlags(), Size, - Alignment, MMO->getAAInfo(), nullptr, MMO->getSyncScopeID(), - MMO->getOrdering(), MMO->getFailureOrdering()); + return new (Allocator) MachineMemOperand( + PtrInfo.getWithOffset(Offset), MMO->getFlags(), Ty, Alignment, + MMO->getAAInfo(), nullptr, MMO->getSyncScopeID(), + MMO->getSuccessOrdering(), MMO->getFailureOrdering()); } MachineMemOperand * @@ -472,7 +491,7 @@ MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO, return new (Allocator) MachineMemOperand( MPI, MMO->getFlags(), MMO->getSize(), MMO->getBaseAlign(), AAInfo, - MMO->getRanges(), MMO->getSyncScopeID(), MMO->getOrdering(), + MMO->getRanges(), MMO->getSyncScopeID(), MMO->getSuccessOrdering(), MMO->getFailureOrdering()); } @@ -482,7 +501,7 @@ MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO, return new (Allocator) MachineMemOperand( MMO->getPointerInfo(), Flags, MMO->getSize(), MMO->getBaseAlign(), MMO->getAAInfo(), MMO->getRanges(), MMO->getSyncScopeID(), - MMO->getOrdering(), MMO->getFailureOrdering()); + MMO->getSuccessOrdering(), MMO->getFailureOrdering()); } MachineInstr::ExtraInfo *MachineFunction::createMIExtraInfo( @@ -850,9 +869,8 @@ int MachineFunction::getFilterIDFor(std::vector<unsigned> &TyIds) { // If the new filter coincides with the tail of an existing filter, then // re-use the existing filter. Folding filters more than this requires // re-ordering filters and/or their elements - probably not worth it. - for (std::vector<unsigned>::iterator I = FilterEnds.begin(), - E = FilterEnds.end(); I != E; ++I) { - unsigned i = *I, j = TyIds.size(); + for (unsigned i : FilterEnds) { + unsigned j = TyIds.size(); while (i && j) if (FilterIds[--i] != TyIds[--j]) @@ -951,10 +969,11 @@ void MachineFunction::setDebugInstrNumberingCount(unsigned Num) { } void MachineFunction::makeDebugValueSubstitution(DebugInstrOperandPair A, - DebugInstrOperandPair B) { - auto Result = DebugValueSubstitutions.insert(std::make_pair(A, B)); - (void)Result; - assert(Result.second && "Substitution for an already substituted value?"); + DebugInstrOperandPair B, + unsigned Subreg) { + // Catch any accidental self-loops. + assert(A.first != B.first); + DebugValueSubstitutions.push_back({A, B, Subreg}); } void MachineFunction::substituteDebugValuesForInst(const MachineInstr &Old, @@ -971,7 +990,7 @@ void MachineFunction::substituteDebugValuesForInst(const MachineInstr &Old, // MIR output. // Examine all the operands, or the first N specified by the caller. MaxOperand = std::min(MaxOperand, Old.getNumOperands()); - for (unsigned int I = 0; I < Old.getNumOperands(); ++I) { + for (unsigned int I = 0; I < MaxOperand; ++I) { const auto &OldMO = Old.getOperand(I); auto &NewMO = New.getOperand(I); (void)NewMO; @@ -986,6 +1005,222 @@ void MachineFunction::substituteDebugValuesForInst(const MachineInstr &Old, } } +auto MachineFunction::salvageCopySSA(MachineInstr &MI) + -> DebugInstrOperandPair { + MachineRegisterInfo &MRI = getRegInfo(); + const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); + const TargetInstrInfo &TII = *getSubtarget().getInstrInfo(); + + // Chase the value read by a copy-like instruction back to the instruction + // that ultimately _defines_ that value. 
This may pass: + // * Through multiple intermediate copies, including subregister moves / + // copies, + // * Copies from physical registers that must then be traced back to the + // defining instruction, + // * Or, physical registers may be live-in to (only) the entry block, which + // requires a DBG_PHI to be created. + // We can pursue this problem in that order: trace back through copies, + // optionally through a physical register, to a defining instruction. We + // should never move from physreg to vreg. As we're still in SSA form, no need + // to worry about partial definitions of registers. + + // Helper lambda to interpret a copy-like instruction. Takes instruction, + // returns the register read and any subregister identifying which part is + // read. + auto GetRegAndSubreg = + [&](const MachineInstr &Cpy) -> std::pair<Register, unsigned> { + Register NewReg, OldReg; + unsigned SubReg; + if (Cpy.isCopy()) { + OldReg = Cpy.getOperand(0).getReg(); + NewReg = Cpy.getOperand(1).getReg(); + SubReg = Cpy.getOperand(1).getSubReg(); + } else if (Cpy.isSubregToReg()) { + OldReg = Cpy.getOperand(0).getReg(); + NewReg = Cpy.getOperand(2).getReg(); + SubReg = Cpy.getOperand(3).getImm(); + } else { + auto CopyDetails = *TII.isCopyInstr(Cpy); + const MachineOperand &Src = *CopyDetails.Source; + const MachineOperand &Dest = *CopyDetails.Destination; + OldReg = Dest.getReg(); + NewReg = Src.getReg(); + SubReg = Src.getSubReg(); + } + + return {NewReg, SubReg}; + }; + + // First seek either the defining instruction, or a copy from a physreg. + // During search, the current state is the current copy instruction, and which + // register we've read. Accumulate qualifying subregisters into SubregsSeen; + // deal with those later. + auto State = GetRegAndSubreg(MI); + auto CurInst = MI.getIterator(); + SmallVector<unsigned, 4> SubregsSeen; + while (true) { + // If we've found a copy from a physreg, first portion of search is over. + if (!State.first.isVirtual()) + break; + + // Record any subregister qualifier. + if (State.second) + SubregsSeen.push_back(State.second); + + assert(MRI.hasOneDef(State.first)); + MachineInstr &Inst = *MRI.def_begin(State.first)->getParent(); + CurInst = Inst.getIterator(); + + // Any non-copy instruction is the defining instruction we're seeking. + if (!Inst.isCopyLike() && !TII.isCopyInstr(Inst)) + break; + State = GetRegAndSubreg(Inst); + }; + + // Helper lambda to apply additional subregister substitutions to a known + // instruction/operand pair. Adds new (fake) substitutions so that we can + // record the subregister. FIXME: this isn't very space efficient if multiple + // values are tracked back through the same copies; cache something later. + auto ApplySubregisters = + [&](DebugInstrOperandPair P) -> DebugInstrOperandPair { + for (unsigned Subreg : reverse(SubregsSeen)) { + // Fetch a new instruction number, not attached to an actual instruction. + unsigned NewInstrNumber = getNewDebugInstrNum(); + // Add a substitution from the "new" number to the known one, with a + // qualifying subreg. + makeDebugValueSubstitution({NewInstrNumber, 0}, P, Subreg); + // Return the new number; to find the underlying value, consumers need to + // deal with the qualifying subreg. + P = {NewInstrNumber, 0}; + } + return P; + }; + + // If we managed to find the defining instruction after COPYs, return an + // instruction / operand pair after adding subregister qualifiers. + if (State.first.isVirtual()) { + // Virtual register def -- we can just look up where this happens. 
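Stripped of the subregister bookkeeping, the def-chasing loop above is the standard SSA idiom: hop from each virtual register to its unique definition and stop at the first non-copy. A hedged sketch, with Reg and MRI assumed in scope:

  Register Cur = Reg;
  while (Cur.isVirtual()) {
    assert(MRI.hasOneDef(Cur) && "SSA form: exactly one def per vreg");
    MachineInstr &Def = *MRI.def_begin(Cur)->getParent();
    if (!Def.isCopy())
      break;                          // reached the real producer
    Cur = Def.getOperand(1).getReg(); // step through the COPY's source
  }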
+ MachineInstr *Inst = MRI.def_begin(State.first)->getParent(); + for (auto &MO : Inst->operands()) { + if (!MO.isReg() || !MO.isDef() || MO.getReg() != State.first) + continue; + return ApplySubregisters( + {Inst->getDebugInstrNum(), Inst->getOperandNo(&MO)}); + } + + llvm_unreachable("Vreg def with no corresponding operand?"); + } + + // Our search ended in a copy from a physreg: walk back up the function + // looking for whatever defines the physreg. + assert(CurInst->isCopyLike() || TII.isCopyInstr(*CurInst)); + State = GetRegAndSubreg(*CurInst); + Register RegToSeek = State.first; + + auto RMII = CurInst->getReverseIterator(); + auto PrevInstrs = make_range(RMII, CurInst->getParent()->instr_rend()); + for (auto &ToExamine : PrevInstrs) { + for (auto &MO : ToExamine.operands()) { + // Test for operand that defines something aliasing RegToSeek. + if (!MO.isReg() || !MO.isDef() || + !TRI.regsOverlap(RegToSeek, MO.getReg())) + continue; + + return ApplySubregisters( + {ToExamine.getDebugInstrNum(), ToExamine.getOperandNo(&MO)}); + } + } + + MachineBasicBlock &InsertBB = *CurInst->getParent(); + + // We reached the start of the block before finding a defining instruction. + // It could be from a constant register, otherwise it must be an argument. + if (TRI.isConstantPhysReg(State.first)) { + // We can produce a DBG_PHI that identifies the constant physreg. Doesn't + // matter where we put it, as it's constant valued. + assert(CurInst->isCopy()); + } else if (State.first == TRI.getFrameRegister(*this)) { + // LLVM IR is allowed to read the framepointer by calling a + // llvm.frameaddress.* intrinsic. We can support this by emitting a + // DBG_PHI $fp. This isn't ideal, because it extends the behaviours / + // position that DBG_PHIs appear at, limiting what can be done later. + // TODO: see if there's a better way of expressing these variable + // locations. + ; + } else { + // Assert that this is the entry block. If it isn't, then there is some + // code construct we don't recognise that deals with physregs across + // blocks. + assert(!State.first.isVirtual()); + assert(&*InsertBB.getParent()->begin() == &InsertBB); + } + + // Create DBG_PHI for specified physreg. + auto Builder = BuildMI(InsertBB, InsertBB.getFirstNonPHI(), DebugLoc(), + TII.get(TargetOpcode::DBG_PHI)); + Builder.addReg(State.first, RegState::Debug); + unsigned NewNum = getNewDebugInstrNum(); + Builder.addImm(NewNum); + return ApplySubregisters({NewNum, 0u}); +} + +void MachineFunction::finalizeDebugInstrRefs() { + auto *TII = getSubtarget().getInstrInfo(); + + auto MakeDbgValue = [&](MachineInstr &MI) { + const MCInstrDesc &RefII = TII->get(TargetOpcode::DBG_VALUE); + MI.setDesc(RefII); + MI.getOperand(1).ChangeToRegister(0, false); + MI.getOperand(0).setIsDebug(); + }; + + if (!getTarget().Options.ValueTrackingVariableLocations) + return; + + for (auto &MBB : *this) { + for (auto &MI : MBB) { + if (!MI.isDebugRef() || !MI.getOperand(0).isReg()) + continue; + + Register Reg = MI.getOperand(0).getReg(); + + // Some vregs can be deleted as redundant in the meantime. Mark those + // as DBG_VALUE $noreg. + if (Reg == 0) { + MakeDbgValue(MI); + continue; + } + + assert(Reg.isVirtual()); + MachineInstr &DefMI = *RegInfo->def_instr_begin(Reg); + assert(RegInfo->hasOneDef(Reg)); + + // If we've found a copy-like instruction, follow it back to the + // instruction that defines the source value, see salvageCopySSA docs + // for why this is important. 
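Those accumulated subregister qualifiers bottom out in the three-argument makeDebugValueSubstitution added earlier in this patch. A hedged usage fragment, with all numbers hypothetical:

  // Users of debug pair {OldNum, 0} should consult {NewNum, 0} instead,
  // reading only the part selected by SubRegIdx (e.g. a target's sub_32).
  MF.makeDebugValueSubstitution({OldNum, 0u}, {NewNum, 0u}, SubRegIdx);

The copy-handling branch below then wires each DBG_INSTR_REF to the pair that salvageCopySSA produced.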
+ if (DefMI.isCopyLike() || TII->isCopyInstr(DefMI)) { + auto Result = salvageCopySSA(DefMI); + MI.getOperand(0).ChangeToImmediate(Result.first); + MI.getOperand(1).setImm(Result.second); + } else { + // Otherwise, identify the operand number that the VReg refers to. + unsigned OperandIdx = 0; + for (const auto &MO : DefMI.operands()) { + if (MO.isReg() && MO.isDef() && MO.getReg() == Reg) + break; + ++OperandIdx; + } + assert(OperandIdx < DefMI.getNumOperands()); + + // Morph this instr ref to point at the given instruction and operand. + unsigned ID = DefMI.getDebugInstrNum(); + MI.getOperand(0).ChangeToImmediate(ID); + MI.getOperand(1).setImm(OperandIdx); + } + } + } +} + /// \} //===----------------------------------------------------------------------===// @@ -1120,7 +1355,7 @@ unsigned MachineConstantPoolEntry::getSizeInBytes(const DataLayout &DL) const { bool MachineConstantPoolEntry::needsRelocation() const { if (isMachineConstantPoolEntry()) return true; - return Val.ConstVal->needsRelocation(); + return Val.ConstVal->needsDynamicRelocation(); } SectionKind @@ -1150,11 +1385,9 @@ MachineConstantPool::~MachineConstantPool() { Deleted.insert(Constants[i].Val.MachineCPVal); delete Constants[i].Val.MachineCPVal; } - for (DenseSet<MachineConstantPoolValue*>::iterator I = - MachineCPVsSharingEntries.begin(), E = MachineCPVsSharingEntries.end(); - I != E; ++I) { - if (Deleted.count(*I) == 0) - delete *I; + for (MachineConstantPoolValue *CPV : MachineCPVsSharingEntries) { + if (Deleted.count(CPV) == 0) + delete CPV; } } diff --git a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp index 483809a8ed96..0e0eb8b8e00f 100644 --- a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp +++ b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp @@ -23,6 +23,7 @@ // https://groups.google.com/d/msg/llvm-dev/RUegaMg-iqc/wFAVxa6fCgAJ //===----------------------------------------------------------------------===// +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/BasicBlockSectionUtils.h" @@ -77,7 +78,7 @@ public: }; } // end anonymous namespace -static bool isColdBlock(MachineBasicBlock &MBB, +static bool isColdBlock(const MachineBasicBlock &MBB, const MachineBlockFrequencyInfo *MBFI, ProfileSummaryInfo *PSI) { Optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB); @@ -100,7 +101,8 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) { // since the split part may not be placed in a contiguous region. It may also // be more beneficial to augment the linker to ensure contiguous layout of // split functions within the same section as specified by the attribute. - if (!MF.getFunction().getSection().empty()) + if (MF.getFunction().hasSection() || + MF.getFunction().hasFnAttribute("implicit-section-name")) return false; // We don't want to proceed further for cold functions @@ -121,16 +123,28 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) { auto *MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); + SmallVector<MachineBasicBlock *, 2> LandingPads; for (auto &MBB : MF) { - // FIXME: We retain the entry block and conservatively keep all landing pad - // blocks as part of the original function. Once D73739 is submitted, we can - // improve the handling of ehpads. 
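The hunk that follows reworks the splitter's block walk; the policy it implements, splitting EH pads only when every landing pad is cold, can be restated compactly. A hedged sketch using the same names as the patch (LandingPads, MBFI, PSI):

  bool AllCold = llvm::all_of(LandingPads, [&](const MachineBasicBlock *LP) {
    return isColdBlock(*LP, MBFI, PSI);
  });
  if (AllCold) // never split hot unwind paths across sections
    for (MachineBasicBlock *LP : LandingPads)
      LP->setSectionID(MBBSectionID::ColdSectionID);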
- if ((MBB.pred_empty() || MBB.isEHPad())) + if (MBB.isEntryBlock()) continue; - if (isColdBlock(MBB, MBFI, PSI)) + + if (MBB.isEHPad()) + LandingPads.push_back(&MBB); + else if (isColdBlock(MBB, MBFI, PSI)) MBB.setSectionID(MBBSectionID::ColdSectionID); } + // We only split out eh pads if all of them are cold. + bool HasHotLandingPads = false; + for (const MachineBasicBlock *LP : LandingPads) { + if (!isColdBlock(*LP, MBFI, PSI)) + HasHotLandingPads = true; + } + if (!HasHotLandingPads) { + for (MachineBasicBlock *LP : LandingPads) + LP->setSectionID(MBBSectionID::ColdSectionID); + } + auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) { return X.getSectionID().Type < Y.getSectionID().Type; }; diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index 59d98054e3a2..0707945e7fb7 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -841,28 +841,35 @@ const DILabel *MachineInstr::getDebugLabel() const { } const MachineOperand &MachineInstr::getDebugVariableOp() const { - assert((isDebugValue() || isDebugRef()) && "not a DBG_VALUE"); - return getOperand(2); + assert((isDebugValue() || isDebugRef()) && "not a DBG_VALUE*"); + unsigned VariableOp = isDebugValueList() ? 0 : 2; + return getOperand(VariableOp); } MachineOperand &MachineInstr::getDebugVariableOp() { - assert((isDebugValue() || isDebugRef()) && "not a DBG_VALUE"); - return getOperand(2); + assert((isDebugValue() || isDebugRef()) && "not a DBG_VALUE*"); + unsigned VariableOp = isDebugValueList() ? 0 : 2; + return getOperand(VariableOp); } const DILocalVariable *MachineInstr::getDebugVariable() const { - assert((isDebugValue() || isDebugRef()) && "not a DBG_VALUE"); - return cast<DILocalVariable>(getOperand(2).getMetadata()); + return cast<DILocalVariable>(getDebugVariableOp().getMetadata()); +} + +const MachineOperand &MachineInstr::getDebugExpressionOp() const { + assert((isDebugValue() || isDebugRef()) && "not a DBG_VALUE*"); + unsigned ExpressionOp = isDebugValueList() ? 1 : 3; + return getOperand(ExpressionOp); } MachineOperand &MachineInstr::getDebugExpressionOp() { - assert((isDebugValue() || isDebugRef()) && "not a DBG_VALUE"); - return getOperand(3); + assert((isDebugValue() || isDebugRef()) && "not a DBG_VALUE*"); + unsigned ExpressionOp = isDebugValueList() ? 1 : 3; + return getOperand(ExpressionOp); } const DIExpression *MachineInstr::getDebugExpression() const { - assert((isDebugValue() || isDebugRef()) && "not a DBG_VALUE"); - return cast<DIExpression>(getOperand(3).getMetadata()); + return cast<DIExpression>(getDebugExpressionOp().getMetadata()); } bool MachineInstr::isDebugEntryValue() const { @@ -1312,12 +1319,10 @@ static bool MemOperandsHaveAlias(const MachineFrameInfo &MFI, AAResults *AA, int64_t OverlapB = KnownWidthB ? WidthB + OffsetB - MinOffset : MemoryLocation::UnknownSize; - AliasResult AAResult = AA->alias( + return !AA->isNoAlias( MemoryLocation(ValA, OverlapA, UseTBAA ? MMOa->getAAInfo() : AAMDNodes()), MemoryLocation(ValB, OverlapB, UseTBAA ? 
MMOb->getAAInfo() : AAMDNodes())); - - return (AAResult != NoAlias); } bool MachineInstr::mayAlias(AAResults *AA, const MachineInstr &Other, @@ -1462,7 +1467,8 @@ bool MachineInstr::hasUnmodeledSideEffects() const { } bool MachineInstr::isLoadFoldBarrier() const { - return mayStore() || isCall() || hasUnmodeledSideEffects(); + return mayStore() || isCall() || + (hasUnmodeledSideEffects() && !isPseudoProbe()); } /// allDefsAreDead - Return true if all the defs of this instruction are dead. @@ -1711,7 +1717,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << " "; if (isDebugValue() && MO.isMetadata()) { - // Pretty print DBG_VALUE instructions. + // Pretty print DBG_VALUE* instructions. auto *DIV = dyn_cast<DILocalVariable>(MO.getMetadata()); if (DIV && !DIV->getName().empty()) OS << "!\"" << DIV->getName() << '\"'; @@ -2056,9 +2062,8 @@ void MachineInstr::setPhysRegsDeadExcept(ArrayRef<Register> UsedRegs, // This is a call with a register mask operand. // Mask clobbers are always dead, so add defs for the non-dead defines. if (HasRegMask) - for (ArrayRef<Register>::iterator I = UsedRegs.begin(), E = UsedRegs.end(); - I != E; ++I) - addRegisterDefined(*I, &TRI); + for (const Register &UsedReg : UsedRegs) + addRegisterDefined(UsedReg, &TRI); } unsigned @@ -2078,7 +2083,7 @@ MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) { void MachineInstr::emitError(StringRef Msg) const { // Find the source location cookie. - unsigned LocCookie = 0; + uint64_t LocCookie = 0; const MDNode *LocMD = nullptr; for (unsigned i = getNumOperands(); i != 0; --i) { if (getOperand(i-1).isMetadata() && @@ -2116,8 +2121,8 @@ MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL, MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID, bool IsIndirect, - MachineOperand &MO, const MDNode *Variable, - const MDNode *Expr) { + const MachineOperand &MO, + const MDNode *Variable, const MDNode *Expr) { assert(isa<DILocalVariable>(Variable) && "not a variable"); assert(cast<DIExpression>(Expr)->isValid() && "not an expression"); assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) && @@ -2131,7 +2136,28 @@ MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL, else MIB.addReg(0U, RegState::Debug); return MIB.addMetadata(Variable).addMetadata(Expr); - } +} + +MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL, + const MCInstrDesc &MCID, bool IsIndirect, + ArrayRef<MachineOperand> MOs, + const MDNode *Variable, const MDNode *Expr) { + assert(isa<DILocalVariable>(Variable) && "not a variable"); + assert(cast<DIExpression>(Expr)->isValid() && "not an expression"); + assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); + if (MCID.Opcode == TargetOpcode::DBG_VALUE) + return BuildMI(MF, DL, MCID, IsIndirect, MOs[0], Variable, Expr); + + auto MIB = BuildMI(MF, DL, MCID); + MIB.addMetadata(Variable).addMetadata(Expr); + for (const MachineOperand &MO : MOs) + if (MO.isReg()) + MIB.addReg(MO.getReg(), RegState::Debug); + else + MIB.add(MO); + return MIB; +} MachineInstrBuilder llvm::BuildMI(MachineBasicBlock &BB, MachineBasicBlock::iterator I, @@ -2155,10 +2181,22 @@ MachineInstrBuilder llvm::BuildMI(MachineBasicBlock &BB, return MachineInstrBuilder(MF, *MI); } +MachineInstrBuilder llvm::BuildMI(MachineBasicBlock &BB, + MachineBasicBlock::iterator I, + const DebugLoc &DL, const MCInstrDesc &MCID, + bool 
IsIndirect, ArrayRef<MachineOperand> MOs, + const MDNode *Variable, const MDNode *Expr) { + MachineFunction &MF = *BB.getParent(); + MachineInstr *MI = BuildMI(MF, DL, MCID, IsIndirect, MOs, Variable, Expr); + BB.insert(I, MI); + return MachineInstrBuilder(MF, *MI); +} + /// Compute the new DIExpression to use with a DBG_VALUE for a spill slot. /// This prepends DW_OP_deref when spilling an indirect DBG_VALUE. -static const DIExpression *computeExprForSpill(const MachineInstr &MI) { - assert(MI.getOperand(0).isReg() && "can't spill non-register"); +static const DIExpression * +computeExprForSpill(const MachineInstr &MI, + SmallVectorImpl<const MachineOperand *> &SpilledOperands) { assert(MI.getDebugVariable()->isValidLocationForIntrinsic(MI.getDebugLoc()) && "Expected inlined-at fields to agree"); @@ -2167,26 +2205,76 @@ static const DIExpression *computeExprForSpill(const MachineInstr &MI) { assert(MI.getDebugOffset().getImm() == 0 && "DBG_VALUE with nonzero offset"); Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore); + } else if (MI.isDebugValueList()) { + // We will replace the spilled register with a frame index, so + // immediately deref all references to the spilled register. + std::array<uint64_t, 1> Ops{{dwarf::DW_OP_deref}}; + for (const MachineOperand *Op : SpilledOperands) { + unsigned OpIdx = MI.getDebugOperandIndex(Op); + Expr = DIExpression::appendOpsToArg(Expr, Ops, OpIdx); + } } return Expr; } +static const DIExpression *computeExprForSpill(const MachineInstr &MI, + Register SpillReg) { + assert(MI.hasDebugOperandForReg(SpillReg) && "Spill Reg is not used in MI."); + SmallVector<const MachineOperand *> SpillOperands; + for (const MachineOperand &Op : MI.getDebugOperandsForReg(SpillReg)) + SpillOperands.push_back(&Op); + return computeExprForSpill(MI, SpillOperands); +} MachineInstr *llvm::buildDbgValueForSpill(MachineBasicBlock &BB, MachineBasicBlock::iterator I, const MachineInstr &Orig, - int FrameIndex) { - const DIExpression *Expr = computeExprForSpill(Orig); - return BuildMI(BB, I, Orig.getDebugLoc(), Orig.getDesc()) - .addFrameIndex(FrameIndex) - .addImm(0U) - .addMetadata(Orig.getDebugVariable()) - .addMetadata(Expr); -} - -void llvm::updateDbgValueForSpill(MachineInstr &Orig, int FrameIndex) { - const DIExpression *Expr = computeExprForSpill(Orig); - Orig.getDebugOperand(0).ChangeToFrameIndex(FrameIndex); - Orig.getDebugOffset().ChangeToImmediate(0U); + int FrameIndex, Register SpillReg) { + const DIExpression *Expr = computeExprForSpill(Orig, SpillReg); + MachineInstrBuilder NewMI = + BuildMI(BB, I, Orig.getDebugLoc(), Orig.getDesc()); + // Non-Variadic Operands: Location, Offset, Variable, Expression + // Variadic Operands: Variable, Expression, Locations... 
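With the ArrayRef-taking BuildMI overloads above, emitting a variadic debug value is direct. A hedged sketch, assuming MF, DL, TII, two live registers Reg0 and Reg1, a DILocalVariable *Var, and a DIExpression *Expr that names both locations via DW_OP_LLVM_arg:

  SmallVector<MachineOperand, 2> Locs = {
      MachineOperand::CreateReg(Reg0, /*isDef=*/false),
      MachineOperand::CreateReg(Reg1, /*isDef=*/false)};
  BuildMI(MF, DL, TII.get(TargetOpcode::DBG_VALUE_LIST),
          /*IsIndirect=*/false, Locs, Var, Expr);

The operand layout noted above is exactly what the spill-rewriting code below relies on.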
+ if (Orig.isNonListDebugValue()) + NewMI.addFrameIndex(FrameIndex).addImm(0U); + NewMI.addMetadata(Orig.getDebugVariable()).addMetadata(Expr); + if (Orig.isDebugValueList()) { + for (const MachineOperand &Op : Orig.debug_operands()) + if (Op.isReg() && Op.getReg() == SpillReg) + NewMI.addFrameIndex(FrameIndex); + else + NewMI.add(MachineOperand(Op)); + } + return NewMI; +} +MachineInstr *llvm::buildDbgValueForSpill( + MachineBasicBlock &BB, MachineBasicBlock::iterator I, + const MachineInstr &Orig, int FrameIndex, + SmallVectorImpl<const MachineOperand *> &SpilledOperands) { + const DIExpression *Expr = computeExprForSpill(Orig, SpilledOperands); + MachineInstrBuilder NewMI = + BuildMI(BB, I, Orig.getDebugLoc(), Orig.getDesc()); + // Non-Variadic Operands: Location, Offset, Variable, Expression + // Variadic Operands: Variable, Expression, Locations... + if (Orig.isNonListDebugValue()) + NewMI.addFrameIndex(FrameIndex).addImm(0U); + NewMI.addMetadata(Orig.getDebugVariable()).addMetadata(Expr); + if (Orig.isDebugValueList()) { + for (const MachineOperand &Op : Orig.debug_operands()) + if (is_contained(SpilledOperands, &Op)) + NewMI.addFrameIndex(FrameIndex); + else + NewMI.add(MachineOperand(Op)); + } + return NewMI; +} + +void llvm::updateDbgValueForSpill(MachineInstr &Orig, int FrameIndex, + Register Reg) { + const DIExpression *Expr = computeExprForSpill(Orig, Reg); + if (Orig.isNonListDebugValue()) + Orig.getDebugOffset().ChangeToImmediate(0U); + for (MachineOperand &Op : Orig.getDebugOperandsForReg(Reg)) + Op.ChangeToFrameIndex(FrameIndex); Orig.getDebugExpressionOp().setMetadata(Expr); } @@ -2201,7 +2289,7 @@ void MachineInstr::collectDebugValues( DI != DE; ++DI) { if (!DI->isDebugValue()) return; - if (DI->getDebugOperandForReg(MI.getOperand(0).getReg())) + if (DI->hasDebugOperandForReg(MI.getOperand(0).getReg())) DbgValues.push_back(&*DI); } } @@ -2219,14 +2307,15 @@ void MachineInstr::changeDebugValuesDefReg(Register Reg) { auto *DI = MO.getParent(); if (!DI->isDebugValue()) continue; - if (DI->getDebugOperandForReg(DefReg)) { + if (DI->hasDebugOperandForReg(DefReg)) { DbgValues.push_back(DI); } } // Propagate Reg to debug value instructions. 
for (auto *DBI : DbgValues) - DBI->getDebugOperandForReg(DefReg)->setReg(Reg); + for (MachineOperand &Op : DBI->getDebugOperandsForReg(DefReg)) + Op.setReg(Reg); } using MMOList = SmallVector<const MachineMemOperand *, 2>; @@ -2285,3 +2374,9 @@ unsigned MachineInstr::getDebugInstrNum() { DebugInstrNum = getParent()->getParent()->getNewDebugInstrNum(); return DebugInstrNum; } + +unsigned MachineInstr::getDebugInstrNum(MachineFunction &MF) { + if (DebugInstrNum == 0) + DebugInstrNum = MF.getNewDebugInstrNum(); + return DebugInstrNum; +} diff --git a/llvm/lib/CodeGen/MachineInstrBundle.cpp b/llvm/lib/CodeGen/MachineInstrBundle.cpp index 50456e489ea1..6ca97031b92a 100644 --- a/llvm/lib/CodeGen/MachineInstrBundle.cpp +++ b/llvm/lib/CodeGen/MachineInstrBundle.cpp @@ -47,11 +47,9 @@ bool UnpackMachineBundles::runOnMachineFunction(MachineFunction &MF) { return false; bool Changed = false; - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { - MachineBasicBlock *MBB = &*I; - - for (MachineBasicBlock::instr_iterator MII = MBB->instr_begin(), - MIE = MBB->instr_end(); MII != MIE; ) { + for (MachineBasicBlock &MBB : MF) { + for (MachineBasicBlock::instr_iterator MII = MBB.instr_begin(), + MIE = MBB.instr_end(); MII != MIE; ) { MachineInstr *MI = &*MII; // Remove BUNDLE instruction and the InsideBundle flags from bundled @@ -256,8 +254,7 @@ llvm::finalizeBundle(MachineBasicBlock &MBB, /// MachineFunction. Return true if any bundles are finalized. bool llvm::finalizeBundles(MachineFunction &MF) { bool Changed = false; - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { - MachineBasicBlock &MBB = *I; + for (MachineBasicBlock &MBB : MF) { MachineBasicBlock::instr_iterator MII = MBB.instr_begin(); MachineBasicBlock::instr_iterator MIE = MBB.instr_end(); if (MII == MIE) diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp index c06bc39b4940..883299c452b7 100644 --- a/llvm/lib/CodeGen/MachineLICM.cpp +++ b/llvm/lib/CodeGen/MachineLICM.cpp @@ -69,11 +69,6 @@ HoistCheapInsts("hoist-cheap-insts", cl::init(false), cl::Hidden); static cl::opt<bool> -SinkInstsToAvoidSpills("sink-insts-to-avoid-spills", - cl::desc("MachineLICM should sink instructions into " - "loops to avoid register spills"), - cl::init(false), cl::Hidden); -static cl::opt<bool> HoistConstStores("hoist-const-stores", cl::desc("Hoist invariant stores"), cl::init(true), cl::Hidden); @@ -246,8 +241,6 @@ namespace { void HoistOutOfLoop(MachineDomTreeNode *HeaderN); - void SinkIntoLoop(); - void InitRegPressure(MachineBasicBlock *BB); DenseMap<unsigned, int> calcRegisterCost(const MachineInstr *MI, @@ -395,9 +388,6 @@ bool MachineLICMBase::runOnMachineFunction(MachineFunction &MF) { FirstInLoop = true; HoistOutOfLoop(N); CSEMap.clear(); - - if (SinkInstsToAvoidSpills) - SinkIntoLoop(); } } @@ -787,88 +777,6 @@ void MachineLICMBase::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { } } -/// Sink instructions into loops if profitable. This especially tries to prevent -/// register spills caused by register pressure if there is little to no -/// overhead moving instructions into loops. -void MachineLICMBase::SinkIntoLoop() { - MachineBasicBlock *Preheader = getCurPreheader(); - if (!Preheader) - return; - - SmallVector<MachineInstr *, 8> Candidates; - for (MachineBasicBlock::instr_iterator I = Preheader->instr_begin(); - I != Preheader->instr_end(); ++I) { - // We need to ensure that we can safely move this instruction into the loop. 
- // As such, it must not have side-effects, e.g. such as a call has. - LLVM_DEBUG(dbgs() << "LICM: Analysing sink candidate: " << *I); - if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(&*I)) { - LLVM_DEBUG(dbgs() << "LICM: Added as sink candidate.\n"); - Candidates.push_back(&*I); - continue; - } - LLVM_DEBUG(dbgs() << "LICM: Not added as sink candidate.\n"); - } - - for (MachineInstr *I : Candidates) { - const MachineOperand &MO = I->getOperand(0); - if (!MO.isDef() || !MO.isReg() || !MO.getReg()) - continue; - if (!MRI->hasOneDef(MO.getReg())) - continue; - bool CanSink = true; - MachineBasicBlock *SinkBlock = nullptr; - LLVM_DEBUG(dbgs() << "LICM: Try sinking: " << *I); - - for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) { - LLVM_DEBUG(dbgs() << "LICM: Analysing use: "; MI.dump()); - // FIXME: Come up with a proper cost model that estimates whether sinking - // the instruction (and thus possibly executing it on every loop - // iteration) is more expensive than a register. - // For now assumes that copies are cheap and thus almost always worth it. - if (!MI.isCopy()) { - CanSink = false; - break; - } - if (!SinkBlock) { - SinkBlock = MI.getParent(); - LLVM_DEBUG(dbgs() << "LICM: Setting sink block to: " - << printMBBReference(*SinkBlock) << "\n"); - continue; - } - SinkBlock = DT->findNearestCommonDominator(SinkBlock, MI.getParent()); - if (!SinkBlock) { - LLVM_DEBUG(dbgs() << "LICM: Can't find nearest dominator\n"); - CanSink = false; - break; - } - LLVM_DEBUG(dbgs() << "LICM: Setting nearest common dom block: " << - printMBBReference(*SinkBlock) << "\n"); - } - if (!CanSink) { - LLVM_DEBUG(dbgs() << "LICM: Can't sink instruction.\n"); - continue; - } - if (!SinkBlock) { - LLVM_DEBUG(dbgs() << "LICM: Not sinking, can't find sink block.\n"); - continue; - } - if (SinkBlock == Preheader) { - LLVM_DEBUG(dbgs() << "LICM: Not sinking, sink block is the preheader\n"); - continue; - } - - LLVM_DEBUG(dbgs() << "LICM: Sinking to " << printMBBReference(*SinkBlock) - << " from " << printMBBReference(*I->getParent()) - << ": " << *I); - SinkBlock->splice(SinkBlock->getFirstNonPHI(), Preheader, I); - - // The instruction is moved from its basic block, so do not retain the - // debug information. - assert(!I->isDebugInstr() && "Should not sink debug inst"); - I->setDebugLoc(DebugLoc()); - } -} - static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) { return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg()); } @@ -1056,11 +964,11 @@ bool MachineLICMBase::IsLICMCandidate(MachineInstr &I) { return false; } - // If it is load then check if it is guaranteed to execute by making sure that - // it dominates all exiting blocks. If it doesn't, then there is a path out of - // the loop which does not execute this load, so we can't hoist it. Loads - // from constant memory are not safe to speculate all the time, for example - // indexed load from a jump table. + // If it is a load then check if it is guaranteed to execute by making sure + // that it dominates all exiting blocks. If it doesn't, then there is a path + // out of the loop which does not execute this load, so we can't hoist it. + // Loads from constant memory are safe to speculate, for example indexed load + // from a jump table. // Stores and side effects are already checked by isSafeToMove. 
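The rewritten comment above turns on "guaranteed to execute", which for a single loop reduces to a dominance question. A hedged illustration with assumed names (CurLoop, DT, and the load's block LoadBB):

  SmallVector<MachineBasicBlock *, 4> Exits;
  CurLoop->getExitingBlocks(Exits);
  // The load runs on every iteration iff its block dominates each exit.
  bool GuaranteedToExecute = llvm::all_of(
      Exits, [&](MachineBasicBlock *EB) { return DT->dominates(LoadBB, EB); });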
if (I.mayLoad() && !mayLoadFromGOTOrConstantPool(I) && !IsGuaranteedToExecute(I.getParent())) { diff --git a/llvm/lib/CodeGen/MachineLoopInfo.cpp b/llvm/lib/CodeGen/MachineLoopInfo.cpp index 78480d0e1488..8f91a5b698d0 100644 --- a/llvm/lib/CodeGen/MachineLoopInfo.cpp +++ b/llvm/lib/CodeGen/MachineLoopInfo.cpp @@ -115,8 +115,8 @@ DebugLoc MachineLoop::getStartLoc() const { } MachineBasicBlock * -MachineLoopInfo::findLoopPreheader(MachineLoop *L, - bool SpeculativePreheader) const { +MachineLoopInfo::findLoopPreheader(MachineLoop *L, bool SpeculativePreheader, + bool FindMultiLoopPreheader) const { if (MachineBasicBlock *PB = L->getLoopPreheader()) return PB; @@ -139,12 +139,14 @@ MachineLoopInfo::findLoopPreheader(MachineLoop *L, // Check if the preheader candidate is a successor of any other loop // headers. We want to avoid having two loop setups in the same block. - for (MachineBasicBlock *S : Preheader->successors()) { - if (S == HB) - continue; - MachineLoop *T = getLoopFor(S); - if (T && T->getHeader() == S) - return nullptr; + if (!FindMultiLoopPreheader) { + for (MachineBasicBlock *S : Preheader->successors()) { + if (S == HB) + continue; + MachineLoop *T = getLoopFor(S); + if (T && T->getHeader() == S) + return nullptr; + } } return Preheader; } diff --git a/llvm/lib/CodeGen/MachineModuleInfo.cpp b/llvm/lib/CodeGen/MachineModuleInfo.cpp index 5565b9cededa..50cbb14e926e 100644 --- a/llvm/lib/CodeGen/MachineModuleInfo.cpp +++ b/llvm/lib/CodeGen/MachineModuleInfo.cpp @@ -16,7 +16,9 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" @@ -76,11 +78,25 @@ class MMIAddrLabelMap { /// we get notified if a block is deleted or RAUWd. std::vector<MMIAddrLabelMapCallbackPtr> BBCallbacks; + /// This is a per-function list of symbols whose corresponding BasicBlock got + /// deleted. These symbols need to be emitted at some point in the file, so + /// AsmPrinter emits them after the function body. + DenseMap<AssertingVH<Function>, std::vector<MCSymbol*>> + DeletedAddrLabelsNeedingEmission; + public: MMIAddrLabelMap(MCContext &context) : Context(context) {} + ~MMIAddrLabelMap() { + assert(DeletedAddrLabelsNeedingEmission.empty() && + "Some labels for deleted blocks never got emitted"); + } + ArrayRef<MCSymbol *> getAddrLabelSymbolToEmit(BasicBlock *BB); + void takeDeletedSymbolsForFunction(Function *F, + std::vector<MCSymbol*> &Result); + void UpdateForDeletedBlock(BasicBlock *BB); void UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New); }; @@ -110,6 +126,20 @@ ArrayRef<MCSymbol *> MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) { return Entry.Symbols; } +/// If we have any deleted symbols for F, return them. +void MMIAddrLabelMap:: +takeDeletedSymbolsForFunction(Function *F, std::vector<MCSymbol*> &Result) { + DenseMap<AssertingVH<Function>, std::vector<MCSymbol*>>::iterator I = + DeletedAddrLabelsNeedingEmission.find(F); + + // If there are no entries for the function, just return. + if (I == DeletedAddrLabelsNeedingEmission.end()) return; + + // Otherwise, take the list. + std::swap(Result, I->second); + DeletedAddrLabelsNeedingEmission.erase(I); +} + void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) { // If the block got deleted, there is no need for the symbol. 
If the symbol // was already emitted, we can just forget about it, otherwise we need to @@ -122,8 +152,16 @@ void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) { assert((BB->getParent() == nullptr || BB->getParent() == Entry.Fn) && "Block/parent mismatch"); - assert(llvm::all_of(Entry.Symbols, [](MCSymbol *Sym) { - return Sym->isDefined(); })); + for (MCSymbol *Sym : Entry.Symbols) { + if (Sym->isDefined()) + return; + + // If the block is not yet defined, we need to emit it at the end of the + // function. Add the symbol to the DeletedAddrLabelsNeedingEmission list + // for the containing Function. Since the block is being deleted, its + // parent may already be removed, we have to get the function from 'Entry'. + DeletedAddrLabelsNeedingEmission[Entry.Fn].push_back(Sym); + } } void MMIAddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) { @@ -158,6 +196,7 @@ void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) { void MachineModuleInfo::initialize() { ObjFileMMI = nullptr; CurCallSite = 0; + NextFnNum = 0; UsesMSVCFloatingPoint = UsesMorestackAddr = false; HasSplitStack = HasNosplitStack = false; AddrLabelSymbols = nullptr; @@ -178,9 +217,11 @@ void MachineModuleInfo::finalize() { MachineModuleInfo::MachineModuleInfo(MachineModuleInfo &&MMI) : TM(std::move(MMI.TM)), - Context(MMI.TM.getMCAsmInfo(), MMI.TM.getMCRegisterInfo(), - MMI.TM.getObjFileLowering(), nullptr, nullptr, false), + Context(MMI.TM.getTargetTriple(), MMI.TM.getMCAsmInfo(), + MMI.TM.getMCRegisterInfo(), MMI.TM.getMCSubtargetInfo(), nullptr, + nullptr, false), MachineFunctions(std::move(MMI.MachineFunctions)) { + Context.setObjectFileInfo(MMI.TM.getObjFileLowering()); ObjFileMMI = MMI.ObjFileMMI; CurCallSite = MMI.CurCallSite; UsesMSVCFloatingPoint = MMI.UsesMSVCFloatingPoint; @@ -193,16 +234,20 @@ MachineModuleInfo::MachineModuleInfo(MachineModuleInfo &&MMI) } MachineModuleInfo::MachineModuleInfo(const LLVMTargetMachine *TM) - : TM(*TM), Context(TM->getMCAsmInfo(), TM->getMCRegisterInfo(), - TM->getObjFileLowering(), nullptr, nullptr, false) { + : TM(*TM), Context(TM->getTargetTriple(), TM->getMCAsmInfo(), + TM->getMCRegisterInfo(), TM->getMCSubtargetInfo(), + nullptr, nullptr, false) { + Context.setObjectFileInfo(TM->getObjFileLowering()); initialize(); } MachineModuleInfo::MachineModuleInfo(const LLVMTargetMachine *TM, MCContext *ExtContext) - : TM(*TM), Context(TM->getMCAsmInfo(), TM->getMCRegisterInfo(), - TM->getObjFileLowering(), nullptr, nullptr, false), + : TM(*TM), Context(TM->getTargetTriple(), TM->getMCAsmInfo(), + TM->getMCRegisterInfo(), TM->getMCSubtargetInfo(), + nullptr, nullptr, false), ExternalContext(ExtContext) { + Context.setObjectFileInfo(TM->getObjFileLowering()); initialize(); } @@ -218,14 +263,21 @@ MachineModuleInfo::getAddrLabelSymbolToEmit(const BasicBlock *BB) { return AddrLabelSymbols->getAddrLabelSymbolToEmit(const_cast<BasicBlock*>(BB)); } +void MachineModuleInfo:: +takeDeletedSymbolsForFunction(const Function *F, + std::vector<MCSymbol*> &Result) { + // If no blocks have had their addresses taken, we're done. 
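The natural consumer of this API is the assembly printer, which still owes definitions for labels whose blocks were deleted. A hedged sketch of the consumption pattern, with F, MMI, and OutStreamer assumed in scope:

  std::vector<MCSymbol *> DeadBlockSyms;
  MMI.takeDeletedSymbolsForFunction(&F, DeadBlockSyms);
  for (MCSymbol *Sym : DeadBlockSyms)
    OutStreamer->emitLabel(Sym); // give each orphaned symbol a definition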
+ if (!AddrLabelSymbols) return; + return AddrLabelSymbols-> + takeDeletedSymbolsForFunction(const_cast<Function*>(F), Result); +} + /// \name Exception Handling /// \{ void MachineModuleInfo::addPersonality(const Function *Personality) { - for (unsigned i = 0; i < Personalities.size(); ++i) - if (Personalities[i] == Personality) - return; - Personalities.push_back(Personality); + if (!llvm::is_contained(Personalities, Personality)) + Personalities.push_back(Personality); } /// \} @@ -317,9 +369,44 @@ INITIALIZE_PASS(MachineModuleInfoWrapperPass, "machinemoduleinfo", "Machine Module Information", false, false) char MachineModuleInfoWrapperPass::ID = 0; +static unsigned getLocCookie(const SMDiagnostic &SMD, const SourceMgr &SrcMgr, + std::vector<const MDNode *> &LocInfos) { + // Look up a LocInfo for the buffer this diagnostic is coming from. + unsigned BufNum = SrcMgr.FindBufferContainingLoc(SMD.getLoc()); + const MDNode *LocInfo = nullptr; + if (BufNum > 0 && BufNum <= LocInfos.size()) + LocInfo = LocInfos[BufNum - 1]; + + // If the inline asm had metadata associated with it, pull out a location + // cookie corresponding to which line the error occurred on. + unsigned LocCookie = 0; + if (LocInfo) { + unsigned ErrorLine = SMD.getLineNo() - 1; + if (ErrorLine >= LocInfo->getNumOperands()) + ErrorLine = 0; + + if (LocInfo->getNumOperands() != 0) + if (const ConstantInt *CI = + mdconst::dyn_extract<ConstantInt>(LocInfo->getOperand(ErrorLine))) + LocCookie = CI->getZExtValue(); + } + + return LocCookie; +} + bool MachineModuleInfoWrapperPass::doInitialization(Module &M) { MMI.initialize(); MMI.TheModule = &M; + // FIXME: Do this for new pass manager. + LLVMContext &Ctx = M.getContext(); + MMI.getContext().setDiagnosticHandler( + [&Ctx](const SMDiagnostic &SMD, bool IsInlineAsm, const SourceMgr &SrcMgr, + std::vector<const MDNode *> &LocInfos) { + unsigned LocCookie = 0; + if (IsInlineAsm) + LocCookie = getLocCookie(SMD, SrcMgr, LocInfos); + Ctx.diagnose(DiagnosticInfoSrcMgr(SMD, IsInlineAsm, LocCookie)); + }); MMI.DbgInfoAvailable = !M.debug_compile_units().empty(); return false; } diff --git a/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp b/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp index 16d24880ebe4..9c3b31935f6d 100644 --- a/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp +++ b/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp @@ -25,6 +25,7 @@ using namespace llvm; void MachineModuleInfoMachO::anchor() {} void MachineModuleInfoELF::anchor() {} void MachineModuleInfoCOFF::anchor() {} +void MachineModuleInfoWasm::anchor() {} using PairTy = std::pair<MCSymbol *, MachineModuleInfoImpl::StubValueTy>; static int SortSymbolPair(const PairTy *LHS, const PairTy *RHS) { diff --git a/llvm/lib/CodeGen/MachineModuleSlotTracker.cpp b/llvm/lib/CodeGen/MachineModuleSlotTracker.cpp new file mode 100644 index 000000000000..e4da179efcc4 --- /dev/null +++ b/llvm/lib/CodeGen/MachineModuleSlotTracker.cpp @@ -0,0 +1,81 @@ +//===-- llvm/CodeGen/MachineModuleInfo.cpp ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineModuleSlotTracker.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" + +using namespace llvm; + +void MachineModuleSlotTracker::processMachineFunctionMetadata( + AbstractSlotTrackerStorage *AST, const MachineFunction &MF) { + // Create metadata created within the backend. + for (const MachineBasicBlock &MBB : MF) + for (const MachineInstr &MI : MBB.instrs()) + for (const MachineMemOperand *MMO : MI.memoperands()) { + AAMDNodes AAInfo = MMO->getAAInfo(); + if (AAInfo.TBAA) + AST->createMetadataSlot(AAInfo.TBAA); + if (AAInfo.TBAAStruct) + AST->createMetadataSlot(AAInfo.TBAAStruct); + if (AAInfo.Scope) + AST->createMetadataSlot(AAInfo.Scope); + if (AAInfo.NoAlias) + AST->createMetadataSlot(AAInfo.NoAlias); + } +} + +void MachineModuleSlotTracker::processMachineModule( + AbstractSlotTrackerStorage *AST, const Module *M, + bool ShouldInitializeAllMetadata) { + if (ShouldInitializeAllMetadata) { + for (const Function &F : *M) { + if (&F != &TheFunction) + continue; + MDNStartSlot = AST->getNextMetadataSlot(); + if (auto *MF = TheMMI.getMachineFunction(F)) + processMachineFunctionMetadata(AST, *MF); + MDNEndSlot = AST->getNextMetadataSlot(); + break; + } + } +} + +void MachineModuleSlotTracker::processMachineFunction( + AbstractSlotTrackerStorage *AST, const Function *F, + bool ShouldInitializeAllMetadata) { + if (!ShouldInitializeAllMetadata && F == &TheFunction) { + MDNStartSlot = AST->getNextMetadataSlot(); + if (auto *MF = TheMMI.getMachineFunction(*F)) + processMachineFunctionMetadata(AST, *MF); + MDNEndSlot = AST->getNextMetadataSlot(); + } +} + +void MachineModuleSlotTracker::collectMachineMDNodes( + MachineMDNodeListType &L) const { + collectMDNodes(L, MDNStartSlot, MDNEndSlot); +} + +MachineModuleSlotTracker::MachineModuleSlotTracker( + const MachineFunction *MF, bool ShouldInitializeAllMetadata) + : ModuleSlotTracker(MF->getFunction().getParent(), + ShouldInitializeAllMetadata), + TheFunction(MF->getFunction()), TheMMI(MF->getMMI()), MDNStartSlot(0), + MDNEndSlot(0) { + setProcessHook([this](AbstractSlotTrackerStorage *AST, const Module *M, + bool ShouldInitializeAllMetadata) { + this->processMachineModule(AST, M, ShouldInitializeAllMetadata); + }); + setProcessHook([this](AbstractSlotTrackerStorage *AST, const Function *F, + bool ShouldInitializeAllMetadata) { + this->processMachineFunction(AST, F, ShouldInitializeAllMetadata); + }); +} + +MachineModuleSlotTracker::~MachineModuleSlotTracker() = default; diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp index 9b09f5273298..b8ba0453d24c 100644 --- a/llvm/lib/CodeGen/MachineOperand.cpp +++ b/llvm/lib/CodeGen/MachineOperand.cpp @@ -653,6 +653,14 @@ static void printCFI(raw_ostream &OS, const MCCFIInstruction &CFI, printCFIRegister(CFI.getRegister(), OS, TRI); OS << ", " << CFI.getOffset(); break; + case MCCFIInstruction::OpLLVMDefAspaceCfa: + OS << "llvm_def_aspace_cfa "; + if (MCSymbol *Label = CFI.getLabel()) + MachineOperand::printSymbol(OS, *Label); + printCFIRegister(CFI.getRegister(), OS, TRI); + OS << ", " << CFI.getOffset(); + OS << ", " << CFI.getAddressSpace(); + break; case MCCFIInstruction::OpRelOffset: OS << "rel_offset "; if (MCSymbol *Label = CFI.getLabel()) @@ -927,7 +935,7 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, case 
MachineOperand::MO_IntrinsicID: { Intrinsic::ID ID = getIntrinsicID(); if (ID < Intrinsic::num_intrinsics) - OS << "intrinsic(@" << Intrinsic::getName(ID, None) << ')'; + OS << "intrinsic(@" << Intrinsic::getBaseName(ID) << ')'; else if (IntrinsicInfo) OS << "intrinsic(@" << IntrinsicInfo->getName(ID) << ')'; else @@ -1015,13 +1023,12 @@ MachinePointerInfo MachinePointerInfo::getUnknownStack(MachineFunction &MF) { } MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f, - uint64_t s, Align a, - const AAMDNodes &AAInfo, + LLT type, Align a, const AAMDNodes &AAInfo, const MDNode *Ranges, SyncScope::ID SSID, AtomicOrdering Ordering, AtomicOrdering FailureOrdering) - : PtrInfo(ptrinfo), Size(s), FlagVals(f), BaseAlign(a), AAInfo(AAInfo), - Ranges(Ranges) { + : PtrInfo(ptrinfo), MemoryType(type), FlagVals(f), BaseAlign(a), + AAInfo(AAInfo), Ranges(Ranges) { assert((PtrInfo.V.isNull() || PtrInfo.V.is<const PseudoSourceValue *>() || isa<PointerType>(PtrInfo.V.get<const Value *>()->getType())) && "invalid pointer value"); @@ -1030,16 +1037,26 @@ MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f, AtomicInfo.SSID = static_cast<unsigned>(SSID); assert(getSyncScopeID() == SSID && "Value truncated"); AtomicInfo.Ordering = static_cast<unsigned>(Ordering); - assert(getOrdering() == Ordering && "Value truncated"); + assert(getSuccessOrdering() == Ordering && "Value truncated"); AtomicInfo.FailureOrdering = static_cast<unsigned>(FailureOrdering); assert(getFailureOrdering() == FailureOrdering && "Value truncated"); } +MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f, + uint64_t s, Align a, + const AAMDNodes &AAInfo, + const MDNode *Ranges, SyncScope::ID SSID, + AtomicOrdering Ordering, + AtomicOrdering FailureOrdering) + : MachineMemOperand(ptrinfo, f, + s == ~UINT64_C(0) ? LLT() : LLT::scalar(8 * s), a, + AAInfo, Ranges, SSID, Ordering, FailureOrdering) {} + /// Profile - Gather unique data for the object. /// void MachineMemOperand::Profile(FoldingSetNodeID &ID) const { ID.AddInteger(getOffset()); - ID.AddInteger(Size); + ID.AddInteger(getMemoryType().getUniqueRAWLLTData()); ID.AddPointer(getOpaqueValue()); ID.AddInteger(getFlags()); ID.AddInteger(getBaseAlign().value()); @@ -1060,10 +1077,6 @@ void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) { } } -/// getAlignment - Return the minimum known alignment in bytes of the -/// actual memory reference. -uint64_t MachineMemOperand::getAlignment() const { return getAlign().value(); } - /// getAlign - Return the minimum known alignment in bytes of the /// actual memory reference. Align MachineMemOperand::getAlign() const { @@ -1103,15 +1116,15 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, printSyncScope(OS, Context, getSyncScopeID(), SSNs); - if (getOrdering() != AtomicOrdering::NotAtomic) - OS << toIRString(getOrdering()) << ' '; + if (getSuccessOrdering() != AtomicOrdering::NotAtomic) + OS << toIRString(getSuccessOrdering()) << ' '; if (getFailureOrdering() != AtomicOrdering::NotAtomic) OS << toIRString(getFailureOrdering()) << ' '; - if (getSize() == MemoryLocation::UnknownSize) - OS << "unknown-size"; + if (getMemoryType().isValid()) + OS << '(' << getMemoryType() << ')'; else - OS << getSize(); + OS << "unknown-size"; if (const Value *Val = getValue()) { OS << ((isLoad() && isStore()) ? " on " : isLoad() ? 
" from " : " into "); @@ -1160,9 +1173,14 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, break; } } + } else if (getOpaqueValue() == nullptr && getOffset() != 0) { + OS << ((isLoad() && isStore()) ? " on " + : isLoad() ? " from " + : " into ") + << "unknown-address"; } MachineOperand::printOperandOffset(OS, getOffset()); - if (getAlign() != getSize()) + if (getSize() > 0 && getAlign() != getSize()) OS << ", align " << getAlign().value(); if (getAlign() != getBaseAlign()) OS << ", basealign " << getBaseAlign().value(); diff --git a/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp b/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp index dcb8e4073ea3..59fc23983d3d 100644 --- a/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp +++ b/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp @@ -93,7 +93,7 @@ static const char ore_name[] = "Machine Optimization Remark Emitter"; #define ORE_NAME "machine-opt-remark-emitter" INITIALIZE_PASS_BEGIN(MachineOptimizationRemarkEmitterPass, ORE_NAME, ore_name, - false, true) + true, true) INITIALIZE_PASS_DEPENDENCY(LazyMachineBlockFrequencyInfoPass) INITIALIZE_PASS_END(MachineOptimizationRemarkEmitterPass, ORE_NAME, ore_name, - false, true) + true, true) diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp index 02998d41d831..1d55bd00e033 100644 --- a/llvm/lib/CodeGen/MachineOutliner.cpp +++ b/llvm/lib/CodeGen/MachineOutliner.cpp @@ -518,9 +518,8 @@ void MachineOutliner::findCandidates( // First, find all of the repeated substrings in the tree of minimum length // 2. std::vector<Candidate> CandidatesForRepeatedSeq; - for (auto It = ST.begin(), Et = ST.end(); It != Et; ++It) { + for (const SuffixTree::RepeatedSubstring &RS : ST) { CandidatesForRepeatedSeq.clear(); - SuffixTree::RepeatedSubstring RS = *It; unsigned StringLen = RS.Length; for (const unsigned &StartIdx : RS.StartIndices) { unsigned EndIdx = StartIdx + StringLen - 1; @@ -807,7 +806,7 @@ bool MachineOutliner::outline(Module &M, if (MOP.isDef()) { // Introduce DefRegs set to skip the redundant register. DefRegs.insert(MOP.getReg()); - if (UseRegs.count(MOP.getReg())) + if (!MOP.isDead() && UseRegs.count(MOP.getReg())) // Since the regiester is modeled as defined, // it is not necessary to be put in use register set. 
UseRegs.erase(MOP.getReg()); diff --git a/llvm/lib/CodeGen/MachinePassManager.cpp b/llvm/lib/CodeGen/MachinePassManager.cpp index e81575c88935..476dc059d2b5 100644 --- a/llvm/lib/CodeGen/MachinePassManager.cpp +++ b/llvm/lib/CodeGen/MachinePassManager.cpp @@ -49,11 +49,6 @@ Error MachineFunctionPassManager::run(Module &M, }); } - if (DebugLogging) { - dbgs() << "Starting " << getTypeName<MachineFunction>() - << " pass manager run.\n"; - } - for (auto &F : InitializationFuncs) { if (auto Err = F(M, MFAM)) return Err; @@ -64,9 +59,6 @@ Error MachineFunctionPassManager::run(Module &M, do { // Run machine module passes for (; MachineModulePasses.count(Idx) && Idx != Size; ++Idx) { - if (DebugLogging) - dbgs() << "Running pass: " << Passes[Idx]->name() << " on " - << M.getName() << '\n'; if (auto Err = MachineModulePasses.at(Idx)(M, MFAM)) return Err; } @@ -110,11 +102,6 @@ Error MachineFunctionPassManager::run(Module &M, return Err; } - if (DebugLogging) { - dbgs() << "Finished " << getTypeName<MachineFunction>() - << " pass manager run.\n"; - } - return Error::success(); } diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp index d0fe29f65ede..caa3f8049aeb 100644 --- a/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -34,6 +34,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/PriorityQueue.h" +#include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" @@ -812,11 +813,10 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) { SU.addPred(Dep); continue; } - AliasResult AAResult = AA->alias( - MemoryLocation::getAfter(MMO1->getValue(), MMO1->getAAInfo()), - MemoryLocation::getAfter(MMO2->getValue(), MMO2->getAAInfo())); - - if (AAResult != NoAlias) { + if (!AA->isNoAlias( + MemoryLocation::getAfter(MMO1->getValue(), MMO1->getAAInfo()), + MemoryLocation::getAfter(MMO2->getValue(), + MMO2->getAAInfo()))) { SDep Dep(Load, SDep::Barrier); Dep.setLatency(1); SU.addPred(Dep); @@ -949,10 +949,9 @@ void SwingSchedulerDAG::changeDependences() { // Remove the dependence. The value now depends on a prior iteration. SmallVector<SDep, 4> Deps; - for (SUnit::pred_iterator P = I.Preds.begin(), E = I.Preds.end(); P != E; - ++P) - if (P->getSUnit() == DefSU) - Deps.push_back(*P); + for (const SDep &P : I.Preds) + if (P.getSUnit() == DefSU) + Deps.push_back(P); for (int i = 0, e = Deps.size(); i != e; i++) { Topo.RemovePred(&I, Deps[i].getSUnit()); I.removePred(Deps[i]); @@ -1203,12 +1202,10 @@ static void swapAntiDependences(std::vector<SUnit> &SUnits) { DepsAdded.push_back(std::make_pair(SU, *IP)); } } - for (SmallVector<std::pair<SUnit *, SDep>, 8>::iterator I = DepsAdded.begin(), - E = DepsAdded.end(); - I != E; ++I) { + for (std::pair<SUnit *, SDep> &P : DepsAdded) { // Remove this anti dependency and add one in the reverse direction. 
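In the MachinePipeliner hunk above, the aliasing question is now posed to the oracle directly rather than by comparing an AliasResult against NoAlias. The shape of the call, hedged, with V1, V2 and their TBAA tags assumed in scope:

  // "Everything from V1 onward" vs. "everything from V2 onward": disjoint?
  bool Disjoint = AA->isNoAlias(MemoryLocation::getAfter(V1, TBAA1),
                                MemoryLocation::getAfter(V2, TBAA2));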
- SUnit *SU = I->first; - SDep &D = I->second; + SUnit *SU = P.first; + SDep &D = P.second; SUnit *TargetSU = D.getSUnit(); unsigned Reg = D.getReg(); unsigned Lat = D.getLatency(); @@ -1447,22 +1444,18 @@ void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) { ScheduleInfo.resize(SUnits.size()); LLVM_DEBUG({ - for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(), - E = Topo.end(); - I != E; ++I) { - const SUnit &SU = SUnits[*I]; + for (int I : Topo) { + const SUnit &SU = SUnits[I]; dumpNode(SU); } }); int maxASAP = 0; // Compute ASAP and ZeroLatencyDepth. - for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(), - E = Topo.end(); - I != E; ++I) { + for (int I : Topo) { int asap = 0; int zeroLatencyDepth = 0; - SUnit *SU = &SUnits[*I]; + SUnit *SU = &SUnits[I]; for (SUnit::const_pred_iterator IP = SU->Preds.begin(), EP = SU->Preds.end(); IP != EP; ++IP) { @@ -1476,8 +1469,8 @@ void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) { getDistance(pred, SU, *IP) * MII)); } maxASAP = std::max(maxASAP, asap); - ScheduleInfo[*I].ASAP = asap; - ScheduleInfo[*I].ZeroLatencyDepth = zeroLatencyDepth; + ScheduleInfo[I].ASAP = asap; + ScheduleInfo[I].ZeroLatencyDepth = zeroLatencyDepth; } // Compute ALAP, ZeroLatencyHeight, and MOV. @@ -1531,25 +1524,22 @@ static bool pred_L(SetVector<SUnit *> &NodeOrder, Preds.clear(); for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end(); I != E; ++I) { - for (SUnit::pred_iterator PI = (*I)->Preds.begin(), PE = (*I)->Preds.end(); - PI != PE; ++PI) { - if (S && S->count(PI->getSUnit()) == 0) + for (const SDep &Pred : (*I)->Preds) { + if (S && S->count(Pred.getSUnit()) == 0) continue; - if (ignoreDependence(*PI, true)) + if (ignoreDependence(Pred, true)) continue; - if (NodeOrder.count(PI->getSUnit()) == 0) - Preds.insert(PI->getSUnit()); + if (NodeOrder.count(Pred.getSUnit()) == 0) + Preds.insert(Pred.getSUnit()); } // Back-edges are predecessors with an anti-dependence. 
- for (SUnit::const_succ_iterator IS = (*I)->Succs.begin(), - ES = (*I)->Succs.end(); - IS != ES; ++IS) { - if (IS->getKind() != SDep::Anti) + for (const SDep &Succ : (*I)->Succs) { + if (Succ.getKind() != SDep::Anti) continue; - if (S && S->count(IS->getSUnit()) == 0) + if (S && S->count(Succ.getSUnit()) == 0) continue; - if (NodeOrder.count(IS->getSUnit()) == 0) - Preds.insert(IS->getSUnit()); + if (NodeOrder.count(Succ.getSUnit()) == 0) + Preds.insert(Succ.getSUnit()); } } return !Preds.empty(); @@ -1564,24 +1554,21 @@ static bool succ_L(SetVector<SUnit *> &NodeOrder, Succs.clear(); for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end(); I != E; ++I) { - for (SUnit::succ_iterator SI = (*I)->Succs.begin(), SE = (*I)->Succs.end(); - SI != SE; ++SI) { - if (S && S->count(SI->getSUnit()) == 0) + for (SDep &Succ : (*I)->Succs) { + if (S && S->count(Succ.getSUnit()) == 0) continue; - if (ignoreDependence(*SI, false)) + if (ignoreDependence(Succ, false)) continue; - if (NodeOrder.count(SI->getSUnit()) == 0) - Succs.insert(SI->getSUnit()); + if (NodeOrder.count(Succ.getSUnit()) == 0) + Succs.insert(Succ.getSUnit()); } - for (SUnit::const_pred_iterator PI = (*I)->Preds.begin(), - PE = (*I)->Preds.end(); - PI != PE; ++PI) { - if (PI->getKind() != SDep::Anti) + for (SDep &Pred : (*I)->Preds) { + if (Pred.getKind() != SDep::Anti) continue; - if (S && S->count(PI->getSUnit()) == 0) + if (S && S->count(Pred.getSUnit()) == 0) continue; - if (NodeOrder.count(PI->getSUnit()) == 0) - Succs.insert(PI->getSUnit()); + if (NodeOrder.count(Pred.getSUnit()) == 0) + Succs.insert(Pred.getSUnit()); } } return !Succs.empty(); @@ -1613,14 +1600,6 @@ static bool computePath(SUnit *Cur, SetVector<SUnit *> &Path, return FoundPath; } -/// Return true if Set1 is a subset of Set2. -template <class S1Ty, class S2Ty> static bool isSubset(S1Ty &Set1, S2Ty &Set2) { - for (typename S1Ty::iterator I = Set1.begin(), E = Set1.end(); I != E; ++I) - if (Set2.count(*I) == 0) - return false; - return true; -} - /// Compute the live-out registers for the instructions in a node-set. /// The live-out registers are those that are defined in the node-set, /// but not used. Except for use operands of Phis. @@ -1724,7 +1703,7 @@ void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) { SmallSetVector<SUnit *, 8> S2; if (N2.empty() || !succ_L(N2, S2)) continue; - if (isSubset(S1, S2) && S1.size() == S2.size()) { + if (llvm::set_is_subset(S1, S2) && S1.size() == S2.size()) { N1.setColocate(++Colocate); N2.setColocate(Colocate); break; @@ -1807,11 +1786,10 @@ void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) { // Create new nodes sets with the connected nodes any remaining node that // has no predecessor. 
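The file-local isSubset template removed above gives way to the generic helper pulled in by the new SetOperations.h include; usage is a drop-in replacement (S1 and Nodes as in the surrounding code):

  // true iff every element of S1 also appears in Nodes
  bool Covered = llvm::set_is_subset(S1, Nodes);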
- for (unsigned i = 0; i < SUnits.size(); ++i) { - SUnit *SU = &SUnits[i]; - if (NodesAdded.count(SU) == 0) { + for (SUnit &SU : SUnits) { + if (NodesAdded.count(&SU) == 0) { NewSet.clear(); - addConnectedNodes(SU, NewSet, NodesAdded); + addConnectedNodes(&SU, NewSet, NodesAdded); if (!NewSet.empty()) NodeSets.push_back(NewSet); } @@ -1858,9 +1836,8 @@ void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) { if (NI.getNode(0)->NodeNum == NJ.getNode(0)->NodeNum) { if (NJ.compareRecMII(NI) > 0) NI.setRecMII(NJ.getRecMII()); - for (NodeSet::iterator NII = J->begin(), ENI = J->end(); NII != ENI; - ++NII) - I->insert(*NII); + for (SUnit *SU : *J) + I->insert(SU); NodeSets.erase(J); E = NodeSets.end(); } else { @@ -1898,11 +1875,11 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) { LLVM_DEBUG(dbgs() << "NodeSet size " << Nodes.size() << "\n"); OrderKind Order; SmallSetVector<SUnit *, 8> N; - if (pred_L(NodeOrder, N) && isSubset(N, Nodes)) { + if (pred_L(NodeOrder, N) && llvm::set_is_subset(N, Nodes)) { R.insert(N.begin(), N.end()); Order = BottomUp; LLVM_DEBUG(dbgs() << " Bottom up (preds) "); - } else if (succ_L(NodeOrder, N) && isSubset(N, Nodes)) { + } else if (succ_L(NodeOrder, N) && llvm::set_is_subset(N, Nodes)) { R.insert(N.begin(), N.end()); Order = TopDown; LLVM_DEBUG(dbgs() << " Top down (succs) "); @@ -2049,9 +2026,8 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) { } bool scheduleFound = false; - unsigned II = 0; // Keep increasing II until a valid schedule is found. - for (II = MII; II <= MAX_II && !scheduleFound; ++II) { + for (unsigned II = MII; II <= MAX_II && !scheduleFound; ++II) { Schedule.reset(); Schedule.setInitiationInterval(II); LLVM_DEBUG(dbgs() << "Try to schedule with " << II << "\n"); @@ -2124,7 +2100,8 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) { scheduleFound = Schedule.isValidSchedule(this); } - LLVM_DEBUG(dbgs() << "Schedule Found? " << scheduleFound << " (II=" << II + LLVM_DEBUG(dbgs() << "Schedule Found? 
" << scheduleFound + << " (II=" << Schedule.getInitiationInterval() << ")\n"); if (scheduleFound) { @@ -2132,7 +2109,8 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) { Pass.ORE->emit([&]() { return MachineOptimizationRemarkAnalysis( DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader()) - << "Schedule found with Initiation Interval: " << ore::NV("II", II) + << "Schedule found with Initiation Interval: " + << ore::NV("II", Schedule.getInitiationInterval()) << ", MaxStageCount: " << ore::NV("MaxStageCount", Schedule.getMaxStageCount()); }); @@ -2404,14 +2382,12 @@ bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) { checkCycle <= LastCycle; checkCycle += II) { std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[checkCycle]; - for (std::deque<SUnit *>::iterator I = cycleInstrs.begin(), - E = cycleInstrs.end(); - I != E; ++I) { - if (ST.getInstrInfo()->isZeroCost((*I)->getInstr()->getOpcode())) + for (SUnit *CI : cycleInstrs) { + if (ST.getInstrInfo()->isZeroCost(CI->getInstr()->getOpcode())) continue; - assert(ProcItinResources.canReserveResources(*(*I)->getInstr()) && + assert(ProcItinResources.canReserveResources(*CI->getInstr()) && "These instructions have already been scheduled."); - ProcItinResources.reserveResources(*(*I)->getInstr()); + ProcItinResources.reserveResources(*CI->getInstr()); } } if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()) || @@ -2742,8 +2718,7 @@ bool SMSchedule::isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, // different stage than the definition. The pipeliner does not handle // physical register values that may cross a basic block boundary. bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) { - for (int i = 0, e = SSD->SUnits.size(); i < e; ++i) { - SUnit &SU = SSD->SUnits[i]; + for (SUnit &SU : SSD->SUnits) { if (!SU.hasPhysRegDefs) continue; int StageDef = stageScheduled(&SU); @@ -2939,14 +2914,12 @@ void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) { for (int Cycle = getFirstCycle(), E = getFinalCycle(); Cycle <= E; ++Cycle) { std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[Cycle]; std::deque<SUnit *> newOrderPhi; - for (unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) { - SUnit *SU = cycleInstrs[i]; + for (SUnit *SU : cycleInstrs) { if (SU->getInstr()->isPHI()) newOrderPhi.push_back(SU); } std::deque<SUnit *> newOrderI; - for (unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) { - SUnit *SU = cycleInstrs[i]; + for (SUnit *SU : cycleInstrs) { if (!SU->getInstr()->isPHI()) orderDependence(SSD, SU, newOrderI); } diff --git a/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/llvm/lib/CodeGen/MachineRegisterInfo.cpp index 5325eda9d478..3f6b11e072b4 100644 --- a/llvm/lib/CodeGen/MachineRegisterInfo.cpp +++ b/llvm/lib/CodeGen/MachineRegisterInfo.cpp @@ -434,8 +434,8 @@ void MachineRegisterInfo::clearKillFlags(Register Reg) const { } bool MachineRegisterInfo::isLiveIn(Register Reg) const { - for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I) - if ((Register)I->first == Reg || I->second == Reg) + for (const std::pair<MCRegister, Register> &LI : liveins()) + if ((Register)LI.first == Reg || LI.second == Reg) return true; return false; } @@ -443,18 +443,18 @@ bool MachineRegisterInfo::isLiveIn(Register Reg) const { /// getLiveInPhysReg - If VReg is a live-in virtual register, return the /// corresponding live-in physical register. 
MCRegister MachineRegisterInfo::getLiveInPhysReg(Register VReg) const { - for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I) - if (I->second == VReg) - return I->first; + for (const std::pair<MCRegister, Register> &LI : liveins()) + if (LI.second == VReg) + return LI.first; return MCRegister(); } /// getLiveInVirtReg - If PReg is a live-in physical register, return the /// corresponding live-in virtual register. Register MachineRegisterInfo::getLiveInVirtReg(MCRegister PReg) const { - for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I) - if (I->first == PReg) - return I->second; + for (const std::pair<MCRegister, Register> &LI : liveins()) + if (LI.first == PReg) + return LI.second; return Register(); } @@ -530,14 +530,11 @@ bool MachineRegisterInfo::isConstantPhysReg(MCRegister PhysReg) const { /// specified register as undefined which causes the DBG_VALUE to be /// deleted during LiveDebugVariables analysis. void MachineRegisterInfo::markUsesInDebugValueAsUndef(Register Reg) const { - // Mark any DBG_VALUE that uses Reg as undef (but don't delete it.) - MachineRegisterInfo::use_instr_iterator nextI; - for (use_instr_iterator I = use_instr_begin(Reg), E = use_instr_end(); - I != E; I = nextI) { - nextI = std::next(I); // I is invalidated by the setReg - MachineInstr *UseMI = &*I; - if (UseMI->isDebugValue()) - UseMI->getDebugOperandForReg(Reg)->setReg(0U); + // Mark any DBG_VALUE* that uses Reg as undef (but don't delete it.) + // We use make_early_inc_range because setReg invalidates the iterator. + for (MachineInstr &UseMI : llvm::make_early_inc_range(use_instructions(Reg))) { + if (UseMI.isDebugValue() && UseMI.hasDebugOperandForReg(Reg)) + UseMI.setDebugValueUndef(); } } @@ -585,8 +582,9 @@ bool MachineRegisterInfo::isPhysRegModified(MCRegister PhysReg, return false; } -bool MachineRegisterInfo::isPhysRegUsed(MCRegister PhysReg) const { - if (UsedPhysRegMask.test(PhysReg)) +bool MachineRegisterInfo::isPhysRegUsed(MCRegister PhysReg, + bool SkipRegMaskTest) const { + if (!SkipRegMaskTest && UsedPhysRegMask.test(PhysReg)) return true; const TargetRegisterInfo *TRI = getTargetRegisterInfo(); for (MCRegAliasIterator AliasReg(PhysReg, TRI, true); AliasReg.isValid(); diff --git a/llvm/lib/CodeGen/MachineSSAUpdater.cpp b/llvm/lib/CodeGen/MachineSSAUpdater.cpp index 462082df5d05..930677e4fd7d 100644 --- a/llvm/lib/CodeGen/MachineSSAUpdater.cpp +++ b/llvm/lib/CodeGen/MachineSSAUpdater.cpp @@ -164,9 +164,7 @@ Register MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) { Register SingularValue; bool isFirstPred = true; - for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(), - E = BB->pred_end(); PI != E; ++PI) { - MachineBasicBlock *PredBB = *PI; + for (MachineBasicBlock *PredBB : BB->predecessors()) { Register PredVal = GetValueAtEndOfBlockInternal(PredBB); PredValues.push_back(std::make_pair(PredBB, PredVal)); @@ -236,10 +234,10 @@ void MachineSSAUpdater::RewriteUse(MachineOperand &U) { U.setReg(NewVR); } -/// SSAUpdaterTraits<MachineSSAUpdater> - Traits for the SSAUpdaterImpl -/// template, specialized for MachineSSAUpdater. namespace llvm { +/// SSAUpdaterTraits<MachineSSAUpdater> - Traits for the SSAUpdaterImpl +/// template, specialized for MachineSSAUpdater. template<> class SSAUpdaterTraits<MachineSSAUpdater> { public: @@ -284,9 +282,7 @@ public: /// vector.
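// A minimal sketch of the make_early_inc_range idiom used in
// markUsesInDebugValueAsUndef above (assuming llvm/ADT/STLExtras.h): the
// adaptor advances the wrapped use-list iterator at dereference time, before
// the loop body runs, so the body may rewrite the operand and unlink the
// current instruction from Reg's use list without invalidating the traversal.
//   for (MachineInstr &UseMI :
//        llvm::make_early_inc_range(MRI.use_instructions(Reg)))
//     UseMI.setDebugValueUndef(); // drops the use of Reg; iteration survives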
static void FindPredecessorBlocks(MachineBasicBlock *BB, SmallVectorImpl<MachineBasicBlock*> *Preds){ - for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(), - E = BB->pred_end(); PI != E; ++PI) - Preds->push_back(*PI); + append_range(*Preds, BB->predecessors()); } /// GetUndefVal - Create an IMPLICIT_DEF instruction with a new register. diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp index 8d51bb26103a..4f42a2c8aeff 100644 --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -297,7 +297,7 @@ priorNonDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator Beg) { assert(I != Beg && "reached the top of the region, cannot decrement"); while (--I != Beg) { - if (!I->isDebugInstr()) + if (!I->isDebugOrPseudoInstr()) break; } return I; @@ -317,7 +317,7 @@ static MachineBasicBlock::const_iterator nextIfDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator End) { for(; I != End; ++I) { - if (!I->isDebugInstr()) + if (!I->isDebugOrPseudoInstr()) break; } return I; @@ -508,7 +508,7 @@ getSchedRegions(MachineBasicBlock *MBB, MachineInstr &MI = *std::prev(I); if (isSchedBoundary(&MI, &*MBB, MF, TII)) break; - if (!MI.isDebugInstr()) { + if (!MI.isDebugOrPseudoInstr()) { // MBB::size() uses instr_iterator to count. Here we need a bundle to // count as a single instruction. ++NumRegionInstrs; @@ -927,8 +927,8 @@ void ScheduleDAGMI::placeDebugValues() { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const { - for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) { - if (SUnit *SU = getSUnit(&(*MI))) + for (MachineInstr &MI : *this) { + if (SUnit *SU = getSUnit(&MI)) dumpNode(*SU); else dbgs() << "Missing SUnit\n"; @@ -1927,17 +1927,15 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) { } LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n"); // Add the weak edges. 
- for (SmallVectorImpl<SUnit*>::const_iterator - I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) { - LLVM_DEBUG(dbgs() << " Local use SU(" << (*I)->NodeNum << ") -> SU(" + for (SUnit *LU : LocalUses) { + LLVM_DEBUG(dbgs() << " Local use SU(" << LU->NodeNum << ") -> SU(" << GlobalSU->NodeNum << ")\n"); - DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak)); + DAG->addEdge(GlobalSU, SDep(LU, SDep::Weak)); } - for (SmallVectorImpl<SUnit*>::const_iterator - I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) { - LLVM_DEBUG(dbgs() << " Global use SU(" << (*I)->NodeNum << ") -> SU(" + for (SUnit *GU : GlobalUses) { + LLVM_DEBUG(dbgs() << " Global use SU(" << GU->NodeNum << ") -> SU(" << FirstLocalSU->NodeNum << ")\n"); - DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak)); + DAG->addEdge(FirstLocalSU, SDep(GU, SDep::Weak)); } } @@ -2006,6 +2004,7 @@ void SchedBoundary::reset() { IsResourceLimited = false; ReservedCycles.clear(); ReservedCyclesIndex.clear(); + ResourceGroupSubUnitMasks.clear(); #ifndef NDEBUG // Track the maximum number of stall cycles that could arise either from the // latency of a DAG edge or the number of cycles that a processor resource is @@ -2047,11 +2046,18 @@ init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) { unsigned ResourceCount = SchedModel->getNumProcResourceKinds(); ReservedCyclesIndex.resize(ResourceCount); ExecutedResCounts.resize(ResourceCount); + ResourceGroupSubUnitMasks.resize(ResourceCount, APInt(ResourceCount, 0)); unsigned NumUnits = 0; for (unsigned i = 0; i < ResourceCount; ++i) { ReservedCyclesIndex[i] = NumUnits; NumUnits += SchedModel->getProcResource(i)->NumUnits; + if (isUnbufferedGroup(i)) { + auto SubUnits = SchedModel->getProcResource(i)->SubUnitsIdxBegin; + for (unsigned U = 0, UE = SchedModel->getProcResource(i)->NumUnits; + U != UE; ++U) + ResourceGroupSubUnitMasks[i].setBit(SubUnits[U]); + } } ReservedCycles.resize(NumUnits, InvalidCycle); @@ -2093,7 +2099,9 @@ unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx, /// scheduled. Returns the next cycle and the index of the processor resource /// instance in the reserved cycles vector. std::pair<unsigned, unsigned> -SchedBoundary::getNextResourceCycle(unsigned PIdx, unsigned Cycles) { +SchedBoundary::getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx, + unsigned Cycles) { + unsigned MinNextUnreserved = InvalidCycle; unsigned InstanceIdx = 0; unsigned StartIndex = ReservedCyclesIndex[PIdx]; @@ -2101,6 +2109,35 @@ SchedBoundary::getNextResourceCycle(unsigned PIdx, unsigned Cycles) { assert(NumberOfInstances > 0 && "Cannot have zero instances of a ProcResource"); + if (isUnbufferedGroup(PIdx)) { + // If any subunits are used by the instruction, report that the resource + // group is available at 0, effectively removing the group record from + // hazarding and basing the hazarding decisions on the subunit records. + // Otherwise, choose the first available instance from among the subunits. + // Specifications which assign cycles to both the subunits and the group or + // which use an unbuffered group with buffered subunits will appear to + // schedule strangely. In the first case, the additional cycles for the + // group will be ignored. In the second, the group will be ignored + // entirely. 
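// A worked example of the mask logic below (hypothetical resource model, not
// taken from any real schedule description): let group G consist of subunits
// {P0, P1}, so init() above set the P0 and P1 bits of
// ResourceGroupSubUnitMasks[G]. For an instruction whose write list names P1
// explicitly, the scan below reports G as available at cycle 0 and leaves the
// hazard decision to P1's own record; for an instruction naming only G, the
// recursive calls pick whichever of P0 and P1 becomes free first.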
+ for (const MCWriteProcResEntry &PE : + make_range(SchedModel->getWriteProcResBegin(SC), + SchedModel->getWriteProcResEnd(SC))) + if (ResourceGroupSubUnitMasks[PIdx][PE.ProcResourceIdx]) + return std::make_pair(0u, StartIndex); + + auto SubUnits = SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin; + for (unsigned I = 0, End = NumberOfInstances; I < End; ++I) { + unsigned NextUnreserved, NextInstanceIdx; + std::tie(NextUnreserved, NextInstanceIdx) = + getNextResourceCycle(SC, SubUnits[I], Cycles); + if (MinNextUnreserved > NextUnreserved) { + InstanceIdx = NextInstanceIdx; + MinNextUnreserved = NextUnreserved; + } + } + return std::make_pair(MinNextUnreserved, InstanceIdx); + } + for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End; ++I) { unsigned NextUnreserved = getNextResourceCycleByInstance(I, Cycles); @@ -2154,7 +2191,7 @@ bool SchedBoundary::checkHazard(SUnit *SU) { unsigned ResIdx = PE.ProcResourceIdx; unsigned Cycles = PE.Cycles; unsigned NRCycle, InstanceIdx; - std::tie(NRCycle, InstanceIdx) = getNextResourceCycle(ResIdx, Cycles); + std::tie(NRCycle, InstanceIdx) = getNextResourceCycle(SC, ResIdx, Cycles); if (NRCycle > CurrCycle) { #ifndef NDEBUG MaxObservedStall = std::max(Cycles, MaxObservedStall); @@ -2304,8 +2341,8 @@ void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) { /// /// \return the next cycle at which the instruction may execute without /// oversubscribing resources. -unsigned SchedBoundary:: -countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) { +unsigned SchedBoundary::countResource(const MCSchedClassDesc *SC, unsigned PIdx, + unsigned Cycles, unsigned NextCycle) { unsigned Factor = SchedModel->getResourceFactor(PIdx); unsigned Count = Factor * Cycles; LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +" @@ -2327,7 +2364,7 @@ countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) { } // For reserved resources, record the highest cycle using the resource. unsigned NextAvailable, InstanceIdx; - std::tie(NextAvailable, InstanceIdx) = getNextResourceCycle(PIdx, Cycles); + std::tie(NextAvailable, InstanceIdx) = getNextResourceCycle(SC, PIdx, Cycles); if (NextAvailable > CurrCycle) { LLVM_DEBUG(dbgs() << " Resource conflict: " << SchedModel->getResourceName(PIdx) @@ -2407,7 +2444,7 @@ void SchedBoundary::bumpNode(SUnit *SU) { PI = SchedModel->getWriteProcResBegin(SC), PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { unsigned RCycle = - countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle); + countResource(SC, PI->ProcResourceIdx, PI->Cycles, NextCycle); if (RCycle > NextCycle) NextCycle = RCycle; } @@ -2422,7 +2459,8 @@ void SchedBoundary::bumpNode(SUnit *SU) { unsigned PIdx = PI->ProcResourceIdx; if (SchedModel->getProcResource(PIdx)->BufferSize == 0) { unsigned ReservedUntil, InstanceIdx; - std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(PIdx, 0); + std::tie(ReservedUntil, InstanceIdx) = + getNextResourceCycle(SC, PIdx, 0); if (isTop()) { ReservedCycles[InstanceIdx] = std::max(ReservedUntil, NextCycle + PI->Cycles); @@ -2780,6 +2818,8 @@ void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) { namespace llvm { /// Return true if this heuristic determines order. +/// TODO: Consider refactoring the return type of these functions to an +/// integer or enum, as we may need to differentiate whether TryCand is +/// better than Cand.
bool tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, @@ -3138,34 +3178,35 @@ void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU, /// \param Cand provides the policy and current best candidate. /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized. /// \param Zone describes the scheduled zone that we are extending, or nullptr -// if Cand is from a different zone than TryCand. -void GenericScheduler::tryCandidate(SchedCandidate &Cand, +/// if Cand is from a different zone than TryCand. +/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand) +bool GenericScheduler::tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const { // Initialize the candidate if needed. if (!Cand.isValid()) { TryCand.Reason = NodeOrder; - return; + return true; } // Bias PhysReg Defs and copies to their uses and defined respectively. if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop), biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg)) - return; + return TryCand.Reason != NoCand; // Avoid exceeding the target's limit. if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand, RegExcess, TRI, DAG->MF)) - return; + return TryCand.Reason != NoCand; // Avoid increasing the max critical pressure in the scheduled region. if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax, TryCand, Cand, RegCritical, TRI, DAG->MF)) - return; + return TryCand.Reason != NoCand; // We only compare a subset of features when comparing nodes between // Top and Bottom boundary. Some properties are simply incomparable, in many @@ -3179,12 +3220,12 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand, // heuristics to take precedence. if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() && tryLatency(TryCand, Cand, *Zone)) - return; + return TryCand.Reason != NoCand; // Prioritize instructions that read unbuffered resources by stall cycles. if (tryLess(Zone->getLatencyStallCycles(TryCand.SU), Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) - return; + return TryCand.Reason != NoCand; } // Keep clustered nodes together to encourage downstream peephole @@ -3200,14 +3241,14 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand, if (tryGreater(TryCand.SU == TryCandNextClusterSU, Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster)) - return; + return TryCand.Reason != NoCand; if (SameBoundary) { // Weak edges are for clustering and other constraints. if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop), getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak)) - return; + return TryCand.Reason != NoCand; } // Avoid increasing the max pressure of the entire region. @@ -3215,31 +3256,34 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand, Cand.RPDelta.CurrentMax, TryCand, Cand, RegMax, TRI, DAG->MF)) - return; + return TryCand.Reason != NoCand; if (SameBoundary) { // Avoid critical resource consumption and balance the schedule. TryCand.initResourceDelta(DAG, SchedModel); if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, TryCand, Cand, ResourceReduce)) - return; + return TryCand.Reason != NoCand; if (tryGreater(TryCand.ResDelta.DemandedResources, Cand.ResDelta.DemandedResources, TryCand, Cand, ResourceDemand)) - return; + return TryCand.Reason != NoCand; // Avoid serializing long latency dependence chains. 
// For acyclic path limited loops, latency was already checked above. if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency && !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone)) - return; + return TryCand.Reason != NoCand; // Fall through to original instruction order. if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) { TryCand.Reason = NodeOrder; + return true; } } + + return false; } /// Pick the best candidate from the queue. @@ -3261,8 +3305,7 @@ void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone, initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker); // Pass SchedBoundary only when comparing nodes from the same boundary. SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr; - tryCandidate(Cand, TryCand, ZoneArg); - if (TryCand.Reason != NoCand) { + if (tryCandidate(Cand, TryCand, ZoneArg)) { // Initialize resource delta if needed in case future heuristics query it. if (TryCand.ResDelta == SchedResourceDelta()) TryCand.initResourceDelta(DAG, SchedModel); @@ -3340,8 +3383,7 @@ SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) { assert(TopCand.isValid()); SchedCandidate Cand = BotCand; TopCand.Reason = NoCand; - tryCandidate(Cand, TopCand, nullptr); - if (TopCand.Reason != NoCand) { + if (tryCandidate(Cand, TopCand, nullptr)) { Cand.setBest(TopCand); LLVM_DEBUG(traceCandidate(Cand)); } @@ -3505,42 +3547,47 @@ void PostGenericScheduler::registerRoots() { /// /// \param Cand provides the policy and current best candidate. /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized. -void PostGenericScheduler::tryCandidate(SchedCandidate &Cand, +/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand) +bool PostGenericScheduler::tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand) { // Initialize the candidate if needed. if (!Cand.isValid()) { TryCand.Reason = NodeOrder; - return; + return true; } // Prioritize instructions that read unbuffered resources by stall cycles. if (tryLess(Top.getLatencyStallCycles(TryCand.SU), Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) - return; + return TryCand.Reason != NoCand; // Keep clustered nodes together. if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(), Cand.SU == DAG->getNextClusterSucc(), TryCand, Cand, Cluster)) - return; + return TryCand.Reason != NoCand; // Avoid critical resource consumption and balance the schedule. if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, TryCand, Cand, ResourceReduce)) - return; + return TryCand.Reason != NoCand; if (tryGreater(TryCand.ResDelta.DemandedResources, Cand.ResDelta.DemandedResources, TryCand, Cand, ResourceDemand)) - return; + return TryCand.Reason != NoCand; // Avoid serializing long latency dependence chains. if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) { - return; + return TryCand.Reason != NoCand; } // Fall through to original instruction order. 
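// (Reaching this point means no heuristic above set a CandReason on either
// candidate; the NodeNum comparison below is the final tie-break, and the
// boolean result now tells callers directly whether TryCand won, instead of
// every call site re-inspecting TryCand.Reason as the removed code did.)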
- if (TryCand.SU->NodeNum < Cand.SU->NodeNum) + if (TryCand.SU->NodeNum < Cand.SU->NodeNum) { TryCand.Reason = NodeOrder; + return true; + } + + return false; } void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) { @@ -3550,8 +3597,7 @@ void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) { TryCand.SU = SU; TryCand.AtTop = true; TryCand.initResourceDelta(DAG, SchedModel); - tryCandidate(Cand, TryCand); - if (TryCand.Reason != NoCand) { + if (tryCandidate(Cand, TryCand)) { Cand.setBest(TryCand); LLVM_DEBUG(traceCandidate(Cand)); } diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index 378df1b75e25..ec98394dca79 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -16,6 +16,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" @@ -91,7 +92,19 @@ static cl::opt<unsigned> SinkLoadBlocksThreshold( "the straight line is higher than this threshold."), cl::init(20), cl::Hidden); +static cl::opt<bool> +SinkInstsIntoLoop("sink-insts-to-avoid-spills", + cl::desc("Sink instructions into loops to avoid " + "register spills"), + cl::init(false), cl::Hidden); + +static cl::opt<unsigned> SinkIntoLoopLimit( + "machine-sink-loop-limit", + cl::desc("The maximum number of instructions considered for loop sinking."), + cl::init(50), cl::Hidden); + STATISTIC(NumSunk, "Number of machine instructions sunk"); +STATISTIC(NumLoopSunk, "Number of machine instructions sunk into a loop"); STATISTIC(NumSplit, "Number of critical edges split"); STATISTIC(NumCoalesces, "Number of copies coalesced"); STATISTIC(NumPostRACopySink, "Number of copies sunk after RA"); @@ -216,6 +229,11 @@ namespace { bool &LocalUse) const; MachineBasicBlock *FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB, bool &BreakPHIEdge, AllSuccsCache &AllSuccessors); + + void FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB, + SmallVectorImpl<MachineInstr *> &Candidates); + bool SinkIntoLoop(MachineLoop *L, MachineInstr &I); + bool isProfitableToSinkTo(Register Reg, MachineInstr &MI, MachineBasicBlock *MBB, MachineBasicBlock *SuccToSinkTo, @@ -340,6 +358,60 @@ bool MachineSinking::AllUsesDominatedByBlock(Register Reg, return true; } +/// Return true if this machine instruction loads from the global offset table +/// or the constant pool. +static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) { + assert(MI.mayLoad() && "Expected MI that loads!"); + + // If we lost memory operands, conservatively assume that the instruction + // reads from everything.
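// For illustration (a hypothetical case, relying only on the documented
// PseudoSourceValue queries used below): an x86 load of a floating-point
// literal from the constant pool carries a single MachineMemOperand whose
// pseudo-value satisfies isConstantPool(), so it passes this check; a reload
// from a spill slot carries a fixed-stack pseudo-value and fails it, as does
// any load whose memory operands were dropped.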
+ if (MI.memoperands_empty()) + return true; + + for (MachineMemOperand *MemOp : MI.memoperands()) + if (const PseudoSourceValue *PSV = MemOp->getPseudoValue()) + if (PSV->isGOT() || PSV->isConstantPool()) + return true; + + return false; +} + +void MachineSinking::FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB, + SmallVectorImpl<MachineInstr *> &Candidates) { + for (auto &MI : *BB) { + LLVM_DEBUG(dbgs() << "LoopSink: Analysing candidate: " << MI); + if (!TII->shouldSink(MI)) { + LLVM_DEBUG(dbgs() << "LoopSink: Instruction not a candidate for this " "target\n"); + continue; + } + if (!L->isLoopInvariant(MI)) { + LLVM_DEBUG(dbgs() << "LoopSink: Instruction is not loop invariant\n"); + continue; + } + bool DontMoveAcrossStore = true; + if (!MI.isSafeToMove(AA, DontMoveAcrossStore)) { + LLVM_DEBUG(dbgs() << "LoopSink: Instruction not safe to move.\n"); + continue; + } + if (MI.mayLoad() && !mayLoadFromGOTOrConstantPool(MI)) { + LLVM_DEBUG(dbgs() << "LoopSink: Don't sink GOT or constant pool loads\n"); + continue; + } + if (MI.isConvergent()) + continue; + + const MachineOperand &MO = MI.getOperand(0); + if (!MO.isReg() || !MO.getReg() || !MO.isDef()) + continue; + if (!MRI->hasOneDef(MO.getReg())) + continue; + + LLVM_DEBUG(dbgs() << "LoopSink: Instruction added as candidate.\n"); + Candidates.push_back(&MI); + } +} + bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; @@ -389,6 +461,37 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { EverMadeChange = true; } + if (SinkInstsIntoLoop) { + SmallVector<MachineLoop *, 8> Loops(LI->begin(), LI->end()); + for (auto *L : Loops) { + MachineBasicBlock *Preheader = LI->findLoopPreheader(L); + if (!Preheader) { + LLVM_DEBUG(dbgs() << "LoopSink: Can't find preheader\n"); + continue; + } + SmallVector<MachineInstr *, 8> Candidates; + FindLoopSinkCandidates(L, Preheader, Candidates); + + // Walk the candidates in reverse order so that we start at the use end + // of a def-use chain, if there is any. + // TODO: Sort the candidates using a cost-model. + unsigned i = 0; + for (auto It = Candidates.rbegin(); It != Candidates.rend(); ++It) { + if (i++ == SinkIntoLoopLimit) { + LLVM_DEBUG(dbgs() << "LoopSink: Reached the limit of instructions to " "be analysed."); + break; + } + + MachineInstr *I = *It; + if (!SinkIntoLoop(L, *I)) + break; + EverMadeChange = true; + ++NumLoopSunk; + } + } + } + HasStoreCache.clear(); StoreInstrCache.clear(); @@ -427,7 +530,7 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { if (!ProcessedBegin) --I; - if (MI.isDebugInstr()) { + if (MI.isDebugOrPseudoInstr()) { if (MI.isDebugValue()) ProcessDbgInst(MI); continue; @@ -464,9 +567,10 @@ void MachineSinking::ProcessDbgInst(MachineInstr &MI) { MI.getDebugLoc()->getInlinedAt()); bool SeenBefore = SeenDbgVars.contains(Var); - MachineOperand &MO = MI.getDebugOperand(0); - if (MO.isReg() && MO.getReg().isVirtual()) - SeenDbgUsers[MO.getReg()].push_back(SeenDbgUser(&MI, SeenBefore)); + for (MachineOperand &MO : MI.debug_operands()) { + if (MO.isReg() && MO.getReg().isVirtual()) + SeenDbgUsers[MO.getReg()].push_back(SeenDbgUser(&MI, SeenBefore)); + } // Record the variable for any DBG_VALUE, to avoid re-ordering any of them.
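// (The loop above visits debug_operands() rather than the old
// getDebugOperand(0) because a DBG_VALUE_LIST may name several virtual
// registers; each register gets its own SeenDbgUsers entry, so the
// instruction is sunk, or set undef, along with whichever of them is
// disturbed.)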
SeenDbgVars.insert(Var); @@ -614,7 +718,7 @@ MachineSinking::getBBRegisterPressure(MachineBasicBlock &MBB) { MIE = MBB.instr_begin(); MII != MIE; --MII) { MachineInstr &MI = *std::prev(MII); - if (MI.isDebugValue() || MI.isDebugLabel()) + if (MI.isDebugInstr() || MI.isPseudoProbe()) continue; RegisterOperands RegOpers; RegOpers.collect(MI, *TRI, *MRI, false, false); @@ -926,14 +1030,14 @@ static bool SinkingPreventsImplicitNullCheck(MachineInstr &MI, /// leaving an 'undef' DBG_VALUE in the original location. Don't do this if /// there's any subregister weirdness involved. Returns true if copy /// propagation occurred. -static bool attemptDebugCopyProp(MachineInstr &SinkInst, MachineInstr &DbgMI) { +static bool attemptDebugCopyProp(MachineInstr &SinkInst, MachineInstr &DbgMI, + Register Reg) { const MachineRegisterInfo &MRI = SinkInst.getMF()->getRegInfo(); const TargetInstrInfo &TII = *SinkInst.getMF()->getSubtarget().getInstrInfo(); // Copy DBG_VALUE operand and set the original to undef. We then check to // see whether this is something that can be copy-forwarded. If it isn't, // continue around the loop. - MachineOperand &DbgMO = DbgMI.getDebugOperand(0); const MachineOperand *SrcMO = nullptr, *DstMO = nullptr; auto CopyOperands = TII.isCopyInstr(SinkInst); @@ -946,36 +1050,41 @@ static bool attemptDebugCopyProp(MachineInstr &SinkInst, MachineInstr &DbgMI) { bool PostRA = MRI.getNumVirtRegs() == 0; // Trying to forward between physical and virtual registers is too hard. - if (DbgMO.getReg().isVirtual() != SrcMO->getReg().isVirtual()) + if (Reg.isVirtual() != SrcMO->getReg().isVirtual()) return false; // Only try virtual register copy-forwarding before regalloc, and physical // register copy-forwarding after regalloc. - bool arePhysRegs = !DbgMO.getReg().isVirtual(); + bool arePhysRegs = !Reg.isVirtual(); if (arePhysRegs != PostRA) return false; // Pre-regalloc, only forward if all subregisters agree (or there are no // subregs at all). More analysis might recover some forwardable copies. - if (!PostRA && (DbgMO.getSubReg() != SrcMO->getSubReg() || - DbgMO.getSubReg() != DstMO->getSubReg())) - return false; + if (!PostRA) + for (auto &DbgMO : DbgMI.getDebugOperandsForReg(Reg)) + if (DbgMO.getSubReg() != SrcMO->getSubReg() || + DbgMO.getSubReg() != DstMO->getSubReg()) + return false; // Post-regalloc, we may be sinking a DBG_VALUE of a sub or super-register // of this copy. Only forward the copy if the DBG_VALUE operand exactly // matches the copy destination. - if (PostRA && DbgMO.getReg() != DstMO->getReg()) + if (PostRA && Reg != DstMO->getReg()) return false; - DbgMO.setReg(SrcMO->getReg()); - DbgMO.setSubReg(SrcMO->getSubReg()); + for (auto &DbgMO : DbgMI.getDebugOperandsForReg(Reg)) { + DbgMO.setReg(SrcMO->getReg()); + DbgMO.setSubReg(SrcMO->getSubReg()); + } return true; } +using MIRegs = std::pair<MachineInstr *, SmallVector<unsigned, 2>>; /// Sink an instruction and its associated debug instructions. 
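// (MIRegs, introduced just above, pairs a sinkable DBG_VALUE with the
// registers, kept as raw unsigneds, that tie it to the instruction being
// sunk; performSink below clones the DBG_VALUE at the destination and sets
// the original undef unless copy-propagation succeeds for every recorded
// register the instruction still refers to.)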
static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo, MachineBasicBlock::iterator InsertPos, - SmallVectorImpl<MachineInstr *> &DbgValuesToSink) { + SmallVectorImpl<MIRegs> &DbgValuesToSink) { // If we cannot find a location to use (merge with), then we erase the debug // location to prevent debug-info driven tools from potentially reporting @@ -995,14 +1104,21 @@ static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo, // DBG_VALUE location as 'undef', indicating that any earlier variable // location should be terminated as we've optimised away the value at this // point. - for (SmallVectorImpl<MachineInstr *>::iterator DBI = DbgValuesToSink.begin(), - DBE = DbgValuesToSink.end(); - DBI != DBE; ++DBI) { - MachineInstr *DbgMI = *DBI; - MachineInstr *NewDbgMI = DbgMI->getMF()->CloneMachineInstr(*DBI); + for (auto DbgValueToSink : DbgValuesToSink) { + MachineInstr *DbgMI = DbgValueToSink.first; + MachineInstr *NewDbgMI = DbgMI->getMF()->CloneMachineInstr(DbgMI); SuccToSinkTo.insert(InsertPos, NewDbgMI); - if (!attemptDebugCopyProp(MI, *DbgMI)) + bool PropagatedAllSunkOps = true; + for (unsigned Reg : DbgValueToSink.second) { + if (DbgMI->hasDebugOperandForReg(Reg)) { + if (!attemptDebugCopyProp(MI, *DbgMI, Reg)) { + PropagatedAllSunkOps = false; + break; + } + } + } + if (!PropagatedAllSunkOps) DbgMI->setDebugValueUndef(); } } @@ -1098,6 +1214,77 @@ bool MachineSinking::hasStoreBetween(MachineBasicBlock *From, return HasAliasedStore; } +/// Sink instructions into loops if profitable. This especially tries to prevent +/// register spills caused by register pressure if there is little to no +/// overhead moving instructions into loops. +bool MachineSinking::SinkIntoLoop(MachineLoop *L, MachineInstr &I) { + LLVM_DEBUG(dbgs() << "LoopSink: Finding sink block for: " << I); + MachineBasicBlock *Preheader = L->getLoopPreheader(); + assert(Preheader && "Loop sink needs a preheader block"); + MachineBasicBlock *SinkBlock = nullptr; + bool CanSink = true; + const MachineOperand &MO = I.getOperand(0); + + for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) { + LLVM_DEBUG(dbgs() << "LoopSink: Analysing use: " << MI); + if (!L->contains(&MI)) { + LLVM_DEBUG(dbgs() << "LoopSink: Use not in loop, can't sink.\n"); + CanSink = false; + break; + } + + // FIXME: Come up with a proper cost model that estimates whether sinking + // the instruction (and thus possibly executing it on every loop + // iteration) is more expensive than a register. + // For now assumes that copies are cheap and thus almost always worth it. 
+ if (!MI.isCopy()) { + LLVM_DEBUG(dbgs() << "LoopSink: Use is not a copy\n"); + CanSink = false; + break; + } + if (!SinkBlock) { + SinkBlock = MI.getParent(); + LLVM_DEBUG(dbgs() << "LoopSink: Setting sink block to: " + << printMBBReference(*SinkBlock) << "\n"); + continue; + } + SinkBlock = DT->findNearestCommonDominator(SinkBlock, MI.getParent()); + if (!SinkBlock) { + LLVM_DEBUG(dbgs() << "LoopSink: Can't find nearest dominator\n"); + CanSink = false; + break; + } + LLVM_DEBUG(dbgs() << "LoopSink: Setting nearest common dom block: " << + printMBBReference(*SinkBlock) << "\n"); + } + + if (!CanSink) { + LLVM_DEBUG(dbgs() << "LoopSink: Can't sink instruction.\n"); + return false; + } + if (!SinkBlock) { + LLVM_DEBUG(dbgs() << "LoopSink: Not sinking, can't find sink block.\n"); + return false; + } + if (SinkBlock == Preheader) { + LLVM_DEBUG(dbgs() << "LoopSink: Not sinking, sink block is the preheader\n"); + return false; + } + if (SinkBlock->size() > SinkLoadInstsPerBlockThreshold) { + LLVM_DEBUG(dbgs() << "LoopSink: Not Sinking, block too large to analyse.\n"); + return false; + } + + LLVM_DEBUG(dbgs() << "LoopSink: Sinking instruction!\n"); + SinkBlock->splice(SinkBlock->getFirstNonPHI(), Preheader, I); + + // The instruction is moved from its basic block, so do not retain the + // debug information. + assert(!I.isDebugInstr() && "Should not sink debug inst"); + I.setDebugLoc(DebugLoc()); + return true; +} + /// SinkInstruction - Determine whether it is safe to sink the specified machine /// instruction out of its current block into a successor. bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore, @@ -1214,7 +1401,7 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore, ++InsertPos; // Collect debug users of any vreg that this inst defines. - SmallVector<MachineInstr *, 4> DbgUsersToSink; + SmallVector<MIRegs, 4> DbgUsersToSink; for (auto &MO : MI.operands()) { if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual()) continue; @@ -1228,10 +1415,11 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore, if (User.getInt()) { // This DBG_VALUE would re-order assignments. If we can't copy-propagate // it, it can't be recovered. Set it undef. - if (!attemptDebugCopyProp(MI, *DbgMI)) + if (!attemptDebugCopyProp(MI, *DbgMI, MO.getReg())) DbgMI->setDebugValueUndef(); } else { - DbgUsersToSink.push_back(DbgMI); + DbgUsersToSink.push_back( + {DbgMI, SmallVector<unsigned, 2>(1, MO.getReg())}); } } } @@ -1266,10 +1454,12 @@ void MachineSinking::SalvageUnsunkDebugUsersOfCopy( // be sunk. For the rest, if they are not dominated by the block we will sink // MI into, propagate the copy source to them. 
SmallVector<MachineInstr *, 4> DbgDefUsers; + SmallVector<Register, 4> DbgUseRegs; const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo(); for (auto &MO : MI.operands()) { if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual()) continue; + DbgUseRegs.push_back(MO.getReg()); for (auto &User : MRI.use_instructions(MO.getReg())) { if (!User.isDebugValue() || DT->dominates(TargetBlock, User.getParent())) continue; @@ -1278,8 +1468,8 @@ void MachineSinking::SalvageUnsunkDebugUsersOfCopy( if (User.getParent() == MI.getParent()) continue; - assert(User.getDebugOperand(0).isReg() && - "DBG_VALUE user of vreg, but non reg operand?"); + assert(User.hasDebugOperandForReg(MO.getReg()) && + "DBG_VALUE user of vreg, but has no operand for it?"); DbgDefUsers.push_back(&User); } } @@ -1287,8 +1477,12 @@ void MachineSinking::SalvageUnsunkDebugUsersOfCopy( // Point the users of this copy that are no longer dominated, at the source // of the copy. for (auto *User : DbgDefUsers) { - User->getDebugOperand(0).setReg(MI.getOperand(1).getReg()); - User->getDebugOperand(0).setSubReg(MI.getOperand(1).getSubReg()); + for (auto &Reg : DbgUseRegs) { + for (auto &DbgOp : User->getDebugOperandsForReg(Reg)) { + DbgOp.setReg(MI.getOperand(1).getReg()); + DbgOp.setSubReg(MI.getOperand(1).getSubReg()); + } + } } } @@ -1351,8 +1545,10 @@ private: LiveRegUnits ModifiedRegUnits, UsedRegUnits; /// Track DBG_VALUEs of (unmodified) register units. Each DBG_VALUE has an - /// entry in this map for each unit it touches. - DenseMap<unsigned, TinyPtrVector<MachineInstr *>> SeenDbgInstrs; + /// entry in this map for each unit it touches. The DBG_VALUE's entry + /// consists of a pointer to the instruction itself, and a vector of registers + /// referred to by the instruction that overlap the key register unit. + DenseMap<unsigned, SmallVector<MIRegs, 2>> SeenDbgInstrs; /// Sink Copy instructions unused in the same block close to their uses in /// successors. @@ -1534,23 +1730,32 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB, // We must sink this DBG_VALUE if its operand is sunk. To avoid searching // for DBG_VALUEs later, record them when they're encountered. if (MI->isDebugValue()) { - auto &MO = MI->getDebugOperand(0); - if (MO.isReg() && Register::isPhysicalRegister(MO.getReg())) { - // Bail if we can already tell the sink would be rejected, rather - // than needlessly accumulating lots of DBG_VALUEs. - if (hasRegisterDependency(MI, UsedOpsInCopy, DefedRegsInCopy, - ModifiedRegUnits, UsedRegUnits)) - continue; - - // Record debug use of each reg unit. - SmallSet<MCRegister, 4> Units = getRegUnits(MO.getReg(), TRI); - for (MCRegister Reg : Units) - SeenDbgInstrs[Reg].push_back(MI); + SmallDenseMap<MCRegister, SmallVector<unsigned, 2>, 4> MIUnits; + bool IsValid = true; + for (MachineOperand &MO : MI->debug_operands()) { + if (MO.isReg() && Register::isPhysicalRegister(MO.getReg())) { + // Bail if we can already tell the sink would be rejected, rather + // than needlessly accumulating lots of DBG_VALUEs. + if (hasRegisterDependency(MI, UsedOpsInCopy, DefedRegsInCopy, + ModifiedRegUnits, UsedRegUnits)) { + IsValid = false; + break; + } + + // Record debug use of each reg unit. 
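// (Concretely: a hypothetical DBG_VALUE_LIST reading $eax and $ebx gets an
// entry per register unit of each operand, and every entry remembers which
// concrete registers mapped to that unit, so a later copy defining only
// $ebx sinks the instruction with just $ebx recorded for copy-propagation.)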
+ SmallSet<MCRegister, 4> RegUnits = getRegUnits(MO.getReg(), TRI); + for (MCRegister Reg : RegUnits) + MIUnits[Reg].push_back(MO.getReg()); + } + } + if (IsValid) { + for (auto RegOps : MIUnits) + SeenDbgInstrs[RegOps.first].push_back({MI, RegOps.second}); } continue; } - if (MI->isDebugInstr()) + if (MI->isDebugOrPseudoInstr()) continue; // Do not move any instruction across function call. @@ -1587,18 +1792,22 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB, // Collect DBG_VALUEs that must sink with this copy. We've previously // recorded which reg units that DBG_VALUEs read, if this instruction // writes any of those units then the corresponding DBG_VALUEs must sink. - SetVector<MachineInstr *> DbgValsToSinkSet; + MapVector<MachineInstr *, MIRegs::second_type> DbgValsToSinkMap; for (auto &MO : MI->operands()) { if (!MO.isReg() || !MO.isDef()) continue; SmallSet<MCRegister, 4> Units = getRegUnits(MO.getReg(), TRI); - for (MCRegister Reg : Units) - for (auto *MI : SeenDbgInstrs.lookup(Reg)) - DbgValsToSinkSet.insert(MI); + for (MCRegister Reg : Units) { + for (auto MIRegs : SeenDbgInstrs.lookup(Reg)) { + auto &Regs = DbgValsToSinkMap[MIRegs.first]; + for (unsigned Reg : MIRegs.second) + Regs.push_back(Reg); + } + } } - SmallVector<MachineInstr *, 4> DbgValsToSink(DbgValsToSinkSet.begin(), - DbgValsToSinkSet.end()); + SmallVector<MIRegs, 4> DbgValsToSink(DbgValsToSinkMap.begin(), + DbgValsToSinkMap.end()); // Clear the kill flag if SrcReg is killed between MI and the end of the // block. diff --git a/llvm/lib/CodeGen/MachineStableHash.cpp b/llvm/lib/CodeGen/MachineStableHash.cpp index fb14f0a33209..0803c2b8b85a 100644 --- a/llvm/lib/CodeGen/MachineStableHash.cpp +++ b/llvm/lib/CodeGen/MachineStableHash.cpp @@ -182,7 +182,7 @@ stable_hash llvm::stableHashValue(const MachineInstr &MI, bool HashVRegs, HashComponents.push_back(static_cast<unsigned>(Op->getSize())); HashComponents.push_back(static_cast<unsigned>(Op->getFlags())); HashComponents.push_back(static_cast<unsigned>(Op->getOffset())); - HashComponents.push_back(static_cast<unsigned>(Op->getOrdering())); + HashComponents.push_back(static_cast<unsigned>(Op->getSuccessOrdering())); HashComponents.push_back(static_cast<unsigned>(Op->getAddrSpace())); HashComponents.push_back(static_cast<unsigned>(Op->getSyncScopeID())); HashComponents.push_back(static_cast<unsigned>(Op->getBaseAlign().value())); diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index 0f6d9b888f47..7e3198af02cd 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -223,6 +223,7 @@ namespace { void report(const char *msg, const MachineInstr *MI); void report(const char *msg, const MachineOperand *MO, unsigned MONum, LLT MOVRegType = LLT{}); + void report(const Twine &Msg, const MachineInstr *MI); void report_context(const LiveInterval &LI) const; void report_context(const LiveRange &LR, Register VRegUnit, @@ -500,6 +501,10 @@ void MachineVerifier::report(const char *msg, const MachineOperand *MO, errs() << "\n"; } +void MachineVerifier::report(const Twine &Msg, const MachineInstr *MI) { + report(Msg.str().c_str(), MI); +} + void MachineVerifier::report_context(SlotIndex Pos) const { errs() << "- at: " << Pos << '\n'; } @@ -940,7 +945,46 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { report(ErrorInfo.data(), MI); // Verify properties of various specific instruction types - switch (MI->getOpcode()) { + unsigned Opc = MI->getOpcode(); + 
switch (Opc) { + case TargetOpcode::G_ASSERT_SEXT: + case TargetOpcode::G_ASSERT_ZEXT: { + std::string OpcName = + Opc == TargetOpcode::G_ASSERT_ZEXT ? "G_ASSERT_ZEXT" : "G_ASSERT_SEXT"; + if (!MI->getOperand(2).isImm()) { + report(Twine(OpcName, " expects an immediate operand #2"), MI); + break; + } + + Register Dst = MI->getOperand(0).getReg(); + Register Src = MI->getOperand(1).getReg(); + LLT SrcTy = MRI->getType(Src); + int64_t Imm = MI->getOperand(2).getImm(); + if (Imm <= 0) { + report(Twine(OpcName, " size must be >= 1"), MI); + break; + } + + if (Imm >= SrcTy.getScalarSizeInBits()) { + report(Twine(OpcName, " size must be less than source bit width"), MI); + break; + } + + if (MRI->getRegBankOrNull(Src) != MRI->getRegBankOrNull(Dst)) { + report( + Twine(OpcName, " source and destination register banks must match"), + MI); + break; + } + + if (MRI->getRegClassOrNull(Src) != MRI->getRegClassOrNull(Dst)) + report( + Twine(OpcName, " source and destination register classes must match"), + MI); + + break; + } + case TargetOpcode::G_CONSTANT: case TargetOpcode::G_FCONSTANT: { LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); @@ -1241,6 +1285,10 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { if (!DstTy.isVector() || !SrcTy.isVector()) report("G_CONCAT_VECTOR requires vector source and destination operands", MI); + + if (MI->getNumOperands() < 3) + report("G_CONCAT_VECTOR requires at least 2 source operands", MI); + for (unsigned i = 2; i < MI->getNumOperands(); ++i) { if (MRI->getType(MI->getOperand(1).getReg()) != MRI->getType(MI->getOperand(i).getReg())) @@ -1363,10 +1411,7 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { break; } - LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); LLT SrcTy = MRI->getType(MI->getOperand(1).getReg()); - verifyVectorElementMatch(DstTy, SrcTy, MI); - int64_t Imm = MI->getOperand(2).getImm(); if (Imm <= 0) report("G_SEXT_INREG size must be >= 1", MI); @@ -1432,6 +1477,7 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { } break; } + case TargetOpcode::G_MEMCPY_INLINE: case TargetOpcode::G_MEMCPY: case TargetOpcode::G_MEMMOVE: { ArrayRef<MachineMemOperand *> MMOs = MI->memoperands(); @@ -1462,28 +1508,38 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { if (SrcPtrTy.getAddressSpace() != MMOs[1]->getAddrSpace()) report("inconsistent load address space", MI); + if (Opc != TargetOpcode::G_MEMCPY_INLINE) + if (!MI->getOperand(3).isImm() || (MI->getOperand(3).getImm() & ~1LL)) + report("'tail' flag (operand 3) must be an immediate 0 or 1", MI); + break; } + case TargetOpcode::G_BZERO: case TargetOpcode::G_MEMSET: { ArrayRef<MachineMemOperand *> MMOs = MI->memoperands(); + std::string Name = Opc == TargetOpcode::G_MEMSET ? 
"memset" : "bzero"; if (MMOs.size() != 1) { - report("memset must have 1 memory operand", MI); + report(Twine(Name, " must have 1 memory operand"), MI); break; } if ((!MMOs[0]->isStore() || MMOs[0]->isLoad())) { - report("memset memory operand must be a store", MI); + report(Twine(Name, " memory operand must be a store"), MI); break; } LLT DstPtrTy = MRI->getType(MI->getOperand(0).getReg()); if (!DstPtrTy.isPointer()) { - report("memset operand must be a pointer", MI); + report(Twine(Name, " operand must be a pointer"), MI); break; } if (DstPtrTy.getAddressSpace() != MMOs[0]->getAddrSpace()) - report("inconsistent memset address space", MI); + report("inconsistent " + Twine(Name, " address space"), MI); + + if (!MI->getOperand(MI->getNumOperands() - 1).isImm() || + (MI->getOperand(MI->getNumOperands() - 1).getImm() & ~1LL)) + report("'tail' flag (last operand) must be an immediate 0 or 1", MI); break; } @@ -1521,6 +1577,28 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { report("Vector reduction requires vector source=", MI); break; } + + case TargetOpcode::G_SBFX: + case TargetOpcode::G_UBFX: { + LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); + if (DstTy.isVector()) { + report("Bitfield extraction is not supported on vectors", MI); + break; + } + break; + } + case TargetOpcode::G_ROTR: + case TargetOpcode::G_ROTL: { + LLT Src1Ty = MRI->getType(MI->getOperand(1).getReg()); + LLT Src2Ty = MRI->getType(MI->getOperand(2).getReg()); + if (Src1Ty.isVector() != Src2Ty.isVector()) { + report("Rotate requires operands to be either all scalars or all vectors", + MI); + break; + } + break; + } + default: break; } @@ -1582,7 +1660,7 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { // Other instructions must have one, unless they are inside a bundle. if (LiveInts) { bool mapped = !LiveInts->isNotInMIMap(*MI); - if (MI->isDebugInstr()) { + if (MI->isDebugOrPseudoInstr()) { if (mapped) report("Debug instruction has a slot index", MI); } else if (MI->isInsideBundle()) { @@ -1594,7 +1672,8 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { } } - if (isPreISelGenericOpcode(MCID.getOpcode())) { + unsigned Opc = MCID.getOpcode(); + if (isPreISelGenericOpcode(Opc) || isPreISelGenericOptimizationHint(Opc)) { verifyPreISelGenericInstruction(MI); return; } @@ -1606,32 +1685,56 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { // Verify properties of various specific instruction types switch (MI->getOpcode()) { case TargetOpcode::COPY: { - if (foundErrors) - break; const MachineOperand &DstOp = MI->getOperand(0); const MachineOperand &SrcOp = MI->getOperand(1); - LLT DstTy = MRI->getType(DstOp.getReg()); - LLT SrcTy = MRI->getType(SrcOp.getReg()); + const Register SrcReg = SrcOp.getReg(); + const Register DstReg = DstOp.getReg(); + + LLT DstTy = MRI->getType(DstReg); + LLT SrcTy = MRI->getType(SrcReg); if (SrcTy.isValid() && DstTy.isValid()) { // If both types are valid, check that the types are the same. if (SrcTy != DstTy) { report("Copy Instruction is illegal with mismatching types", MI); errs() << "Def = " << DstTy << ", Src = " << SrcTy << "\n"; } + + break; } - if (SrcTy.isValid() || DstTy.isValid()) { - // If one of them have valid types, let's just check they have the same - // size. 
- unsigned SrcSize = TRI->getRegSizeInBits(SrcOp.getReg(), *MRI); - unsigned DstSize = TRI->getRegSizeInBits(DstOp.getReg(), *MRI); - assert(SrcSize && "Expecting size here"); - assert(DstSize && "Expecting size here"); - if (SrcSize != DstSize) - if (!DstOp.getSubReg() && !SrcOp.getSubReg()) { - report("Copy Instruction is illegal with mismatching sizes", MI); - errs() << "Def Size = " << DstSize << ", Src Size = " << SrcSize - << "\n"; - } + + if (!SrcTy.isValid() && !DstTy.isValid()) + break; + + // If we have only one valid type, this is likely a copy between a virtual + // and physical register. + unsigned SrcSize = 0; + unsigned DstSize = 0; + if (SrcReg.isPhysical() && DstTy.isValid()) { + const TargetRegisterClass *SrcRC = + TRI->getMinimalPhysRegClassLLT(SrcReg, DstTy); + if (SrcRC) + SrcSize = TRI->getRegSizeInBits(*SrcRC); + } + + if (SrcSize == 0) + SrcSize = TRI->getRegSizeInBits(SrcReg, *MRI); + + if (DstReg.isPhysical() && SrcTy.isValid()) { + const TargetRegisterClass *DstRC = + TRI->getMinimalPhysRegClassLLT(DstReg, SrcTy); + if (DstRC) + DstSize = TRI->getRegSizeInBits(*DstRC); + } + + if (DstSize == 0) + DstSize = TRI->getRegSizeInBits(DstReg, *MRI); + + if (SrcSize != 0 && DstSize != 0 && SrcSize != DstSize) { + if (!DstOp.getSubReg() && !SrcOp.getSubReg()) { + report("Copy Instruction is illegal with mismatching sizes", MI); + errs() << "Def Size = " << DstSize << ", Src Size = " << SrcSize + << "\n"; + } } break; } @@ -1679,6 +1782,19 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { // TODO: verify we have properly encoded deopt arguments } break; + case TargetOpcode::INSERT_SUBREG: { + unsigned InsertedSize; + if (unsigned SubIdx = MI->getOperand(2).getSubReg()) + InsertedSize = TRI->getSubRegIdxSize(SubIdx); + else + InsertedSize = TRI->getRegSizeInBits(MI->getOperand(2).getReg(), *MRI); + unsigned SubRegSize = TRI->getSubRegIdxSize(MI->getOperand(3).getImm()); + if (SubRegSize < InsertedSize) { + report("INSERT_SUBREG expected inserted value to have equal or lesser " + "size than the subreg it was inserted into", MI); + break; + } + } break; } } @@ -1716,9 +1832,12 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { if (MCOI.OperandType == MCOI::OPERAND_REGISTER && !MO->isReg() && !MO->isFI()) report("Expected a register operand.", MO, MONum); - if ((MCOI.OperandType == MCOI::OPERAND_IMMEDIATE || - MCOI.OperandType == MCOI::OPERAND_PCREL) && MO->isReg()) - report("Expected a non-register operand.", MO, MONum); + if (MO->isReg()) { + if (MCOI.OperandType == MCOI::OPERAND_IMMEDIATE || + (MCOI.OperandType == MCOI::OPERAND_PCREL && + !TII->isPCRelRegisterOperandLegal(*MO))) + report("Expected a non-register operand.", MO, MONum); + } } int TiedTo = MCID.getOperandConstraint(MONum, MCOI::TIED_TO); @@ -2150,12 +2269,8 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { if (!Register::isPhysicalRegister(MOP.getReg())) continue; - for (const MCPhysReg &SubReg : TRI->subregs(MOP.getReg())) { - if (SubReg == Reg) { - Bad = false; - break; - } - } + if (llvm::is_contained(TRI->subregs(MOP.getReg()), Reg)) + Bad = false; } } if (Bad) @@ -2903,6 +3018,15 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, // Check that VNI is live-out of all predecessors. for (const MachineBasicBlock *Pred : MFI->predecessors()) { SlotIndex PEnd = LiveInts->getMBBEndIdx(Pred); + // Predecessor of landing pad live-out on last call. 
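// (Expanded reading of the note above: when this successor is a landing pad,
// the value reaches it through the predecessor's possibly-throwing call
// rather than through the end of the predecessor block, so PEnd is pulled
// back to the boundary index of that block's last call before the live-out
// value is looked up.)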
+ if (MFI->isEHPad()) { + for (auto I = Pred->rbegin(), E = Pred->rend(); I != E; ++I) { + if (I->isCall()) { + PEnd = Indexes->getInstructionIndex(*I).getBoundaryIndex(); + break; + } + } + } const VNInfo *PVNI = LR.getVNInfoBefore(PEnd); // All predecessors must have a live-out value. However for a phi diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp index 095da09ea82b..b5517c40a28a 100644 --- a/llvm/lib/CodeGen/ModuloSchedule.cpp +++ b/llvm/lib/CodeGen/ModuloSchedule.cpp @@ -1275,15 +1275,15 @@ class KernelRewriter { Register undef(const TargetRegisterClass *RC); public: - KernelRewriter(MachineLoop &L, ModuloSchedule &S, + KernelRewriter(MachineLoop &L, ModuloSchedule &S, MachineBasicBlock *LoopBB, LiveIntervals *LIS = nullptr); void rewrite(); }; } // namespace KernelRewriter::KernelRewriter(MachineLoop &L, ModuloSchedule &S, - LiveIntervals *LIS) - : S(S), BB(L.getTopBlock()), PreheaderBB(L.getLoopPreheader()), + MachineBasicBlock *LoopBB, LiveIntervals *LIS) + : S(S), BB(LoopBB), PreheaderBB(L.getLoopPreheader()), ExitBB(L.getExitBlock()), MRI(BB->getParent()->getRegInfo()), TII(BB->getParent()->getSubtarget().getInstrInfo()), LIS(LIS) { PreheaderBB = *BB->pred_begin(); @@ -1981,7 +1981,7 @@ void PeelingModuloScheduleExpander::fixupBranches() { } void PeelingModuloScheduleExpander::rewriteKernel() { - KernelRewriter KR(*Schedule.getLoop(), Schedule); + KernelRewriter KR(*Schedule.getLoop(), Schedule, BB); KR.rewrite(); } @@ -2024,7 +2024,7 @@ void PeelingModuloScheduleExpander::validateAgainstModuloScheduleExpander() { Preheader->addSuccessor(BB); // Now run the new expansion algorithm. - KernelRewriter KR(*Schedule.getLoop(), Schedule); + KernelRewriter KR(*Schedule.getLoop(), Schedule, BB); KR.rewrite(); peelPrologAndEpilogs(); diff --git a/llvm/lib/CodeGen/OptimizePHIs.cpp b/llvm/lib/CodeGen/OptimizePHIs.cpp index 02a70ab801e9..8a6cf47c0d89 100644 --- a/llvm/lib/CodeGen/OptimizePHIs.cpp +++ b/llvm/lib/CodeGen/OptimizePHIs.cpp @@ -83,8 +83,8 @@ bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) { // introduce new opportunities, e.g., when i64 values are split up for // 32-bit targets. bool Changed = false; - for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) - Changed |= OptimizeBB(*I); + for (MachineBasicBlock &MBB : Fn) + Changed |= OptimizeBB(MBB); return Changed; } @@ -195,9 +195,7 @@ bool OptimizePHIs::OptimizeBB(MachineBasicBlock &MBB) { // Check for dead PHI cycles. PHIsInCycle.clear(); if (IsDeadPHICycle(MI, PHIsInCycle)) { - for (InstrSetIterator PI = PHIsInCycle.begin(), PE = PHIsInCycle.end(); - PI != PE; ++PI) { - MachineInstr *PhiMI = *PI; + for (MachineInstr *PhiMI : PHIsInCycle) { if (MII == PhiMI) ++MII; PhiMI->eraseFromParent(); diff --git a/llvm/lib/CodeGen/PHIElimination.cpp b/llvm/lib/CodeGen/PHIElimination.cpp index 8148b64d8443..54805584dbc1 100644 --- a/llvm/lib/CodeGen/PHIElimination.cpp +++ b/llvm/lib/CodeGen/PHIElimination.cpp @@ -316,6 +316,16 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, IncomingReg, DestReg); } + if (MPhi->peekDebugInstrNum()) { + // If referred to by debug-info, store where this PHI was. + MachineFunction *MF = MBB.getParent(); + unsigned ID = MPhi->peekDebugInstrNum(); + auto P = MachineFunction::DebugPHIRegallocPos(&MBB, IncomingReg, 0); + auto Res = MF->DebugPHIPositions.insert({ID, P}); + assert(Res.second); + (void)Res; + } + // Update live variable information if there is any. 
if (LV) { if (IncomingReg) { @@ -475,9 +485,10 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, if (DefMI->isImplicitDef()) ImpDefs.insert(DefMI); } else { - NewSrcInstr = - TII->createPHISourceCopy(opBlock, InsertPos, MPhi->getDebugLoc(), - SrcReg, SrcSubReg, IncomingReg); + // Delete the debug location, since the copy is inserted into a + // different basic block. + NewSrcInstr = TII->createPHISourceCopy(opBlock, InsertPos, nullptr, + SrcReg, SrcSubReg, IncomingReg); } } @@ -550,9 +561,8 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, LiveInterval &SrcLI = LIS->getInterval(SrcReg); bool isLiveOut = false; - for (MachineBasicBlock::succ_iterator SI = opBlock.succ_begin(), - SE = opBlock.succ_end(); SI != SE; ++SI) { - SlotIndex startIdx = LIS->getMBBStartIdx(*SI); + for (MachineBasicBlock *Succ : opBlock.successors()) { + SlotIndex startIdx = LIS->getMBBStartIdx(Succ); VNInfo *VNI = SrcLI.getVNInfoAt(startIdx); // Definitions by other PHIs are not truly live-in for our purposes. diff --git a/llvm/lib/CodeGen/ParallelCG.cpp b/llvm/lib/CodeGen/ParallelCG.cpp index 849b667254bd..3e32afaafa6e 100644 --- a/llvm/lib/CodeGen/ParallelCG.cpp +++ b/llvm/lib/CodeGen/ParallelCG.cpp @@ -36,8 +36,8 @@ static void codegen(Module *M, llvm::raw_pwrite_stream &OS, CodeGenPasses.run(*M); } -std::unique_ptr<Module> llvm::splitCodeGen( - std::unique_ptr<Module> M, ArrayRef<llvm::raw_pwrite_stream *> OSs, +void llvm::splitCodeGen( + Module &M, ArrayRef<llvm::raw_pwrite_stream *> OSs, ArrayRef<llvm::raw_pwrite_stream *> BCOSs, const std::function<std::unique_ptr<TargetMachine>()> &TMFactory, CodeGenFileType FileType, bool PreserveLocals) { @@ -45,9 +45,9 @@ std::unique_ptr<Module> llvm::splitCodeGen( if (OSs.size() == 1) { if (!BCOSs.empty()) - WriteBitcodeToFile(*M, *BCOSs[0]); - codegen(M.get(), *OSs[0], TMFactory, FileType); - return M; + WriteBitcodeToFile(M, *BCOSs[0]); + codegen(&M, *OSs[0], TMFactory, FileType); + return; } // Create ThreadPool in nested scope so that threads will be joined @@ -57,7 +57,7 @@ std::unique_ptr<Module> llvm::splitCodeGen( int ThreadCount = 0; SplitModule( - std::move(M), OSs.size(), + M, OSs.size(), [&](std::unique_ptr<Module> MPart) { // We want to clone the module in a new context to multi-thread the // codegen. We do it by serializing partition modules to bitcode @@ -95,6 +95,4 @@ std::unique_ptr<Module> llvm::splitCodeGen( }, PreserveLocals); } - - return {}; } diff --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/llvm/lib/CodeGen/PeepholeOptimizer.cpp index 34ac396c0471..49bdba518322 100644 --- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp +++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp @@ -585,15 +585,30 @@ optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB, MRI->constrainRegClass(DstReg, DstRC); } + // SubReg defs are illegal in machine SSA phase, + // we should not generate SubReg defs. 
+ // + // For example, for the instructions: + // + // %1:g8rc_and_g8rc_nox0 = EXTSW %0:g8rc + // %3:gprc_and_gprc_nor0 = COPY %0.sub_32:g8rc + // + // We should generate: + // + // %1:g8rc_and_g8rc_nox0 = EXTSW %0:g8rc + // %6:gprc_and_gprc_nor0 = COPY %1.sub_32:g8rc_and_g8rc_nox0 + // %3:gprc_and_gprc_nor0 = COPY %6:gprc_and_gprc_nor0 + // + if (UseSrcSubIdx) + RC = MRI->getRegClass(UseMI->getOperand(0).getReg()); + Register NewVR = MRI->createVirtualRegister(RC); - MachineInstr *Copy = BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(), - TII->get(TargetOpcode::COPY), NewVR) + BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(), + TII->get(TargetOpcode::COPY), NewVR) .addReg(DstReg, 0, SubIdx); - // SubIdx applies to both SrcReg and DstReg when UseSrcSubIdx is set. - if (UseSrcSubIdx) { - Copy->getOperand(0).setSubReg(SubIdx); - Copy->getOperand(0).setIsUndef(); - } + if (UseSrcSubIdx) + UseMO->setSubReg(0); + UseMO->setReg(NewVR); ++NumReuse; Changed = true; diff --git a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp index ed19f7448151..d232ca3a69c3 100644 --- a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp @@ -143,18 +143,16 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &MF) { assert(MRI->isSSA() && "ProcessImplicitDefs only works on SSA form."); assert(WorkList.empty() && "Inconsistent worklist state"); - for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end(); - MFI != MFE; ++MFI) { + for (MachineBasicBlock &MBB : MF) { // Scan the basic block for implicit defs. - for (MachineBasicBlock::instr_iterator MBBI = MFI->instr_begin(), - MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI) - if (MBBI->isImplicitDef()) - WorkList.insert(&*MBBI); + for (MachineInstr &MI : MBB) + if (MI.isImplicitDef()) + WorkList.insert(&MI); if (WorkList.empty()) continue; - LLVM_DEBUG(dbgs() << printMBBReference(*MFI) << " has " << WorkList.size() + LLVM_DEBUG(dbgs() << printMBBReference(MBB) << " has " << WorkList.size() << " implicit defs.\n"); Changed = true; diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp index 378aaba2a65f..2f65a450fb02 100644 --- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -138,11 +138,6 @@ char PEI::ID = 0; char &llvm::PrologEpilogCodeInserterID = PEI::ID; -static cl::opt<unsigned> -WarnStackSize("warn-stack-size", cl::Hidden, cl::init((unsigned)-1), - cl::desc("Warn for stack size bigger than the given" - " number")); - INITIALIZE_PASS_BEGIN(PEI, DEBUG_TYPE, "Prologue/Epilogue Insertion", false, false) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) @@ -185,7 +180,8 @@ static void stashEntryDbgValues(MachineBasicBlock &MBB, break; if (!MI.isDebugValue() || !MI.getDebugVariable()->isParameter()) continue; - if (MI.getDebugOperand(0).isFI()) { + if (any_of(MI.debug_operands(), + [](const MachineOperand &MO) { return MO.isFI(); })) { // We can only emit valid locations for frame indices after the frame // setup, so do not stash them away. FrameIndexValues.push_back(&MI); @@ -277,8 +273,19 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) { // Warn on stack size when it exceeds the given limit.
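// A minimal standalone sketch of the per-function threshold lookup that the
// following hunk introduces in place of the deleted -warn-stack-size flag:
// the limit now travels as a "warn-stack-size" string attribute and is parsed
// per function. parseWarnStackSize is an illustrative helper, not LLVM API;
// the real code uses StringRef::getAsInteger and asserts that the verifier
// already rejected malformed values.
#include <charconv>
#include <climits>
#include <string>

unsigned parseWarnStackSize(const std::string &AttrValue) {
  unsigned Threshold = UINT_MAX; // "no limit" unless the attribute parses
  auto [Ptr, Ec] = std::from_chars(
      AttrValue.data(), AttrValue.data() + AttrValue.size(), Threshold);
  (void)Ptr;
  return Ec == std::errc() ? Threshold : UINT_MAX;
}
// e.g. a function carrying "warn-stack-size"="512" warns once its final
// frame grows past 512 bytes.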
MachineFrameInfo &MFI = MF.getFrameInfo(); uint64_t StackSize = MFI.getStackSize(); - if (WarnStackSize.getNumOccurrences() > 0 && WarnStackSize < StackSize) { - DiagnosticInfoStackSize DiagStackSize(F, StackSize); + + unsigned Threshold = UINT_MAX; + if (MF.getFunction().hasFnAttribute("warn-stack-size")) { + bool Failed = MF.getFunction() + .getFnAttribute("warn-stack-size") + .getValueAsString() + .getAsInteger(10, Threshold); + // Verifier should have caught this. + assert(!Failed && "Invalid warn-stack-size fn attr value"); + (void)Failed; + } + if (StackSize > Threshold) { + DiagnosticInfoStackSize DiagStackSize(F, StackSize, DS_Warning, Threshold); F.getContext().diagnose(DiagStackSize); } ORE->emit([&]() { @@ -317,8 +324,8 @@ void PEI::calculateCallFrameInfo(MachineFunction &MF) { return; std::vector<MachineBasicBlock::iterator> FrameSDOps; - for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB) - for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) + for (MachineBasicBlock &BB : MF) + for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) if (TII.isFrameInstr(*I)) { unsigned Size = TII.getFrameSize(*I); if (Size > MaxCallFrameSize) MaxCallFrameSize = Size; @@ -337,10 +344,7 @@ void PEI::calculateCallFrameInfo(MachineFunction &MF) { MFI.setAdjustsStack(AdjustsStack); MFI.setMaxCallFrameSize(MaxCallFrameSize); - for (std::vector<MachineBasicBlock::iterator>::iterator - i = FrameSDOps.begin(), e = FrameSDOps.end(); i != e; ++i) { - MachineBasicBlock::iterator I = *i; - + for (MachineBasicBlock::iterator I : FrameSDOps) { // If call frames are not being included as part of the stack frame, and // the target doesn't indicate otherwise, remove the call frame pseudos // here. The sub/add sp instruction pairs are still inserted, but we don't @@ -401,7 +405,8 @@ static void assignCalleeSavedSpillSlots(MachineFunction &F, const TargetFrameLowering *TFI = F.getSubtarget().getFrameLowering(); MachineFrameInfo &MFI = F.getFrameInfo(); - if (!TFI->assignCalleeSavedSpillSlots(F, RegInfo, CSI)) { + if (!TFI->assignCalleeSavedSpillSlots(F, RegInfo, CSI, MinCSFrameIndex, + MaxCSFrameIndex)) { // If target doesn't implement this, use generic code. if (CSI.empty()) @@ -438,7 +443,7 @@ static void assignCalleeSavedSpillSlots(MachineFunction &F, unsigned Size = RegInfo->getSpillSize(*RC); if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) { // Nope, just spill it anywhere convenient. - Align Alignment(RegInfo->getSpillAlignment(*RC)); + Align Alignment = RegInfo->getSpillAlign(*RC); // We may not be able to satisfy the desired alignment specification of // the TargetRegisterClass if the stack alignment is smaller. Use the // min. @@ -679,10 +684,12 @@ computeFreeStackSlots(MachineFrameInfo &MFI, bool StackGrowsDown, // StackSlot scavenging is only implemented for the default stack. if (MFI.getStackID(i) == TargetStackID::Default) AllocatedFrameSlots.push_back(i); - // Add callee-save objects. - for (int i = MinCSFrameIndex; i <= (int)MaxCSFrameIndex; ++i) - if (MFI.getStackID(i) == TargetStackID::Default) - AllocatedFrameSlots.push_back(i); + // Add callee-save objects if there are any. 
+ if (MinCSFrameIndex <= MaxCSFrameIndex) { + for (int i = MinCSFrameIndex; i <= (int)MaxCSFrameIndex; ++i) + if (MFI.getStackID(i) == TargetStackID::Default) + AllocatedFrameSlots.push_back(i); + } for (int i : AllocatedFrameSlots) { // These are converted from int64_t, but they should always fit in int @@ -772,9 +779,7 @@ static void AssignProtectedObjSet(const StackObjSet &UnassignedObjs, int64_t &Offset, Align &MaxAlign, unsigned Skew) { - for (StackObjSet::const_iterator I = UnassignedObjs.begin(), - E = UnassignedObjs.end(); I != E; ++I) { - int i = *I; + for (int i : UnassignedObjs) { AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign, Skew); ProtectedObjs.insert(i); } @@ -837,7 +842,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) { // First assign frame offsets to stack objects that are used to spill // callee saved registers. - if (StackGrowsDown) { + if (StackGrowsDown && MaxCSFrameIndex >= MinCSFrameIndex) { for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) { if (MFI.getStackID(i) != TargetStackID::Default) // Only allocate objects on the default stack. @@ -881,16 +886,14 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) { // incoming stack pointer if a frame pointer is required and is closer // to the incoming rather than the final stack pointer. const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); - bool EarlyScavengingSlots = (TFI.hasFP(MF) && - TFI.isFPCloseToIncomingSP() && + bool EarlyScavengingSlots = (TFI.hasFP(MF) && TFI.isFPCloseToIncomingSP() && RegInfo->useFPForScavengingIndex(MF) && - !RegInfo->needsStackRealignment(MF)); + !RegInfo->hasStackRealignment(MF)); if (RS && EarlyScavengingSlots) { SmallVector<int, 2> SFIs; RS->getScavengingFrameIndices(SFIs); - for (SmallVectorImpl<int>::iterator I = SFIs.begin(), - IE = SFIs.end(); I != IE; ++I) - AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign, Skew); + for (int SFI : SFIs) + AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign, Skew); } // FIXME: Once this is working, then enable flag will change to a target @@ -1050,9 +1053,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) { if (RS && !EarlyScavengingSlots) { SmallVector<int, 2> SFIs; RS->getScavengingFrameIndices(SFIs); - for (SmallVectorImpl<int>::iterator I = SFIs.begin(), - IE = SFIs.end(); I != IE; ++I) - AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign, Skew); + for (int SFI : SFIs) + AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign, Skew); } if (!TFI.targetHandlesStackFrameRounding()) { @@ -1069,7 +1071,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) { // value. Align StackAlign; if (MFI.adjustsStack() || MFI.hasVarSizedObjects() || - (RegInfo->needsStackRealignment(MF) && MFI.getObjectIndexEnd() != 0)) + (RegInfo->hasStackRealignment(MF) && MFI.getObjectIndexEnd() != 0)) StackAlign = TFI.getStackAlign(); else StackAlign = TFI.getTransientStackAlign(); @@ -1083,18 +1085,19 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) { // If we have increased the offset to fulfill the alignment constraints, // then the scavenging spill slots may become harder to reach from the // stack pointer; float them so they stay close.
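// A minimal model of the MinCSFrameIndex <= MaxCSFrameIndex guards added
// above: when no callee-saved registers were spilled, PEI leaves
// MinCSFrameIndex at its UINT_MAX sentinel (and MaxCSFrameIndex at 0), so
// iterating [Min, Max] without the guard walks a bogus range once the
// sentinel is cast to int. Names are simplified stand-ins.
#include <climits>
#include <vector>

std::vector<int> collectCSFrameSlots(unsigned MinCS, unsigned MaxCS) {
  std::vector<int> Slots;
  if (MinCS <= MaxCS) // empty range when nothing was saved
    for (unsigned i = MinCS; i <= MaxCS; ++i)
      Slots.push_back((int)i);
  return Slots;
}
// collectCSFrameSlots(UINT_MAX, 0) now yields no slots instead of touching
// frame index -1.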
- if (OffsetBeforeAlignment != Offset && RS && !EarlyScavengingSlots) { + if (StackGrowsDown && OffsetBeforeAlignment != Offset && RS && + !EarlyScavengingSlots) { SmallVector<int, 2> SFIs; RS->getScavengingFrameIndices(SFIs); LLVM_DEBUG(if (!SFIs.empty()) llvm::dbgs() << "Adjusting emergency spill slots!\n";); int64_t Delta = Offset - OffsetBeforeAlignment; - for (SmallVectorImpl<int>::iterator I = SFIs.begin(), IE = SFIs.end(); - I != IE; ++I) { - LLVM_DEBUG(llvm::dbgs() << "Adjusting offset of emergency spill slot #" - << *I << " from " << MFI.getObjectOffset(*I);); - MFI.setObjectOffset(*I, MFI.getObjectOffset(*I) - Delta); - LLVM_DEBUG(llvm::dbgs() << " to " << MFI.getObjectOffset(*I) << "\n";); + for (int SFI : SFIs) { + LLVM_DEBUG(llvm::dbgs() + << "Adjusting offset of emergency spill slot #" << SFI + << " from " << MFI.getObjectOffset(SFI);); + MFI.setObjectOffset(SFI, MFI.getObjectOffset(SFI) - Delta); + LLVM_DEBUG(llvm::dbgs() << " to " << MFI.getObjectOffset(SFI) << "\n";); } } } @@ -1222,16 +1225,19 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF, // way with simply the frame index and offset rather than any // target-specific addressing mode. if (MI.isDebugValue()) { - assert(i == 0 && "Frame indices can only appear as the first " - "operand of a DBG_VALUE machine instruction"); + MachineOperand &Op = MI.getOperand(i); + assert( + MI.isDebugOperand(&Op) && + "Frame indices can only appear as a debug operand in a DBG_VALUE*" + " machine instruction"); Register Reg; - unsigned FrameIdx = MI.getOperand(0).getIndex(); + unsigned FrameIdx = Op.getIndex(); unsigned Size = MF.getFrameInfo().getObjectSize(FrameIdx); StackOffset Offset = TFI->getFrameIndexReference(MF, FrameIdx, Reg); - MI.getOperand(0).ChangeToRegister(Reg, false /*isDef*/); - MI.getOperand(0).setIsDebug(); + Op.ChangeToRegister(Reg, false /*isDef*/); + Op.setIsDebug(); const DIExpression *DIExpr = MI.getDebugExpression(); @@ -1240,25 +1246,38 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF, // complex location that is interpreted as being a memory address. // This changes a pointer-valued variable to dereference that pointer, // which is incorrect. Fix by adding DW_OP_stack_value. - unsigned PrependFlags = DIExpression::ApplyOffset; - if (!MI.isIndirectDebugValue() && !DIExpr->isComplex()) - PrependFlags |= DIExpression::StackValue; - - // If we have a DBG_VALUE that is indirect and has an Implicit location - // expression, we need to insert a deref before prepending a Memory - // location expression. Also, after doing this we change the DBG_VALUE - // to be direct. - if (MI.isIndirectDebugValue() && DIExpr->isImplicit()) { - SmallVector<uint64_t, 2> Ops = {dwarf::DW_OP_deref_size, Size}; - bool WithStackValue = true; - DIExpr = DIExpression::prependOpcodes(DIExpr, Ops, WithStackValue); - // Make the DBG_VALUE direct. - MI.getDebugOffset().ChangeToRegister(0, false); - } - DIExpr = TRI.prependOffsetExpression(DIExpr, PrependFlags, Offset); + if (MI.isNonListDebugValue()) { + unsigned PrependFlags = DIExpression::ApplyOffset; + if (!MI.isIndirectDebugValue() && !DIExpr->isComplex()) + PrependFlags |= DIExpression::StackValue; + + // If we have a DBG_VALUE that is indirect and has an Implicit location + // expression, we need to insert a deref before prepending a Memory + // location expression. Also, after doing this we change the DBG_VALUE + // to be direct.
+ if (MI.isIndirectDebugValue() && DIExpr->isImplicit()) { + SmallVector<uint64_t, 2> Ops = {dwarf::DW_OP_deref_size, Size}; + bool WithStackValue = true; + DIExpr = DIExpression::prependOpcodes(DIExpr, Ops, WithStackValue); + // Make the DBG_VALUE direct. + MI.getDebugOffset().ChangeToRegister(0, false); + } + DIExpr = TRI.prependOffsetExpression(DIExpr, PrependFlags, Offset); + } else { + // The debug operand at DebugOpIndex was a frame index at offset + // `Offset`; now that the operand has been replaced with the frame + // register, we must fold `Offset` into the expression as + // `register x, plus Offset`. + unsigned DebugOpIndex = MI.getDebugOperandIndex(&Op); + SmallVector<uint64_t, 3> Ops; + TRI.getOffsetOpcodes(Offset, Ops); + DIExpr = DIExpression::appendOpsToArg(DIExpr, Ops, DebugOpIndex); + } MI.getDebugExpressionOp().setMetadata(DIExpr); continue; + } else if (MI.isDebugPHI()) { + // Allow stack ref to continue onwards. + continue; } // TODO: This code should be commoned with the code for diff --git a/llvm/lib/CodeGen/PseudoProbeInserter.cpp b/llvm/lib/CodeGen/PseudoProbeInserter.cpp index 9c716a5a37ea..a9fb577d5735 100644 --- a/llvm/lib/CodeGen/PseudoProbeInserter.cpp +++ b/llvm/lib/CodeGen/PseudoProbeInserter.cpp @@ -20,8 +20,9 @@ #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/PseudoProbe.h" #include "llvm/InitializePasses.h" +#include "llvm/MC/MCPseudoProbe.h" #include "llvm/Target/TargetMachine.h" -#include <unordered_map> +#include <unordered_set> #define DEBUG_TYPE "pseudo-probe-inserter" @@ -47,7 +48,10 @@ public: const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); bool Changed = false; for (MachineBasicBlock &MBB : MF) { + MachineInstr *FirstInstr = nullptr; for (MachineInstr &MI : MBB) { + if (!MI.isPseudo()) + FirstInstr = &MI; if (MI.isCall()) { if (DILocation *DL = MI.getDebugLoc()) { auto Value = DL->getDiscriminator(); @@ -65,6 +69,53 @@ public: } } } + + // Walk the block backwards and move PSEUDO_PROBE before the first real + // instruction to fix out-of-order probes. There is a problem with probes + // as the terminator of the block. During the offline counts processing, + // the samples collected on the first physical instruction following a + // probe will be counted towards the probe. This is logically equivalent + // to treating the instruction next to a probe as if it were from the + // same block as the probe. This is accurate most of the time unless the + // instruction can be reached from multiple flows, which means it actually + // starts a new block. Samples collected on such probes may cause + // imprecision with the counts inference algorithm. Fortunately, if + // there are still other native instructions preceding the probe we can + // use them as a placeholder to collect samples for the probe. + if (FirstInstr) { + auto MII = MBB.rbegin(); + while (MII != MBB.rend()) { + // Skip all pseudo probes followed by a real instruction since they + // are not dangling. + if (!MII->isPseudo()) + break; + auto Cur = MII++; + if (Cur->getOpcode() != TargetOpcode::PSEUDO_PROBE) + continue; + // Move the dangling probe before FirstInstr. + auto *ProbeInstr = &*Cur; + MBB.remove(ProbeInstr); + MBB.insert(FirstInstr, ProbeInstr); + Changed = true; + } + } else { + // Probes not surrounded by any real instructions in the same block are + // called dangling probes.
Since there's no good way to pick up a sample + // collection point for dangling probes at compile time, they are being + // removed so that the profile correlation tool will not report any + // samples collected for them and it's up to the counts inference tool + // to get them a reasonable count. + SmallVector<MachineInstr *, 4> ToBeRemoved; + for (MachineInstr &MI : MBB) { + if (MI.isPseudoProbe()) + ToBeRemoved.push_back(&MI); + } + + for (auto *MI : ToBeRemoved) + MI->eraseFromParent(); + + Changed |= !ToBeRemoved.empty(); + } } return Changed; diff --git a/llvm/lib/CodeGen/RDFGraph.cpp b/llvm/lib/CodeGen/RDFGraph.cpp index cebb902f0a4a..f605068e076d 100644 --- a/llvm/lib/CodeGen/RDFGraph.cpp +++ b/llvm/lib/CodeGen/RDFGraph.cpp @@ -994,8 +994,8 @@ RegisterRef DataFlowGraph::restrictRef(RegisterRef AR, RegisterRef BR) const { // For each stack in the map DefM, push the delimiter for block B on it. void DataFlowGraph::markBlock(NodeId B, DefStackMap &DefM) { // Push block delimiters. - for (auto I = DefM.begin(), E = DefM.end(); I != E; ++I) - I->second.start_block(B); + for (auto &P : DefM) + P.second.start_block(B); } // Remove all definitions coming from block B from each stack in DefM. @@ -1003,8 +1003,8 @@ void DataFlowGraph::releaseBlock(NodeId B, DefStackMap &DefM) { // Pop all defs from this block from the definition stack. Defs that were // added to the map during the traversal of instructions will not have a // delimiter, but for those, the whole stack will be emptied. - for (auto I = DefM.begin(), E = DefM.end(); I != E; ++I) - I->second.clear_block(B); + for (auto &P : DefM) + P.second.clear_block(B); // Finally, remove empty stacks from the map. for (auto I = DefM.begin(), E = DefM.end(), NextI = I; I != E; I = NextI) { diff --git a/llvm/lib/CodeGen/RDFLiveness.cpp b/llvm/lib/CodeGen/RDFLiveness.cpp index 76bf0c280970..d92c6a997f31 100644 --- a/llvm/lib/CodeGen/RDFLiveness.cpp +++ b/llvm/lib/CodeGen/RDFLiveness.cpp @@ -238,8 +238,8 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR, [this](auto A, auto B) { return MDT.properlyDominates(A, B); }); std::vector<NodeId> TmpInst; - for (auto I = TmpBB.rbegin(), E = TmpBB.rend(); I != E; ++I) { - auto &Bucket = Blocks[*I]; + for (MachineBasicBlock *MBB : llvm::reverse(TmpBB)) { + auto &Bucket = Blocks[MBB]; TmpInst.insert(TmpInst.end(), Bucket.rbegin(), Bucket.rend()); } @@ -866,8 +866,8 @@ void Liveness::computeLiveIns() { // Dump the liveness map for (MachineBasicBlock &B : MF) { std::vector<RegisterRef> LV; - for (auto I = B.livein_begin(), E = B.livein_end(); I != E; ++I) - LV.push_back(RegisterRef(I->PhysReg, I->LaneMask)); + for (const MachineBasicBlock::RegisterMaskPair &LI : B.liveins()) + LV.push_back(RegisterRef(LI.PhysReg, LI.LaneMask)); llvm::sort(LV); dbgs() << printMBBReference(B) << "\t rec = {"; for (auto I : LV) @@ -893,16 +893,14 @@ void Liveness::resetLiveIns() { for (auto &B : DFG.getMF()) { // Remove all live-ins. std::vector<unsigned> T; - for (auto I = B.livein_begin(), E = B.livein_end(); I != E; ++I) - T.push_back(I->PhysReg); + for (const MachineBasicBlock::RegisterMaskPair &LI : B.liveins()) + T.push_back(LI.PhysReg); for (auto I : T) B.removeLiveIn(I); // Add the newly computed live-ins. 
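// A minimal standalone model of the resetLiveIns() pattern above and just
// below: snapshot the current live-in list before removing entries, since
// erasing while iterating the block's own list would invalidate the
// traversal, then install the newly computed set. Plain containers stand in
// for MachineBasicBlock's RegisterMaskPair live-in list.
#include <algorithm>
#include <vector>

struct Block { std::vector<unsigned> LiveIns; };

void resetLiveIns(Block &B, const std::vector<unsigned> &Computed) {
  std::vector<unsigned> Old = B.LiveIns; // snapshot before mutating
  for (unsigned R : Old)
    B.LiveIns.erase(std::find(B.LiveIns.begin(), B.LiveIns.end(), R));
  B.LiveIns = Computed; // add the newly computed live-ins
}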
const RegisterAggr &LiveIns = LiveMap[&B]; - for (auto I = LiveIns.rr_begin(), E = LiveIns.rr_end(); I != E; ++I) { - RegisterRef R = *I; + for (const RegisterRef R : make_range(LiveIns.rr_begin(), LiveIns.rr_end())) B.addLiveIn({MCPhysReg(R.Reg), R.Mask}); - } } } @@ -933,13 +931,12 @@ void Liveness::resetKills(MachineBasicBlock *B) { for (auto SI : B->successors()) CopyLiveIns(SI, Live); - for (auto I = B->rbegin(), E = B->rend(); I != E; ++I) { - MachineInstr *MI = &*I; - if (MI->isDebugInstr()) + for (MachineInstr &MI : llvm::reverse(*B)) { + if (MI.isDebugInstr()) continue; - MI->clearKillInfo(); - for (auto &Op : MI->operands()) { + MI.clearKillInfo(); + for (auto &Op : MI.operands()) { // An implicit def of a super-register may not necessarily start a // live range of it, since an implicit use could be used to keep parts // of it live. Instead of analyzing the implicit operands, ignore @@ -952,7 +949,7 @@ void Liveness::resetKills(MachineBasicBlock *B) { for (MCSubRegIterator SR(R, &TRI, true); SR.isValid(); ++SR) Live.reset(*SR); } - for (auto &Op : MI->operands()) { + for (auto &Op : MI.operands()) { if (!Op.isReg() || !Op.isUse() || Op.isUndef()) continue; Register R = Op.getReg(); diff --git a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp index d16e90a7e0b4..c850571da2ed 100644 --- a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp +++ b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SetOperations.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/ReachingDefAnalysis.h" #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -124,7 +125,7 @@ void ReachingDefAnalysis::processDefs(MachineInstr *MI) { for (MCRegUnitIterator Unit(MO.getReg().asMCReg(), TRI); Unit.isValid(); ++Unit) { // This instruction explicitly defines the current reg unit. - LLVM_DEBUG(dbgs() << printReg(*Unit, TRI) << ":\t" << CurInstr + LLVM_DEBUG(dbgs() << printRegUnit(*Unit, TRI) << ":\t" << CurInstr << '\t' << *MI); // How many instructions since this reg unit was last written? @@ -660,10 +661,7 @@ void ReachingDefAnalysis::collectKilledOperands(MachineInstr *MI, SmallPtrSet<MachineInstr*, 4> Uses; getGlobalUses(Def, PhysReg, Uses); - for (auto *Use : Uses) - if (!Dead.count(Use)) - return false; - return true; + return llvm::set_is_subset(Uses, Dead); }; for (auto &MO : MI->operands()) { @@ -688,9 +686,8 @@ bool ReachingDefAnalysis::isSafeToDefRegAt(MachineInstr *MI, MCRegister PhysReg, if (auto *Def = getReachingLocalMIDef(MI, PhysReg)) { SmallPtrSet<MachineInstr*, 2> Uses; getGlobalUses(Def, PhysReg, Uses); - for (auto *Use : Uses) - if (!Ignore.count(Use)) - return false; + if (!llvm::set_is_subset(Uses, Ignore)) + return false; } else return false; } diff --git a/llvm/lib/CodeGen/RegAllocBase.cpp b/llvm/lib/CodeGen/RegAllocBase.cpp index aa749ca43e74..d891d4c2ffbb 100644 --- a/llvm/lib/CodeGen/RegAllocBase.cpp +++ b/llvm/lib/CodeGen/RegAllocBase.cpp @@ -35,7 +35,7 @@ using namespace llvm; #define DEBUG_TYPE "regalloc" -STATISTIC(NumNewQueued , "Number of new live ranges queued"); +STATISTIC(NumNewQueued, "Number of new live ranges queued"); // Temporary verification option until we can put verification inside // MachineVerifier. @@ -54,8 +54,7 @@ bool RegAllocBase::VerifyEnabled = false; // Pin the vtable to this file. 
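// A minimal sketch of the llvm::set_is_subset() rewrites in
// ReachingDefAnalysis above: both the "all uses are already dead" and "all
// uses may be ignored" loops are plain subset queries, so the code now asks
// that directly. std::set stands in for the SmallPtrSet arguments.
#include <set>

template <typename T>
bool isSubsetOf(const std::set<T> &A, const std::set<T> &B) {
  for (const T &E : A)
    if (!B.count(E)) // some element of A is missing from B
      return false;
  return true; // vacuously true when A is empty
}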
void RegAllocBase::anchor() {} -void RegAllocBase::init(VirtRegMap &vrm, - LiveIntervals &lis, +void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis, LiveRegMatrix &mat) { TRI = &vrm.getTargetRegInfo(); MRI = &vrm.getRegInfo(); @@ -124,7 +123,12 @@ void RegAllocBase::allocatePhysRegs() { if (MI->isInlineAsm()) break; } - if (MI && MI->isInlineAsm()) { + + const TargetRegisterClass *RC = MRI->getRegClass(VirtReg->reg()); + ArrayRef<MCPhysReg> AllocOrder = RegClassInfo.getOrder(RC); + if (AllocOrder.empty()) + report_fatal_error("no registers from class available to allocate"); + else if (MI && MI->isInlineAsm()) { MI->emitError("inline assembly requires more registers than available"); } else if (MI) { LLVMContext &Context = @@ -133,10 +137,9 @@ void RegAllocBase::allocatePhysRegs() { } else { report_fatal_error("ran out of registers during register allocation"); } + // Keep going after reporting the error. - VRM->assignVirt2Phys( - VirtReg->reg(), - RegClassInfo.getOrder(MRI->getRegClass(VirtReg->reg())).front()); + VRM->assignVirt2Phys(VirtReg->reg(), AllocOrder.front()); continue; } @@ -172,3 +175,21 @@ void RegAllocBase::postOptimization() { } DeadRemats.clear(); } + +void RegAllocBase::enqueue(LiveInterval *LI) { + const Register Reg = LI->reg(); + + assert(Reg.isVirtual() && "Can only enqueue virtual registers"); + + if (VRM->hasPhys(Reg)) + return; + + const TargetRegisterClass &RC = *MRI->getRegClass(Reg); + if (ShouldAllocateClass(*TRI, RC)) { + LLVM_DEBUG(dbgs() << "Enqueuing " << printReg(Reg, TRI) << '\n'); + enqueueImpl(LI); + } else { + LLVM_DEBUG(dbgs() << "Not enqueueing " << printReg(Reg, TRI) + << " in skipped register class\n"); + } +} diff --git a/llvm/lib/CodeGen/RegAllocBase.h b/llvm/lib/CodeGen/RegAllocBase.h index 3144605345e9..1fb56dbaebb7 100644 --- a/llvm/lib/CodeGen/RegAllocBase.h +++ b/llvm/lib/CodeGen/RegAllocBase.h @@ -37,6 +37,7 @@ #define LLVM_LIB_CODEGEN_REGALLOCBASE_H #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/CodeGen/RegAllocCommon.h" #include "llvm/CodeGen/RegisterClassInfo.h" namespace llvm { @@ -67,6 +68,7 @@ protected: LiveIntervals *LIS = nullptr; LiveRegMatrix *Matrix = nullptr; RegisterClassInfo RegClassInfo; + const RegClassFilterFunc ShouldAllocateClass; /// Inst which is a def of an original reg and whose defs are already all /// dead after remat is saved in DeadRemats. The deletion of such inst is @@ -74,7 +76,9 @@ protected: /// always available for the remat of all the siblings of the original reg. SmallPtrSet<MachineInstr *, 32> DeadRemats; - RegAllocBase() = default; + RegAllocBase(const RegClassFilterFunc F = allocateAllRegClasses) : + ShouldAllocateClass(F) {} + virtual ~RegAllocBase() = default; // A RegAlloc pass should call this before allocatePhysRegs. @@ -92,7 +96,10 @@ protected: virtual Spiller &spiller() = 0; /// enqueue - Add VirtReg to the priority queue of unassigned registers. - virtual void enqueue(LiveInterval *LI) = 0; + virtual void enqueueImpl(LiveInterval *LI) = 0; + + /// enqueue - Add VirtReg to the priority queue of unassigned registers. + void enqueue(LiveInterval *LI); /// dequeue - Return the next unassigned register, or NULL. 
virtual LiveInterval *dequeue() = 0; diff --git a/llvm/lib/CodeGen/RegAllocBasic.cpp b/llvm/lib/CodeGen/RegAllocBasic.cpp index 8f2cb48c5d69..b65d58077958 100644 --- a/llvm/lib/CodeGen/RegAllocBasic.cpp +++ b/llvm/lib/CodeGen/RegAllocBasic.cpp @@ -76,7 +76,7 @@ class RABasic : public MachineFunctionPass, void LRE_WillShrinkVirtReg(Register) override; public: - RABasic(); + RABasic(const RegClassFilterFunc F = allocateAllRegClasses); /// Return the pass name. StringRef getPassName() const override { return "Basic Register Allocator"; } @@ -88,7 +88,7 @@ public: Spiller &spiller() override { return *SpillerInstance; } - void enqueue(LiveInterval *LI) override { + void enqueueImpl(LiveInterval *LI) override { Queue.push(LI); } @@ -171,7 +171,9 @@ void RABasic::LRE_WillShrinkVirtReg(Register VirtReg) { enqueue(&LI); } -RABasic::RABasic(): MachineFunctionPass(ID) { +RABasic::RABasic(RegClassFilterFunc F): + MachineFunctionPass(ID), + RegAllocBase(F) { } void RABasic::getAnalysisUsage(AnalysisUsage &AU) const { @@ -286,16 +288,14 @@ MCRegister RABasic::selectOrSplit(LiveInterval &VirtReg, } // Try to spill another interfering reg with less spill weight. - for (auto PhysRegI = PhysRegSpillCands.begin(), - PhysRegE = PhysRegSpillCands.end(); - PhysRegI != PhysRegE; ++PhysRegI) { - if (!spillInterferences(VirtReg, *PhysRegI, SplitVRegs)) + for (MCRegister &PhysReg : PhysRegSpillCands) { + if (!spillInterferences(VirtReg, PhysReg, SplitVRegs)) continue; - assert(!Matrix->checkInterference(VirtReg, *PhysRegI) && + assert(!Matrix->checkInterference(VirtReg, PhysReg) && "Interference after spill."); // Tell the caller to allocate to this newly freed physical register. - return *PhysRegI; + return PhysReg; } // No other spill candidates were found, so spill the current VirtReg. 
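// A minimal sketch of the RegClassFilterFunc plumbing threaded through
// RABasic above: the allocator is constructed with a predicate and only takes
// ownership of virtual registers whose register class passes it, which is
// what lets targets stack one allocator per class family. Types here are
// simplified stand-ins.
#include <functional>
#include <utility>

struct RegClass { bool IsVector; };
using ClassFilter = std::function<bool(const RegClass &)>;

struct Allocator {
  ClassFilter ShouldAllocate;
  explicit Allocator(ClassFilter F = [](const RegClass &) { return true; })
      : ShouldAllocate(std::move(F)) {}
  bool enqueue(const RegClass &RC) const { return ShouldAllocate(RC); }
};
// e.g. Allocator VecOnly([](const RegClass &RC) { return RC.IsVector; });
// skips everything except vector classes; a later pass handles the rest.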
@@ -322,7 +322,7 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) { getAnalysis<MachineBlockFrequencyInfo>()); VRAI.calculateSpillWeightsAndHints(); - SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM)); + SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM, VRAI)); allocatePhysRegs(); postOptimization(); @@ -334,7 +334,10 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) { return true; } -FunctionPass* llvm::createBasicRegisterAllocator() -{ +FunctionPass* llvm::createBasicRegisterAllocator() { return new RABasic(); } + +FunctionPass* llvm::createBasicRegisterAllocator(RegClassFilterFunc F) { + return new RABasic(F); +} diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp index 6e548d4a93c8..707161d5a8b0 100644 --- a/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/llvm/lib/CodeGen/RegAllocFast.cpp @@ -27,6 +27,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegAllocCommon.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" @@ -69,7 +70,13 @@ namespace { public: static char ID; - RegAllocFast() : MachineFunctionPass(ID), StackSlotForVirtReg(-1) {} + RegAllocFast(const RegClassFilterFunc F = allocateAllRegClasses, + bool ClearVirtRegs_ = true) : + MachineFunctionPass(ID), + ShouldAllocateClass(F), + StackSlotForVirtReg(-1), + ClearVirtRegs(ClearVirtRegs_) { + } private: MachineFrameInfo *MFI; @@ -77,6 +84,7 @@ namespace { const TargetRegisterInfo *TRI; const TargetInstrInfo *TII; RegisterClassInfo RegClassInfo; + const RegClassFilterFunc ShouldAllocateClass; /// Basic block currently being allocated. MachineBasicBlock *MBB; @@ -84,6 +92,8 @@ namespace { /// Maps virtual regs to the frame index where these values are spilled. IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg; + bool ClearVirtRegs; + /// Everything we know about a live virtual register. struct LiveReg { MachineInstr *LastUse = nullptr; ///< Last instr to use reg. @@ -108,7 +118,7 @@ namespace { /// Stores assigned virtual registers present in the bundle MI. DenseMap<Register, MCPhysReg> BundleVirtRegsMap; - DenseMap<unsigned, SmallVector<MachineInstr *, 2>> LiveDbgValueMap; + DenseMap<unsigned, SmallVector<MachineOperand *, 2>> LiveDbgValueMap; /// List of DBG_VALUE that we encountered without the vreg being assigned /// because they were placed after the last use of the vreg. DenseMap<unsigned, SmallVector<MachineInstr *, 1>> DanglingDbgValues; @@ -147,6 +157,8 @@ namespace { RegUnitSet UsedInInstr; RegUnitSet PhysRegUses; SmallVector<uint16_t, 8> DefOperandIndexes; + // Register masks attached to the current instruction. + SmallVector<const uint32_t *> RegMasks; void setPhysRegState(MCPhysReg PhysReg, unsigned NewState); bool isPhysRegFree(MCPhysReg PhysReg) const; @@ -157,8 +169,17 @@ namespace { UsedInInstr.insert(*Units); } + // Check if physreg is clobbered by instruction's regmask(s). + bool isClobberedByRegMasks(MCPhysReg PhysReg) const { + return llvm::any_of(RegMasks, [PhysReg](const uint32_t *Mask) { + return MachineOperand::clobbersPhysReg(Mask, PhysReg); + }); + } + /// Check if a physreg or any of its aliases are used in this instruction. 
bool isRegUsedInInstr(MCPhysReg PhysReg, bool LookAtPhysRegUses) const { + if (LookAtPhysRegUses && isClobberedByRegMasks(PhysReg)) + return true; for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { if (UsedInInstr.count(*Units)) return true; @@ -202,8 +223,12 @@ } MachineFunctionProperties getSetProperties() const override { - return MachineFunctionProperties().set( + if (ClearVirtRegs) { + return MachineFunctionProperties().set( MachineFunctionProperties::Property::NoVRegs); + } + + return MachineFunctionProperties(); } MachineFunctionProperties getClearedProperties() const override { @@ -406,9 +431,15 @@ void RegAllocFast::spill(MachineBasicBlock::iterator Before, Register VirtReg, // When we spill a virtual register, we will have spill instructions behind // every definition of it, meaning we can switch all the DBG_VALUEs over // to just reference the stack slot. - SmallVectorImpl<MachineInstr *> &LRIDbgValues = LiveDbgValueMap[VirtReg]; - for (MachineInstr *DBG : LRIDbgValues) { - MachineInstr *NewDV = buildDbgValueForSpill(*MBB, Before, *DBG, FI); + SmallVectorImpl<MachineOperand *> &LRIDbgOperands = LiveDbgValueMap[VirtReg]; + SmallDenseMap<MachineInstr *, SmallVector<const MachineOperand *>> + SpilledOperandsMap; + for (MachineOperand *MO : LRIDbgOperands) + SpilledOperandsMap[MO->getParent()].push_back(MO); + for (auto MISpilledOperands : SpilledOperandsMap) { + MachineInstr &DBG = *MISpilledOperands.first; + MachineInstr *NewDV = buildDbgValueForSpill( + *MBB, Before, *MISpilledOperands.first, FI, MISpilledOperands.second); assert(NewDV->getParent() == MBB && "dangling parent pointer"); (void)NewDV; LLVM_DEBUG(dbgs() << "Inserting debug info due to spill:\n" << *NewDV); @@ -424,14 +455,19 @@ } // Rewrite unassigned dbg_values to use the stack slot. - MachineOperand &MO = DBG->getOperand(0); - if (MO.isReg() && MO.getReg() == 0) - updateDbgValueForSpill(*DBG, FI); + // TODO: We can potentially do this for list debug values as well if we know + // how the dbg_values are getting unassigned. + if (DBG.isNonListDebugValue()) { + MachineOperand &MO = DBG.getDebugOperand(0); + if (MO.isReg() && MO.getReg() == 0) { + updateDbgValueForSpill(DBG, FI, 0); + } + } } // Now that this register is spilled, there should not be any DBG_VALUE // pointing to this register, because they are all pointing to the spilled // value now. - LRIDbgValues.clear(); + LRIDbgOperands.clear(); } /// Insert reload instruction for \p PhysReg before \p Before. @@ -623,8 +659,7 @@ void RegAllocFast::assignDanglingDebugValues(MachineInstr &Definition, SmallVectorImpl<MachineInstr*> &Dangling = UDBGValIter->second; for (MachineInstr *DbgValue : Dangling) { assert(DbgValue->isDebugValue()); - MachineOperand &MO = DbgValue->getOperand(0); - if (!MO.isReg()) + if (!DbgValue->hasDebugOperandForReg(VirtReg)) continue; // Test whether the physreg survives from the definition to the DBG_VALUE. @@ -639,9 +674,11 @@ break; } } - MO.setReg(SetToReg); - if (SetToReg != 0) - MO.setIsRenamable(); + for (MachineOperand &MO : DbgValue->getDebugOperandsForReg(VirtReg)) { + MO.setReg(SetToReg); + if (SetToReg != 0) + MO.setIsRenamable(); + } } Dangling.clear(); } @@ -1076,6 +1113,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { // operands and early-clobbers.
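// A minimal standalone sketch of the SpilledOperandsMap re-bucketing in
// spill() above: LiveDbgValueMap now tracks individual debug *operands*, so
// before rewriting, the operands are grouped by their parent DBG_VALUE and
// each instruction is rewritten once with all of its spilled operands. Plain
// containers and types stand in for the LLVM ones.
#include <map>
#include <vector>

struct Instr;
struct Operand { Instr *Parent; };

std::map<Instr *, std::vector<Operand *>>
groupByInstruction(const std::vector<Operand *> &Ops) {
  std::map<Instr *, std::vector<Operand *>> ByInstr;
  for (Operand *O : Ops)
    ByInstr[O->Parent].push_back(O); // one rewrite per DBG_VALUE
  return ByInstr;
}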
UsedInInstr.clear(); + RegMasks.clear(); BundleVirtRegsMap.clear(); // Scan for special cases; Apply pre-assigned register defs to state. @@ -1115,6 +1153,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { } } else if (MO.isRegMask()) { HasRegMask = true; + RegMasks.push_back(MO.getRegMask()); } } @@ -1230,6 +1269,9 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { continue; } + assert((!MO.isTied() || !isClobberedByRegMasks(MO.getReg())) && + "tied def assigned to clobbered register"); + // Do not free tied operands and early clobbers. if (MO.isTied() || MO.isEarlyClobber()) continue; @@ -1246,20 +1288,16 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { // Displace clobbered registers. if (HasRegMask) { - for (const MachineOperand &MO : MI.operands()) { - if (MO.isRegMask()) { - // MRI bookkeeping. - MRI->addPhysRegsUsedFromRegMask(MO.getRegMask()); - - // Displace clobbered registers. - const uint32_t *Mask = MO.getRegMask(); - for (LiveRegMap::iterator LRI = LiveVirtRegs.begin(), - LRIE = LiveVirtRegs.end(); LRI != LRIE; ++LRI) { - MCPhysReg PhysReg = LRI->PhysReg; - if (PhysReg != 0 && MachineOperand::clobbersPhysReg(Mask, PhysReg)) - displacePhysReg(MI, PhysReg); - } - } + assert(!RegMasks.empty() && "expected RegMask"); + // MRI bookkeeping. + for (const auto *RM : RegMasks) + MRI->addPhysRegsUsedFromRegMask(RM); + + // Displace clobbered registers. + for (const LiveReg &LR : LiveVirtRegs) { + MCPhysReg PhysReg = LR.PhysReg; + if (PhysReg != 0 && isClobberedByRegMasks(PhysReg)) + displacePhysReg(MI, PhysReg); } } @@ -1361,37 +1399,40 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { } void RegAllocFast::handleDebugValue(MachineInstr &MI) { - MachineOperand &MO = MI.getDebugOperand(0); - // Ignore DBG_VALUEs that aren't based on virtual registers. These are // mostly constants and frame indices. - if (!MO.isReg()) - return; - Register Reg = MO.getReg(); - if (!Register::isVirtualRegister(Reg)) - return; + for (Register Reg : MI.getUsedDebugRegs()) { + if (!Register::isVirtualRegister(Reg)) + continue; - // Already spilled to a stackslot? - int SS = StackSlotForVirtReg[Reg]; - if (SS != -1) { - // Modify DBG_VALUE now that the value is in a spill slot. - updateDbgValueForSpill(MI, SS); - LLVM_DEBUG(dbgs() << "Rewrite DBG_VALUE for spilled memory: " << MI); - return; - } + // Already spilled to a stackslot? + int SS = StackSlotForVirtReg[Reg]; + if (SS != -1) { + // Modify DBG_VALUE now that the value is in a spill slot. + updateDbgValueForSpill(MI, SS, Reg); + LLVM_DEBUG(dbgs() << "Rewrite DBG_VALUE for spilled memory: " << MI); + continue; + } - // See if this virtual register has already been allocated to a physical - // register or spilled to a stack slot. - LiveRegMap::iterator LRI = findLiveVirtReg(Reg); - if (LRI != LiveVirtRegs.end() && LRI->PhysReg) { - setPhysReg(MI, MO, LRI->PhysReg); - } else { - DanglingDbgValues[Reg].push_back(&MI); - } + // See if this virtual register has already been allocated to a physical + // register or spilled to a stack slot. + LiveRegMap::iterator LRI = findLiveVirtReg(Reg); + SmallVector<MachineOperand *> DbgOps; + for (MachineOperand &Op : MI.getDebugOperandsForReg(Reg)) + DbgOps.push_back(&Op); + + if (LRI != LiveVirtRegs.end() && LRI->PhysReg) { + // Update every use of Reg within MI. 
+ for (auto &RegMO : DbgOps) + setPhysReg(MI, *RegMO, LRI->PhysReg); + } else { + DanglingDbgValues[Reg].push_back(&MI); + } - // If Reg hasn't been spilled, put this DBG_VALUE in LiveDbgValueMap so - // that future spills of Reg will have DBG_VALUEs. - LiveDbgValueMap[Reg].push_back(&MI); + // If Reg hasn't been spilled, put this DBG_VALUE in LiveDbgValueMap so + // that future spills of Reg will have DBG_VALUEs. + LiveDbgValueMap[Reg].append(DbgOps.begin(), DbgOps.end()); + } } void RegAllocFast::handleBundle(MachineInstr &MI) { @@ -1425,10 +1466,8 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) { RegUnitStates.assign(TRI->getNumRegUnits(), regFree); assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?"); - for (MachineBasicBlock *Succ : MBB.successors()) { - for (const MachineBasicBlock::RegisterMaskPair &LI : Succ->liveins()) - setPhysRegState(LI.PhysReg, regPreAssigned); - } + for (auto &LiveReg : MBB.liveouts()) + setPhysRegState(LiveReg.PhysReg, regPreAssigned); Coalesced.clear(); @@ -1473,13 +1512,12 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) { for (auto &UDBGPair : DanglingDbgValues) { for (MachineInstr *DbgValue : UDBGPair.second) { assert(DbgValue->isDebugValue() && "expected DBG_VALUE"); - MachineOperand &MO = DbgValue->getOperand(0); // Nothing to do if the vreg was spilled in the meantime. - if (!MO.isReg()) + if (!DbgValue->hasDebugOperandForReg(UDBGPair.first)) continue; LLVM_DEBUG(dbgs() << "Register did not survive for " << *DbgValue << '\n'); - MO.setReg(0); + DbgValue->setDebugValueUndef(); } } DanglingDbgValues.clear(); @@ -1515,9 +1553,11 @@ bool RegAllocFast::runOnMachineFunction(MachineFunction &MF) { for (MachineBasicBlock &MBB : MF) allocateBasicBlock(MBB); - // All machine operands and other references to virtual registers have been - // replaced. Remove the virtual registers. - MRI->clearVirtRegs(); + if (ClearVirtRegs) { + // All machine operands and other references to virtual registers have been + // replaced. Remove the virtual registers. + MRI->clearVirtRegs(); + } StackSlotForVirtReg.clear(); LiveDbgValueMap.clear(); @@ -1527,3 +1567,9 @@ bool RegAllocFast::runOnMachineFunction(MachineFunction &MF) { FunctionPass *llvm::createFastRegisterAllocator() { return new RegAllocFast(); } + +FunctionPass *llvm::createFastRegisterAllocator( + std::function<bool(const TargetRegisterInfo &TRI, + const TargetRegisterClass &RC)> Ftor, bool ClearVirtRegs) { + return new RegAllocFast(Ftor, ClearVirtRegs); +} diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index 166414e4ffa1..4eb12aa30ee9 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -69,6 +69,7 @@ #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/IR/DebugInfoMetadata.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -406,8 +407,12 @@ class RAGreedy : public MachineFunctionPass, /// Set of broken hints that may be reconciled later because of eviction. SmallSetVector<LiveInterval *, 8> SetOfBrokenHints; + /// The register cost values. This list will be recreated for each Machine + /// Function + ArrayRef<uint8_t> RegCosts; + public: - RAGreedy(); + RAGreedy(const RegClassFilterFunc F = allocateAllRegClasses); /// Return the pass name. 
StringRef getPassName() const override { return "Greedy Register Allocator"; } @@ -416,7 +421,7 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override; void releaseMemory() override; Spiller &spiller() override { return *SpillerInstance; } - void enqueue(LiveInterval *LI) override; + void enqueueImpl(LiveInterval *LI) override; LiveInterval *dequeue() override; MCRegister selectOrSplit(LiveInterval &, SmallVectorImpl<Register> &) override; @@ -463,28 +468,29 @@ private: bool calcCompactRegion(GlobalSplitCandidate&); void splitAroundRegion(LiveRangeEdit&, ArrayRef<unsigned>); void calcGapWeights(MCRegister, SmallVectorImpl<float> &); - Register canReassign(LiveInterval &VirtReg, Register PrevReg); - bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool); + Register canReassign(LiveInterval &VirtReg, Register PrevReg) const; + bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool) const; bool canEvictInterference(LiveInterval &, MCRegister, bool, EvictionCost &, - const SmallVirtRegSet &); - bool canEvictInterferenceInRange(LiveInterval &VirtReg, MCRegister PhysReg, - SlotIndex Start, SlotIndex End, - EvictionCost &MaxCost); + const SmallVirtRegSet &) const; + bool canEvictInterferenceInRange(const LiveInterval &VirtReg, + MCRegister PhysReg, SlotIndex Start, + SlotIndex End, EvictionCost &MaxCost) const; MCRegister getCheapestEvicteeWeight(const AllocationOrder &Order, - LiveInterval &VirtReg, SlotIndex Start, - SlotIndex End, float *BestEvictWeight); + const LiveInterval &VirtReg, + SlotIndex Start, SlotIndex End, + float *BestEvictWeight) const; void evictInterference(LiveInterval &, MCRegister, SmallVectorImpl<Register> &); bool mayRecolorAllInterferences(MCRegister PhysReg, LiveInterval &VirtReg, SmallLISet &RecoloringCandidates, const SmallVirtRegSet &FixedRegisters); - Register tryAssign(LiveInterval&, AllocationOrder&, + MCRegister tryAssign(LiveInterval&, AllocationOrder&, SmallVectorImpl<Register>&, const SmallVirtRegSet&); - unsigned tryEvict(LiveInterval&, AllocationOrder&, - SmallVectorImpl<Register>&, unsigned, - const SmallVirtRegSet&); + MCRegister tryEvict(LiveInterval &, AllocationOrder &, + SmallVectorImpl<Register> &, uint8_t, + const SmallVirtRegSet &); MCRegister tryRegionSplit(LiveInterval &, AllocationOrder &, SmallVectorImpl<Register> &); /// Calculate cost of region splitting. @@ -501,7 +507,7 @@ private: /// time. MCRegister tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order, MCRegister PhysReg, - unsigned &CostPerUseLimit, + uint8_t &CostPerUseLimit, SmallVectorImpl<Register> &NewVRegs); void initializeCSRCost(); unsigned tryBlockSplit(LiveInterval&, AllocationOrder&, @@ -541,19 +547,50 @@ private: bool isUnusedCalleeSavedReg(MCRegister PhysReg) const; - /// Compute and report the number of spills and reloads for a loop. - void reportNumberOfSplillsReloads(MachineLoop *L, unsigned &Reloads, - unsigned &FoldedReloads, unsigned &Spills, - unsigned &FoldedSpills); - - /// Report the number of spills and reloads for each loop. - void reportNumberOfSplillsReloads() { - for (MachineLoop *L : *Loops) { - unsigned Reloads, FoldedReloads, Spills, FoldedSpills; - reportNumberOfSplillsReloads(L, Reloads, FoldedReloads, Spills, - FoldedSpills); + /// Greedy RA statistic to remark. 
+ struct RAGreedyStats { + unsigned Reloads = 0; + unsigned FoldedReloads = 0; + unsigned ZeroCostFoldedReloads = 0; + unsigned Spills = 0; + unsigned FoldedSpills = 0; + unsigned Copies = 0; + float ReloadsCost = 0.0f; + float FoldedReloadsCost = 0.0f; + float SpillsCost = 0.0f; + float FoldedSpillsCost = 0.0f; + float CopiesCost = 0.0f; + + bool isEmpty() { + return !(Reloads || FoldedReloads || Spills || FoldedSpills || + ZeroCostFoldedReloads || Copies); } - } + + void add(RAGreedyStats other) { + Reloads += other.Reloads; + FoldedReloads += other.FoldedReloads; + ZeroCostFoldedReloads += other.ZeroCostFoldedReloads; + Spills += other.Spills; + FoldedSpills += other.FoldedSpills; + Copies += other.Copies; + ReloadsCost += other.ReloadsCost; + FoldedReloadsCost += other.FoldedReloadsCost; + SpillsCost += other.SpillsCost; + FoldedSpillsCost += other.FoldedSpillsCost; + CopiesCost += other.CopiesCost; + } + + void report(MachineOptimizationRemarkMissed &R); + }; + + /// Compute statistic for a basic block. + RAGreedyStats computeStats(MachineBasicBlock &MBB); + + /// Compute and report statistic through a remark. + RAGreedyStats reportStats(MachineLoop *L); + + /// Report the statistic for each loop. + void reportStats(); }; } // end anonymous namespace @@ -599,7 +636,22 @@ FunctionPass* llvm::createGreedyRegisterAllocator() { return new RAGreedy(); } -RAGreedy::RAGreedy(): MachineFunctionPass(ID) { +namespace llvm { +FunctionPass* createGreedyRegisterAllocator( + std::function<bool(const TargetRegisterInfo &TRI, + const TargetRegisterClass &RC)> Ftor); + +} + +FunctionPass* llvm::createGreedyRegisterAllocator( + std::function<bool(const TargetRegisterInfo &TRI, + const TargetRegisterClass &RC)> Ftor) { + return new RAGreedy(Ftor); +} + +RAGreedy::RAGreedy(RegClassFilterFunc F): + MachineFunctionPass(ID), + RegAllocBase(F) { } void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { @@ -656,7 +708,7 @@ void RAGreedy::LRE_WillShrinkVirtReg(Register VirtReg) { // Register is assigned, put it back on the queue for reassignment. LiveInterval &LI = LIS->getInterval(VirtReg); Matrix->unassign(LI); - enqueue(&LI); + RegAllocBase::enqueue(&LI); } void RAGreedy::LRE_DidCloneVirtReg(Register New, Register Old) { @@ -679,7 +731,7 @@ void RAGreedy::releaseMemory() { GlobalCand.clear(); } -void RAGreedy::enqueue(LiveInterval *LI) { enqueue(Queue, LI); } +void RAGreedy::enqueueImpl(LiveInterval *LI) { enqueue(Queue, LI); } void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { // Prioritize live ranges by size, assigning larger ranges first. @@ -708,6 +760,7 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { // Giant live ranges fall back to the global assignment heuristic, which // prevents excessive spilling in pathological cases. bool ReverseLocal = TRI->reverseLocalAssignment(); + bool AddPriorityToGlobal = TRI->addAllocPriorityToGlobalRanges(); const TargetRegisterClass &RC = *MRI->getRegClass(Reg); bool ForceGlobal = !ReverseLocal && (Size / SlotIndex::InstrDist) > (2 * RC.getNumRegs()); @@ -731,6 +784,9 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { // don't fit should be spilled (or split) ASAP so they don't create // interference. Mark a bit to prioritize global above local ranges. Prio = (1u << 29) + Size; + + if (AddPriorityToGlobal) + Prio |= RC.AllocationPriority << 24; } // Mark a higher bit to prioritize global and local above RS_Split. 
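// A minimal model of the priority packing in enqueue() above, for ranges past
// the RS_Split stage: size fills the low bits, bit 29 marks global ranges,
// the class AllocationPriority is spliced in at bit 24 when the target opts
// in, and bit 31 (set just below) lifts all of these above RS_Split ranges.
// The function name is illustrative.
#include <cstdint>

uint32_t rangePriority(uint32_t Size, uint8_t ClassPrio, bool Global,
                       bool AddClassPrioToGlobal) {
  uint32_t Prio = Size;
  if (Global) {
    Prio = (1u << 29) + Size; // global above local
    if (AddClassPrioToGlobal)
      Prio |= uint32_t(ClassPrio) << 24;
  }
  Prio |= (1u << 31); // everything here above RS_Split
  return Prio;
}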
Prio |= (1u << 31); @@ -759,11 +815,11 @@ LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) { //===----------------------------------------------------------------------===// /// tryAssign - Try to assign VirtReg to an available register. -Register RAGreedy::tryAssign(LiveInterval &VirtReg, +MCRegister RAGreedy::tryAssign(LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl<Register> &NewVRegs, const SmallVirtRegSet &FixedRegisters) { - Register PhysReg; + MCRegister PhysReg; for (auto I = Order.begin(), E = Order.end(); I != E && !PhysReg; ++I) { assert(*I); if (!Matrix->checkInterference(VirtReg, *I)) { @@ -797,7 +853,7 @@ Register RAGreedy::tryAssign(LiveInterval &VirtReg, } // Try to evict interference from a cheaper alternative. - unsigned Cost = TRI->getCostPerUse(PhysReg); + uint8_t Cost = RegCosts[PhysReg]; // Most registers have 0 additional cost. if (!Cost) @@ -805,7 +861,7 @@ Register RAGreedy::tryAssign(LiveInterval &VirtReg, LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is available at cost " << Cost << '\n'); - Register CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost, FixedRegisters); + MCRegister CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost, FixedRegisters); return CheapReg ? CheapReg : PhysReg; } @@ -813,7 +869,7 @@ Register RAGreedy::tryAssign(LiveInterval &VirtReg, // Interference eviction //===----------------------------------------------------------------------===// -Register RAGreedy::canReassign(LiveInterval &VirtReg, Register PrevReg) { +Register RAGreedy::canReassign(LiveInterval &VirtReg, Register PrevReg) const { auto Order = AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix); MCRegister PhysReg; @@ -853,7 +909,7 @@ Register RAGreedy::canReassign(LiveInterval &VirtReg, Register PrevReg) { /// @param B The live range to be evicted. /// @param BreaksHint True when B is already assigned to its preferred register. bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint, - LiveInterval &B, bool BreaksHint) { + LiveInterval &B, bool BreaksHint) const { bool CanSplit = getStage(B) < RS_Spill; // Be fairly aggressive about following hints as long as the evictee can be @@ -877,9 +933,9 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint, /// @param MaxCost Only look for cheaper candidates and update with new cost /// when returning true. /// @returns True when interference can be evicted cheaper than MaxCost. -bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, MCRegister PhysReg, - bool IsHint, EvictionCost &MaxCost, - const SmallVirtRegSet &FixedRegisters) { +bool RAGreedy::canEvictInterference( + LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint, + EvictionCost &MaxCost, const SmallVirtRegSet &FixedRegisters) const { // It is only possible to evict virtual register interference. if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg) return false; @@ -975,14 +1031,15 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, MCRegister PhysReg, /// \param MaxCost Only look for cheaper candidates and update with new cost /// when returning true. /// \return True when interference can be evicted cheaper than MaxCost. 
-bool RAGreedy::canEvictInterferenceInRange(LiveInterval &VirtReg, +bool RAGreedy::canEvictInterferenceInRange(const LiveInterval &VirtReg, MCRegister PhysReg, SlotIndex Start, SlotIndex End, - EvictionCost &MaxCost) { + EvictionCost &MaxCost) const { EvictionCost Cost; for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); + Q.collectInterferingVRegs(); // Check if any interfering live range is heavier than MaxWeight. for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) { @@ -1027,9 +1084,9 @@ /// \return The PhysReg which is the best candidate for eviction and the /// eviction cost in BestEvictweight MCRegister RAGreedy::getCheapestEvicteeWeight(const AllocationOrder &Order, - LiveInterval &VirtReg, + const LiveInterval &VirtReg, SlotIndex Start, SlotIndex End, - float *BestEvictweight) { + float *BestEvictweight) const { EvictionCost BestEvictCost; BestEvictCost.setMax(); BestEvictCost.MaxWeight = VirtReg.weight(); @@ -1109,10 +1166,9 @@ bool RAGreedy::isUnusedCalleeSavedReg(MCRegister PhysReg) const { /// @param VirtReg Currently unassigned virtual register. /// @param Order Physregs to try. /// @return Physreg to assign VirtReg, or 0. -unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, - AllocationOrder &Order, +MCRegister RAGreedy::tryEvict(LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl<Register> &NewVRegs, - unsigned CostPerUseLimit, + uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) { NamedRegionTimer T("evict", "Evict", TimerGroupName, TimerGroupDescription, TimePassesIsEnabled); @@ -1125,13 +1181,13 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, // When we are just looking for a reduced cost per use, don't break any // hints, and only evict smaller spill weights. - if (CostPerUseLimit < ~0u) { + if (CostPerUseLimit < uint8_t(~0u)) { BestCost.BrokenHints = 0; BestCost.MaxWeight = VirtReg.weight(); // Check if any registers in RC are below CostPerUseLimit. const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg()); - unsigned MinCost = RegClassInfo.getMinCost(RC); + uint8_t MinCost = RegClassInfo.getMinCost(RC); if (MinCost >= CostPerUseLimit) { LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = " << MinCost << ", no cheaper registers to be found.\n"); @@ -1140,7 +1196,7 @@ // It is normal for register classes to have a long tail of registers with // the same cost. We don't need to look at them if they're too expensive. - if (TRI->getCostPerUse(Order.getOrder().back()) >= CostPerUseLimit) { + if (RegCosts[Order.getOrder().back()] >= CostPerUseLimit) { OrderLimit = RegClassInfo.getLastCostChange(RC); LLVM_DEBUG(dbgs() << "Only trying the first " << OrderLimit << " regs.\n"); @@ -1151,7 +1207,7 @@ ++I) { MCRegister PhysReg = *I; assert(PhysReg); - if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit) + if (RegCosts[PhysReg] >= CostPerUseLimit) continue; // The first use of a callee-saved register in a function has cost 1. // Don't start using a CSR when the CostPerUseLimit is low.
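// A minimal sketch of the cost screening in tryEvict() above, now that
// per-register costs live in the uint8_t RegCosts table: if the class
// minimum already meets CostPerUseLimit there is nothing cheaper to find,
// and because RegisterClassInfo orders registers by cost, the equally
// expensive tail of the allocation order can be skipped wholesale.
// Simplified stand-in types.
#include <cstdint>
#include <vector>

size_t evictionOrderLimit(const std::vector<uint8_t> &CostsInOrder,
                          uint8_t CostPerUseLimit) {
  if (CostsInOrder.empty() || CostsInOrder.front() >= CostPerUseLimit)
    return 0; // minimum cost already at or above the limit
  size_t Limit = CostsInOrder.size();
  while (Limit > 0 && CostsInOrder[Limit - 1] >= CostPerUseLimit)
    --Limit; // drop the too-expensive tail
  return Limit; // only the first Limit registers are worth trying
}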
@@ -1175,10 +1231,8 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, break; } - if (!BestPhys) - return 0; - - evictInterference(VirtReg, BestPhys, NewVRegs); + if (BestPhys.isValid()) + evictInterference(VirtReg, BestPhys, NewVRegs); return BestPhys; } @@ -1289,8 +1343,9 @@ bool RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf, // Abort if the spill cannot be inserted at the MBB's start MachineBasicBlock *MBB = MF->getBlockNumbered(Number); - if (!MBB->empty() && - SlotIndex::isEarlierInstr(LIS->getInstructionIndex(MBB->instr_front()), + auto FirstNonDebugInstr = MBB->getFirstNonDebugInstr(); + if (FirstNonDebugInstr != MBB->end() && + SlotIndex::isEarlierInstr(LIS->getInstructionIndex(*FirstNonDebugInstr), SA->getFirstSplitPoint(Number))) return false; // Interference for the live-in value. @@ -1331,9 +1386,7 @@ bool RAGreedy::growRegion(GlobalSplitCandidate &Cand) { for (unsigned Bundle : NewBundles) { // Look at all blocks connected to Bundle in the full graph. ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle); - for (ArrayRef<unsigned>::iterator I = Blocks.begin(), E = Blocks.end(); - I != E; ++I) { - unsigned Block = *I; + for (unsigned Block : Blocks) { if (!Todo.test(Block)) continue; Todo.reset(Block); @@ -1557,25 +1610,9 @@ bool RAGreedy::splitCanCauseLocalSpill(unsigned VirtRegToSplit, return false; } - // Check if the local interval will evict a cheaper interval. - float CheapestEvictWeight = 0; - MCRegister FutureEvictedPhysReg = getCheapestEvicteeWeight( - Order, LIS->getInterval(VirtRegToSplit), Cand.Intf.first(), - Cand.Intf.last(), &CheapestEvictWeight); - - // Have we found an interval that can be evicted? - if (FutureEvictedPhysReg) { - float splitArtifactWeight = - VRAI->futureWeight(LIS->getInterval(VirtRegToSplit), - Cand.Intf.first().getPrevIndex(), Cand.Intf.last()); - // Will the weight of the local interval be higher than the cheapest evictee - // weight? If so it will evict it and will not cause a spill. - if (splitArtifactWeight >= 0 && splitArtifactWeight > CheapestEvictWeight) - return false; - } - - // The local interval is not able to find a non-interfering assignment and - // not able to evict a less worthy interval; therefore, it can cause a spill. + // The local interval is not able to find a non-interfering assignment + // and not able to evict a less worthy interval; therefore, it can cause a + // spill. return true; } @@ -2650,18 +2687,16 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, // with VirtReg on PhysReg (or one of its aliases). // Enqueue them for recoloring and perform the actual recoloring. PQueue RecoloringQueue; - for (SmallLISet::iterator It = RecoloringCandidates.begin(), - EndIt = RecoloringCandidates.end(); - It != EndIt; ++It) { - Register ItVirtReg = (*It)->reg(); - enqueue(RecoloringQueue, *It); + for (LiveInterval *RC : RecoloringCandidates) { + Register ItVirtReg = RC->reg(); + enqueue(RecoloringQueue, RC); assert(VRM->hasPhys(ItVirtReg) && "Interferences are supposed to be with allocated variables"); // Record the current allocation. VirtRegToPhysReg[ItVirtReg] = VRM->getPhys(ItVirtReg); // unset the related struct. - Matrix->unassign(**It); + Matrix->unassign(*RC); } // Do as if VirtReg was assigned to PhysReg so that the underlying @@ -2695,22 +2730,18 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, // don't add it to NewVRegs because its physical register will be restored // below.
Other vregs in CurrentNewVRegs are created by calling // selectOrSplit and should be added into NewVRegs. - for (SmallVectorImpl<Register>::iterator Next = CurrentNewVRegs.begin(), - End = CurrentNewVRegs.end(); - Next != End; ++Next) { - if (RecoloringCandidates.count(&LIS->getInterval(*Next))) + for (Register &R : CurrentNewVRegs) { + if (RecoloringCandidates.count(&LIS->getInterval(R))) continue; - NewVRegs.push_back(*Next); + NewVRegs.push_back(R); } - for (SmallLISet::iterator It = RecoloringCandidates.begin(), - EndIt = RecoloringCandidates.end(); - It != EndIt; ++It) { - Register ItVirtReg = (*It)->reg(); + for (LiveInterval *RC : RecoloringCandidates) { + Register ItVirtReg = RC->reg(); if (VRM->hasPhys(ItVirtReg)) - Matrix->unassign(**It); + Matrix->unassign(*RC); MCRegister ItPhysReg = VirtRegToPhysReg[ItVirtReg]; - Matrix->assign(**It, ItPhysReg); + Matrix->assign(*RC, ItPhysReg); } } @@ -2793,7 +2824,7 @@ MCRegister RAGreedy::selectOrSplit(LiveInterval &VirtReg, /// to use the CSR; otherwise return 0. MCRegister RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order, - MCRegister PhysReg, unsigned &CostPerUseLimit, + MCRegister PhysReg, uint8_t &CostPerUseLimit, SmallVectorImpl<Register> &NewVRegs) { if (getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) { // We choose spill over using the CSR for the first time if the spill cost @@ -2924,7 +2955,12 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) { if (Register::isPhysicalRegister(Reg)) continue; - assert(VRM->hasPhys(Reg) && "We have unallocated variable!!"); + // This may be a skipped class + if (!VRM->hasPhys(Reg)) { + assert(!ShouldAllocateClass(*TRI, *MRI->getRegClass(Reg)) && + "We have an unallocated variable which should have been handled"); + continue; + } // Get the live interval mapped with this virtual register to be able // to check for the interference with the new color. @@ -3024,13 +3060,13 @@ MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, SmallVectorImpl<Register> &NewVRegs, SmallVirtRegSet &FixedRegisters, unsigned Depth) { - unsigned CostPerUseLimit = ~0u; + uint8_t CostPerUseLimit = uint8_t(~0u); // First try assigning a free register. auto Order = AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix); if (MCRegister PhysReg = tryAssign(VirtReg, Order, NewVRegs, FixedRegisters)) { - // If VirtReg got an assignment, the eviction info is no longre relevant. + // If VirtReg got an assignment, the eviction info is no longer relevant. LastEvicted.clearEvicteeInfo(VirtReg.reg()); // When NewVRegs is not empty, we may have made decisions such as evicting // a virtual register, go with the earlier decisions and use the physical @@ -3067,7 +3103,7 @@ MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, if (Hint && Hint != PhysReg) SetOfBrokenHints.insert(&VirtReg); // If VirtReg eviction someone, the eviction info for it as an evictee is - // no longre relevant. + // no longer relevant. LastEvicted.clearEvicteeInfo(VirtReg.reg()); return PhysReg; } @@ -3133,75 +3169,162 @@ MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, return 0; } -void RAGreedy::reportNumberOfSplillsReloads(MachineLoop *L, unsigned &Reloads, - unsigned &FoldedReloads, - unsigned &Spills, - unsigned &FoldedSpills) { - Reloads = 0; - FoldedReloads = 0; - Spills = 0; - FoldedSpills = 0; - - // Sum up the spill and reloads in subloops. 
- for (MachineLoop *SubLoop : *L) { - unsigned SubReloads; - unsigned SubFoldedReloads; - unsigned SubSpills; - unsigned SubFoldedSpills; - - reportNumberOfSplillsReloads(SubLoop, SubReloads, SubFoldedReloads, - SubSpills, SubFoldedSpills); - Reloads += SubReloads; - FoldedReloads += SubFoldedReloads; - Spills += SubSpills; - FoldedSpills += SubFoldedSpills; +void RAGreedy::RAGreedyStats::report(MachineOptimizationRemarkMissed &R) { + using namespace ore; + if (Spills) { + R << NV("NumSpills", Spills) << " spills "; + R << NV("TotalSpillsCost", SpillsCost) << " total spills cost "; + } + if (FoldedSpills) { + R << NV("NumFoldedSpills", FoldedSpills) << " folded spills "; + R << NV("TotalFoldedSpillsCost", FoldedSpillsCost) + << " total folded spills cost "; + } + if (Reloads) { + R << NV("NumReloads", Reloads) << " reloads "; + R << NV("TotalReloadsCost", ReloadsCost) << " total reloads cost "; + } + if (FoldedReloads) { + R << NV("NumFoldedReloads", FoldedReloads) << " folded reloads "; + R << NV("TotalFoldedReloadsCost", FoldedReloadsCost) + << " total folded reloads cost "; + } + if (ZeroCostFoldedReloads) + R << NV("NumZeroCostFoldedReloads", ZeroCostFoldedReloads) + << " zero cost folded reloads "; + if (Copies) { + R << NV("NumVRCopies", Copies) << " virtual registers copies "; + R << NV("TotalCopiesCost", CopiesCost) << " total copies cost "; } +} +RAGreedy::RAGreedyStats RAGreedy::computeStats(MachineBasicBlock &MBB) { + RAGreedyStats Stats; const MachineFrameInfo &MFI = MF->getFrameInfo(); int FI; + auto isSpillSlotAccess = [&MFI](const MachineMemOperand *A) { + return MFI.isSpillSlotObjectIndex(cast<FixedStackPseudoSourceValue>( + A->getPseudoValue())->getFrameIndex()); + }; + auto isPatchpointInstr = [](const MachineInstr &MI) { + return MI.getOpcode() == TargetOpcode::PATCHPOINT || + MI.getOpcode() == TargetOpcode::STACKMAP || + MI.getOpcode() == TargetOpcode::STATEPOINT; + }; + for (MachineInstr &MI : MBB) { + if (MI.isCopy()) { + MachineOperand &Dest = MI.getOperand(0); + MachineOperand &Src = MI.getOperand(1); + if (Dest.isReg() && Src.isReg() && Dest.getReg().isVirtual() && + Src.getReg().isVirtual()) + ++Stats.Copies; + continue; + } + + SmallVector<const MachineMemOperand *, 2> Accesses; + if (TII->isLoadFromStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI)) { + ++Stats.Reloads; + continue; + } + if (TII->isStoreToStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI)) { + ++Stats.Spills; + continue; + } + if (TII->hasLoadFromStackSlot(MI, Accesses) && + llvm::any_of(Accesses, isSpillSlotAccess)) { + if (!isPatchpointInstr(MI)) { + Stats.FoldedReloads += Accesses.size(); + continue; + } + // For statepoint there may be folded and zero cost folded stack reloads. + std::pair<unsigned, unsigned> NonZeroCostRange = + TII->getPatchpointUnfoldableRange(MI); + SmallSet<unsigned, 16> FoldedReloads; + SmallSet<unsigned, 16> ZeroCostFoldedReloads; + for (unsigned Idx = 0, E = MI.getNumOperands(); Idx < E; ++Idx) { + MachineOperand &MO = MI.getOperand(Idx); + if (!MO.isFI() || !MFI.isSpillSlotObjectIndex(MO.getIndex())) + continue; + if (Idx >= NonZeroCostRange.first && Idx < NonZeroCostRange.second) + FoldedReloads.insert(MO.getIndex()); + else + ZeroCostFoldedReloads.insert(MO.getIndex()); + } + // If stack slot is used in folded reload it is not zero cost then. 
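The statepoint bookkeeping just above splits spill-slot operands into real folded reloads and zero-cost folded ones using the half-open operand range returned by getPatchpointUnfoldableRange, then discounts any slot that shows up in both sets. A standalone sketch of that partition, under the assumption that operands are already reduced to plain slot numbers (the real code walks MachineOperands):

#include <cstddef>
#include <set>
#include <utility>
#include <vector>

struct ReloadCounts {
  std::size_t Folded = 0;
  std::size_t ZeroCost = 0;
};

// Slots[i] is the spill slot referenced by operand i, or -1 when operand i
// is not a spill-slot frame index. Range is the half-open [first, second)
// span of operands whose reloads are not free.
ReloadCounts countStatepointReloads(const std::vector<int> &Slots,
                                    std::pair<unsigned, unsigned> Range) {
  std::set<int> Folded, ZeroCost;
  for (unsigned Idx = 0; Idx < Slots.size(); ++Idx) {
    if (Slots[Idx] < 0)
      continue;
    if (Idx >= Range.first && Idx < Range.second)
      Folded.insert(Slots[Idx]);
    else
      ZeroCost.insert(Slots[Idx]);
  }
  // A slot also used by a real folded reload is not zero cost after all.
  for (int S : Folded)
    ZeroCost.erase(S);
  return {Folded.size(), ZeroCost.size()};
}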
+ for (unsigned Slot : FoldedReloads) + ZeroCostFoldedReloads.erase(Slot); + Stats.FoldedReloads += FoldedReloads.size(); + Stats.ZeroCostFoldedReloads += ZeroCostFoldedReloads.size(); + continue; + } + Accesses.clear(); + if (TII->hasStoreToStackSlot(MI, Accesses) && + llvm::any_of(Accesses, isSpillSlotAccess)) { + Stats.FoldedSpills += Accesses.size(); + } + } + // Scale the collected statistics by the relative frequency of this + // basic block. + float RelFreq = MBFI->getBlockFreqRelativeToEntryBlock(&MBB); + Stats.ReloadsCost = RelFreq * Stats.Reloads; + Stats.FoldedReloadsCost = RelFreq * Stats.FoldedReloads; + Stats.SpillsCost = RelFreq * Stats.Spills; + Stats.FoldedSpillsCost = RelFreq * Stats.FoldedSpills; + Stats.CopiesCost = RelFreq * Stats.Copies; + return Stats; +} + +RAGreedy::RAGreedyStats RAGreedy::reportStats(MachineLoop *L) { + RAGreedyStats Stats; + + // Sum up the spill and reloads in subloops. + for (MachineLoop *SubLoop : *L) + Stats.add(reportStats(SubLoop)); + for (MachineBasicBlock *MBB : L->getBlocks()) // Handle blocks that were not included in subloops. if (Loops->getLoopFor(MBB) == L) - for (MachineInstr &MI : *MBB) { - SmallVector<const MachineMemOperand *, 2> Accesses; - auto isSpillSlotAccess = [&MFI](const MachineMemOperand *A) { - return MFI.isSpillSlotObjectIndex( - cast<FixedStackPseudoSourceValue>(A->getPseudoValue()) - ->getFrameIndex()); - }; - - if (TII->isLoadFromStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI)) - ++Reloads; - else if (TII->hasLoadFromStackSlot(MI, Accesses) && - llvm::any_of(Accesses, isSpillSlotAccess)) - ++FoldedReloads; - else if (TII->isStoreToStackSlot(MI, FI) && - MFI.isSpillSlotObjectIndex(FI)) - ++Spills; - else if (TII->hasStoreToStackSlot(MI, Accesses) && - llvm::any_of(Accesses, isSpillSlotAccess)) - ++FoldedSpills; - } + Stats.add(computeStats(*MBB)); - if (Reloads || FoldedReloads || Spills || FoldedSpills) { + if (!Stats.isEmpty()) { using namespace ore; ORE->emit([&]() { - MachineOptimizationRemarkMissed R(DEBUG_TYPE, "LoopSpillReload", + MachineOptimizationRemarkMissed R(DEBUG_TYPE, "LoopSpillReloadCopies", L->getStartLoc(), L->getHeader()); - if (Spills) - R << NV("NumSpills", Spills) << " spills "; - if (FoldedSpills) - R << NV("NumFoldedSpills", FoldedSpills) << " folded spills "; - if (Reloads) - R << NV("NumReloads", Reloads) << " reloads "; - if (FoldedReloads) - R << NV("NumFoldedReloads", FoldedReloads) << " folded reloads "; + Stats.report(R); R << "generated in loop"; return R; }); } + return Stats; +} + +void RAGreedy::reportStats() { + if (!ORE->allowExtraAnalysis(DEBUG_TYPE)) + return; + RAGreedyStats Stats; + for (MachineLoop *L : *Loops) + Stats.add(reportStats(L)); + // Process non-loop blocks.
+ for (MachineBasicBlock &MBB : *MF) + if (!Loops->getLoopFor(&MBB)) + Stats.add(computeStats(MBB)); + if (!Stats.isEmpty()) { + using namespace ore; + + ORE->emit([&]() { + DebugLoc Loc; + if (auto *SP = MF->getFunction().getSubprogram()) + Loc = DILocation::get(SP->getContext(), SP->getLine(), 1, SP); + MachineOptimizationRemarkMissed R(DEBUG_TYPE, "SpillReloadCopies", Loc, + &MF->front()); + Stats.report(R); + R << "generated in function"; + return R; + }); + } } bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { @@ -3232,7 +3355,6 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); DomTree = &getAnalysis<MachineDominatorTree>(); ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE(); - SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM)); Loops = &getAnalysis<MachineLoopInfo>(); Bundles = &getAnalysis<EdgeBundles>(); SpillPlacer = &getAnalysis<SpillPlacement>(); @@ -3241,14 +3363,17 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { initializeCSRCost(); + RegCosts = TRI->getRegisterCosts(*MF); + VRAI = std::make_unique<VirtRegAuxInfo>(*MF, *LIS, *VRM, *Loops, *MBFI); + SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM, *VRAI)); VRAI->calculateSpillWeightsAndHints(); LLVM_DEBUG(LIS->dump()); SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops)); - SE.reset(new SplitEditor(*SA, *AA, *LIS, *VRM, *DomTree, *MBFI)); + SE.reset(new SplitEditor(*SA, *AA, *LIS, *VRM, *DomTree, *MBFI, *VRAI)); ExtraRegInfo.clear(); ExtraRegInfo.resize(MRI->getNumVirtRegs()); NextCascade = 1; @@ -3259,8 +3384,11 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { allocatePhysRegs(); tryHintsRecoloring(); + + if (VerifyEnabled) + MF->verify(this, "Before post optimization"); postOptimization(); - reportNumberOfSplillsReloads(); + reportStats(); releaseMemory(); return true; diff --git a/llvm/lib/CodeGen/RegAllocPBQP.cpp b/llvm/lib/CodeGen/RegAllocPBQP.cpp index 7c5af1a0c56e..b22eb080791e 100644 --- a/llvm/lib/CodeGen/RegAllocPBQP.cpp +++ b/llvm/lib/CodeGen/RegAllocPBQP.cpp @@ -703,9 +703,8 @@ void RegAllocPBQP::spillVReg(Register VReg, // Copy any newly inserted live intervals into the list of regs to // allocate. - for (LiveRangeEdit::iterator I = LRE.begin(), E = LRE.end(); - I != E; ++I) { - const LiveInterval &LI = LIS.getInterval(*I); + for (const Register &R : LRE) { + const LiveInterval &LI = LIS.getInterval(R); assert(!LI.empty() && "Empty spill range."); LLVM_DEBUG(dbgs() << printReg(LI.reg(), &TRI) << " "); VRegsToAlloc.insert(LI.reg()); @@ -759,10 +758,8 @@ void RegAllocPBQP::finalizeAlloc(MachineFunction &MF, MachineRegisterInfo &MRI = MF.getRegInfo(); // First allocate registers for the empty intervals. - for (RegSet::const_iterator - I = EmptyIntervalVRegs.begin(), E = EmptyIntervalVRegs.end(); - I != E; ++I) { - LiveInterval &LI = LIS.getInterval(*I); + for (const Register &R : EmptyIntervalVRegs) { + LiveInterval &LI = LIS.getInterval(R); Register PReg = MRI.getSimpleHint(LI.reg()); @@ -803,7 +800,14 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { PBQPVirtRegAuxInfo VRAI(MF, LIS, VRM, getAnalysis<MachineLoopInfo>(), MBFI); VRAI.calculateSpillWeightsAndHints(); - std::unique_ptr<Spiller> VRegSpiller(createInlineSpiller(*this, MF, VRM)); + // FIXME: we create DefaultVRAI here to match existing behavior pre-passing + // the VRAI through the spiller to the live range editor. However, it probably + // makes more sense to pass the PBQP VRAI. 
The existing behavior had + // LiveRangeEdit make its own VirtRegAuxInfo object. + VirtRegAuxInfo DefaultVRAI(MF, LIS, VRM, getAnalysis<MachineLoopInfo>(), + MBFI); + std::unique_ptr<Spiller> VRegSpiller( + createInlineSpiller(*this, MF, VRM, DefaultVRAI)); MF.getRegInfo().freezeReservedRegs(MF); @@ -855,7 +859,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { std::string GraphFileName = FullyQualifiedName + "." + RS.str() + ".pbqpgraph"; std::error_code EC; - raw_fd_ostream OS(GraphFileName, EC, sys::fs::OF_Text); + raw_fd_ostream OS(GraphFileName, EC, sys::fs::OF_TextWithCRLF); LLVM_DEBUG(dbgs() << "Dumping graph for round " << Round << " to \"" << GraphFileName << "\"\n"); G.dump(OS); diff --git a/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp b/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp index 0c3e8a89c920..800d952469a5 100644 --- a/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp +++ b/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp @@ -30,8 +30,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" -#include <map> -#include <string> using namespace llvm; diff --git a/llvm/lib/CodeGen/RegisterClassInfo.cpp b/llvm/lib/CodeGen/RegisterClassInfo.cpp index 0488db3d09cb..797899fb5b86 100644 --- a/llvm/lib/CodeGen/RegisterClassInfo.cpp +++ b/llvm/lib/CodeGen/RegisterClassInfo.cpp @@ -68,6 +68,8 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) { } CalleeSavedRegs = CSR; + RegCosts = TRI->getRegisterCosts(*MF); + // Different reserved registers? const BitVector &RR = MF->getRegInfo().getReservedRegs(); if (Reserved.size() != RR.size() || RR != Reserved) { @@ -100,8 +102,8 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const { unsigned N = 0; SmallVector<MCPhysReg, 16> CSRAlias; - unsigned MinCost = 0xff; - unsigned LastCost = ~0u; + uint8_t MinCost = uint8_t(~0u); + uint8_t LastCost = uint8_t(~0u); unsigned LastCostChange = 0; // FIXME: Once targets reserve registers instead of removing them from the @@ -112,7 +114,7 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const { // Remove reserved registers from the allocation order. if (Reserved.test(PhysReg)) continue; - unsigned Cost = TRI->getCostPerUse(PhysReg); + uint8_t Cost = RegCosts[PhysReg]; MinCost = std::min(MinCost, Cost); if (CalleeSavedAliases[PhysReg] && @@ -132,7 +134,7 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const { // CSR aliases go after the volatile registers, preserve the target's order. for (unsigned i = 0, e = CSRAlias.size(); i != e; ++i) { unsigned PhysReg = CSRAlias[i]; - unsigned Cost = TRI->getCostPerUse(PhysReg); + uint8_t Cost = RegCosts[PhysReg]; if (Cost != LastCost) LastCostChange = N; RCI.Order[N++] = PhysReg; @@ -149,7 +151,7 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const { if (Super != RC && getNumAllocatableRegs(Super) > RCI.NumRegs) RCI.ProperSubClass = true; - RCI.MinCost = uint8_t(MinCost); + RCI.MinCost = MinCost; RCI.LastCostChange = LastCostChange; LLVM_DEBUG({ diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index 7fdc85a6e444..751f79e66b73 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -133,6 +133,20 @@ namespace { AliasAnalysis *AA = nullptr; RegisterClassInfo RegClassInfo; + /// Position and VReg of a PHI instruction during coalescing. + struct PHIValPos { + SlotIndex SI; ///< Slot where this PHI occurs. 
+ Register Reg; ///< VReg the PHI occurs in. + unsigned SubReg; ///< Qualifying subregister for Reg. + }; + + /// Map from debug instruction number to PHI position during coalescing. + DenseMap<unsigned, PHIValPos> PHIValToPos; + /// Index of, for each VReg, which debug instruction numbers and + /// corresponding PHIs are sensitive to coalescing. Each VReg may have + /// multiple PHI defs, at different positions. + DenseMap<Register, SmallVector<unsigned, 2>> RegToPHIIdx; + /// Debug variable location tracking -- for each VReg, maintain an /// ordered-by-slot-index set of DBG_VALUEs, to help quick /// identification of whether coalescing may change location validity. @@ -187,6 +201,11 @@ namespace { /// Recursively eliminate dead defs in DeadDefs. void eliminateDeadDefs(); + /// allUsesAvailableAt - Return true if all registers used by OrigMI at + /// OrigIdx are also available with the same value at UseIdx. + bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx, + SlotIndex UseIdx); + /// LiveRangeEdit callback for eliminateDeadDefs(). void LRE_WillEraseInstruction(MachineInstr *MI) override; @@ -590,6 +609,14 @@ void RegisterCoalescer::eliminateDeadDefs() { nullptr, this).eliminateDeadDefs(DeadDefs); } +bool RegisterCoalescer::allUsesAvailableAt(const MachineInstr *OrigMI, + SlotIndex OrigIdx, + SlotIndex UseIdx) { + SmallVector<Register, 8> NewRegs; + return LiveRangeEdit(nullptr, NewRegs, *MF, *LIS, nullptr, this) + .allUsesAvailableAt(OrigMI, OrigIdx, UseIdx); +} + void RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) { // MI may be in WorkList. Make sure we don't visit it. ErasedInstrs.insert(MI); @@ -914,7 +941,7 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, if (UseMO.isUndef()) continue; MachineInstr *UseMI = UseMO.getParent(); - if (UseMI->isDebugValue()) { + if (UseMI->isDebugInstr()) { // FIXME These don't have an instruction index. Not clear we have enough // info to decide whether to do this replacement or not. For now do it. UseMO.setReg(NewReg); @@ -1329,6 +1356,9 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, } } + if (!allUsesAvailableAt(DefMI, ValNo->def, CopyIdx)) + return false; + DebugLoc DL = CopyMI->getDebugLoc(); MachineBasicBlock *MBB = CopyMI->getParent(); MachineBasicBlock::iterator MII = @@ -1543,9 +1573,11 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, // If the virtual SrcReg is completely eliminated, update all DBG_VALUEs // to describe DstReg instead. if (MRI->use_nodbg_empty(SrcReg)) { - for (MachineOperand &UseMO : MRI->use_operands(SrcReg)) { + for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg); + UI != MRI->use_end();) { + MachineOperand &UseMO = *UI++; MachineInstr *UseMI = UseMO.getParent(); - if (UseMI->isDebugValue()) { + if (UseMI->isDebugInstr()) { if (Register::isPhysicalRegister(DstReg)) UseMO.substPhysReg(DstReg, *TRI); else @@ -1726,7 +1758,7 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, if (SubReg == 0 || MO.isUndef()) continue; MachineInstr &MI = *MO.getParent(); - if (MI.isDebugValue()) + if (MI.isDebugInstr()) continue; SlotIndex UseIdx = LIS->getInstructionIndex(MI).getRegSlot(true); addUndefFlag(*DstInt, UseIdx, MO, SubReg); @@ -1753,7 +1785,7 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, // If SrcReg wasn't read, it may still be the case that DstReg is live-in // because SrcReg is a sub-register. 
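PHIValToPos and RegToPHIIdx together form a small two-way index: debug instruction number to PHI position, and vreg to the instruction numbers that must be retargeted when that vreg is coalesced away (the retargeting itself happens in joinVirtRegs further below). A standalone sketch of the same indexing pattern, with plain std containers standing in for LLVM's DenseMap:

#include <unordered_map>
#include <utility>
#include <vector>

struct PhiPos {
  unsigned SlotIdx; // position of the PHI
  unsigned Reg;     // vreg it currently lives in
  unsigned SubReg;  // qualifying subregister, 0 if none
};

struct PhiIndex {
  std::unordered_map<unsigned, PhiPos> ByInstrNum;           // InstID -> pos
  std::unordered_map<unsigned, std::vector<unsigned>> ByReg; // Reg -> InstIDs

  void add(unsigned InstID, PhiPos P) {
    ByReg[P.Reg].push_back(InstID);
    ByInstrNum[InstID] = P;
  }

  // When Src is coalesced into Dst, retarget every PHI tracked on Src and
  // move its instruction numbers into Dst's bucket.
  void retarget(unsigned Src, unsigned Dst) {
    auto It = ByReg.find(Src);
    if (It == ByReg.end())
      return;
    for (unsigned ID : It->second)
      ByInstrNum[ID].Reg = Dst;
    std::vector<unsigned> Nums = std::move(It->second);
    ByReg.erase(It);
    auto &DstNums = ByReg[Dst];
    DstNums.insert(DstNums.end(), Nums.begin(), Nums.end());
  }
};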
- if (DstInt && !Reads && SubIdx && !UseMI->isDebugValue()) + if (DstInt && !Reads && SubIdx && !UseMI->isDebugInstr()) Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI)); // Replace SrcReg with DstReg in all UseMI operands. @@ -1768,24 +1800,27 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, // A subreg use of a partially undef (super) register may be a complete // undef use now and then has to be marked that way. - if (SubIdx != 0 && MO.isUse() && MRI->shouldTrackSubRegLiveness(DstReg)) { - if (!DstInt->hasSubRanges()) { - BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); - LaneBitmask FullMask = MRI->getMaxLaneMaskForVReg(DstInt->reg()); - LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx); - LaneBitmask UnusedLanes = FullMask & ~UsedLanes; - DstInt->createSubRangeFrom(Allocator, UsedLanes, *DstInt); - // The unused lanes are just empty live-ranges at this point. - // It is the caller responsibility to set the proper - // dead segments if there is an actual dead def of the - // unused lanes. This may happen with rematerialization. - DstInt->createSubRange(Allocator, UnusedLanes); + if (MO.isUse() && !DstIsPhys) { + unsigned SubUseIdx = TRI->composeSubRegIndices(SubIdx, MO.getSubReg()); + if (SubUseIdx != 0 && MRI->shouldTrackSubRegLiveness(DstReg)) { + if (!DstInt->hasSubRanges()) { + BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); + LaneBitmask FullMask = MRI->getMaxLaneMaskForVReg(DstInt->reg()); + LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx); + LaneBitmask UnusedLanes = FullMask & ~UsedLanes; + DstInt->createSubRangeFrom(Allocator, UsedLanes, *DstInt); + // The unused lanes are just empty live-ranges at this point. + // It is the caller responsibility to set the proper + // dead segments if there is an actual dead def of the + // unused lanes. This may happen with rematerialization. + DstInt->createSubRange(Allocator, UnusedLanes); + } + SlotIndex MIIdx = UseMI->isDebugInstr() + ? LIS->getSlotIndexes()->getIndexBefore(*UseMI) + : LIS->getInstructionIndex(*UseMI); + SlotIndex UseIdx = MIIdx.getRegSlot(true); + addUndefFlag(*DstInt, UseIdx, MO, SubUseIdx); } - SlotIndex MIIdx = UseMI->isDebugValue() - ? LIS->getSlotIndexes()->getIndexBefore(*UseMI) - : LIS->getInstructionIndex(*UseMI); - SlotIndex UseIdx = MIIdx.getRegSlot(true); - addUndefFlag(*DstInt, UseIdx, MO, SubIdx); } if (DstIsPhys) @@ -1796,7 +1831,7 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, LLVM_DEBUG({ dbgs() << "\t\tupdated: "; - if (!UseMI->isDebugValue()) + if (!UseMI->isDebugInstr()) dbgs() << LIS->getInstructionIndex(*UseMI) << "\t"; dbgs() << *UseMI; }); @@ -2837,9 +2872,39 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { if ((TRI->getSubRegIndexLaneMask(Other.SubIdx) & ~V.WriteLanes).none()) return CR_Impossible; - // We need to verify that no instructions are reading the clobbered lanes. To - // save compile time, we'll only check that locally. Don't allow the tainted - // value to escape the basic block. + if (TrackSubRegLiveness) { + auto &OtherLI = LIS->getInterval(Other.Reg); + // If OtherVNI does not have subranges, it means all the lanes of OtherVNI + // share the same live range, so we just need to check whether they have + // any conflict bit in their LaneMask. + if (!OtherLI.hasSubRanges()) { + LaneBitmask OtherMask = TRI->getSubRegIndexLaneMask(Other.SubIdx); + return (OtherMask & V.WriteLanes).none() ? 
CR_Replace : CR_Impossible; + } + + // If we are clobbering some active lanes of OtherVNI at VNI->def, it is + // impossible to resolve the conflict. Otherwise, we can just replace + // OtherVNI because of no real conflict. + for (LiveInterval::SubRange &OtherSR : OtherLI.subranges()) { + LaneBitmask OtherMask = + TRI->composeSubRegIndexLaneMask(Other.SubIdx, OtherSR.LaneMask); + if ((OtherMask & V.WriteLanes).none()) + continue; + + auto OtherSRQ = OtherSR.Query(VNI->def); + if (OtherSRQ.valueIn() && OtherSRQ.endPoint() > VNI->def) { + // VNI is clobbering some lanes of OtherVNI, they have real conflict. + return CR_Impossible; + } + } + + // VNI is NOT clobbering any lane of OtherVNI, just replace OtherVNI. + return CR_Replace; + } + + // We need to verify that no instructions are reading the clobbered lanes. + // To save compile time, we'll only check that locally. Don't allow the + // tainted value to escape the basic block. MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def); if (OtherLRQ.endPoint() >= Indexes->getMBBEndIdx(MBB)) return CR_Impossible; @@ -2959,7 +3024,7 @@ taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other, bool JoinVals::usesLanes(const MachineInstr &MI, Register Reg, unsigned SubIdx, LaneBitmask Lanes) const { - if (MI.isDebugInstr()) + if (MI.isDebugOrPseudoInstr()) return false; for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || MO.isDef() || MO.getReg() != Reg) @@ -3006,8 +3071,10 @@ bool JoinVals::resolveConflicts(JoinVals &Other) { MachineBasicBlock::iterator MI = MBB->begin(); if (!VNI->isPHIDef()) { MI = Indexes->getInstructionFromIndex(VNI->def); - // No need to check the instruction defining VNI for reads. - ++MI; + if (!VNI->def.isEarlyClobber()) { + // No need to check the instruction defining VNI for reads. + ++MI; + } } assert(!SlotIndex::isSameInstr(VNI->def, TaintExtent.front().first) && "Interference ends on VNI->def. Should have been handled earlier"); @@ -3114,6 +3181,13 @@ void JoinVals::pruneValues(JoinVals &Other, } } +// Check if the segment consists of a copied live-through value (i.e. the copy +// in the block only extended the liveness, of an undef value which we may need +// to handle). +static bool isLiveThrough(const LiveQueryResult Q) { + return Q.valueIn() && Q.valueIn()->isPHIDef() && Q.valueIn() == Q.valueOut(); +} + /// Consider the following situation when coalescing the copy between /// %31 and %45 at 800. (The vertical lines represent live range segments.) /// @@ -3196,11 +3270,21 @@ void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) { // with V.OtherVNI. LIS->extendToIndices(S, EndPoints); } + + // We may need to eliminate the subrange if the copy introduced a live + // out undef value. + if (ValueOut->isPHIDef()) + ShrinkMask |= S.LaneMask; continue; } + // If a subrange ends at the copy, then a value was copied but only // partially used later. Shrink the subregister range appropriately. - if (Q.valueIn() != nullptr && Q.valueOut() == nullptr) { + // + // Ultimately this calls shrinkToUses, so assuming ShrinkMask is + // conservatively correct. + if ((Q.valueIn() != nullptr && Q.valueOut() == nullptr) || + (V.Resolution == CR_Erase && isLiveThrough(Q))) { LLVM_DEBUG(dbgs() << "\t\tDead uses at sublane " << PrintLaneMask(S.LaneMask) << " at " << Def << "\n"); @@ -3526,6 +3610,64 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { // Scan and mark undef any DBG_VALUEs that would refer to a different value. 
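The TrackSubRegLiveness branch added to analyzeValue earlier in this hunk reduces to bitmask intersection: if the lanes written by the def are disjoint from every lane of the other value that is live across that def, the conflict resolves to CR_Replace, otherwise it is CR_Impossible. A minimal sketch of that test, with a plain 64-bit mask standing in for LaneBitmask:

#include <cstdint>
#include <vector>

enum class Resolution { Replace, Impossible };

// WriteLanes: lanes written by the def under analysis. LiveMasks: lane masks
// of the other value's subranges that are live across the def (the caller is
// assumed to have filtered out subranges that are not live there).
Resolution resolveLaneConflict(uint64_t WriteLanes,
                               const std::vector<uint64_t> &LiveMasks) {
  for (uint64_t Mask : LiveMasks)
    if (Mask & WriteLanes)
      return Resolution::Impossible; // a live lane is clobbered
  return Resolution::Replace;        // disjoint lanes, no real conflict
}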
checkMergingChangesDbgValues(CP, LHS, LHSVals, RHS, RHSVals); + // If the RHS covers any PHI locations that were tracked for debug-info, we + // must update tracking information to reflect the join. + auto RegIt = RegToPHIIdx.find(CP.getSrcReg()); + if (RegIt != RegToPHIIdx.end()) { + // Iterate over all the debug instruction numbers assigned this register. + for (unsigned InstID : RegIt->second) { + auto PHIIt = PHIValToPos.find(InstID); + assert(PHIIt != PHIValToPos.end()); + const SlotIndex &SI = PHIIt->second.SI; + + // Does the RHS cover the position of this PHI? + auto LII = RHS.find(SI); + if (LII == RHS.end() || LII->start > SI) + continue; + + // Accept two kinds of subregister movement: + // * When we merge from one register class into a larger register: + // %1:gr16 = some-inst + // -> + // %2:gr32.sub_16bit = some-inst + // * When the PHI is already in a subregister, and the larger class + // is coalesced: + // %2:gr32.sub_16bit = some-inst + // %3:gr32 = COPY %2 + // -> + // %3:gr32.sub_16bit = some-inst + // Test for subregister move: + if (CP.getSrcIdx() != 0 || CP.getDstIdx() != 0) + // If we're moving between different subregisters, ignore this join. + // The PHI will not get a location, dropping variable locations. + if (PHIIt->second.SubReg && PHIIt->second.SubReg != CP.getSrcIdx()) + continue; + + // Update our tracking of where the PHI is. + PHIIt->second.Reg = CP.getDstReg(); + + // If we merge into a sub-register of a larger class (test above), + // update SubReg. + if (CP.getSrcIdx() != 0) + PHIIt->second.SubReg = CP.getSrcIdx(); + } + + // Rebuild the register index in RegToPHIIdx to account for PHIs tracking + // different VRegs now. Copy old collection of debug instruction numbers and + // erase the old one: + auto InstrNums = RegIt->second; + RegToPHIIdx.erase(RegIt); + + // There might already be PHIs being tracked in the destination VReg. Insert + // into an existing tracking collection, or insert a new one. + RegIt = RegToPHIIdx.find(CP.getDstReg()); + if (RegIt != RegToPHIIdx.end()) + RegIt->second.insert(RegIt->second.end(), InstrNums.begin(), + InstrNums.end()); + else + RegToPHIIdx.insert({CP.getDstReg(), InstrNums}); + } + // Join RHS into LHS. LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo); @@ -3565,8 +3707,12 @@ void RegisterCoalescer::buildVRegToDbgValueMap(MachineFunction &MF) // After collecting a block of DBG_VALUEs into ToInsert, enter them into the // vreg => DbgValueLoc map. 
auto CloseNewDVRange = [this, &ToInsert](SlotIndex Slot) { - for (auto *X : ToInsert) - DbgVRegToValues[X->getDebugOperand(0).getReg()].push_back({Slot, X}); + for (auto *X : ToInsert) { + for (auto Op : X->debug_operands()) { + if (Op.isReg() && Op.getReg().isVirtual()) + DbgVRegToValues[Op.getReg()].push_back({Slot, X}); + } + } ToInsert.clear(); }; @@ -3578,10 +3724,12 @@ void RegisterCoalescer::buildVRegToDbgValueMap(MachineFunction &MF) SlotIndex CurrentSlot = Slots.getMBBStartIdx(&MBB); for (auto &MI : MBB) { - if (MI.isDebugValue() && MI.getDebugOperand(0).isReg() && - MI.getDebugOperand(0).getReg().isVirtual()) { - ToInsert.push_back(&MI); - } else if (!MI.isDebugInstr()) { + if (MI.isDebugValue()) { + if (any_of(MI.debug_operands(), [](const MachineOperand &MO) { + return MO.isReg() && MO.getReg().isVirtual(); + })) + ToInsert.push_back(&MI); + } else if (!MI.isDebugOrPseudoInstr()) { CurrentSlot = Slots.getInstructionIndex(MI); CloseNewDVRange(CurrentSlot); } @@ -3677,12 +3825,14 @@ void RegisterCoalescer::checkMergingChangesDbgValuesImpl(Register Reg, if (DbgValueSetIt->first < SegmentIt->end) { // "Other" is live and there is a DBG_VALUE of Reg: test if we should // set it undef. - if (DbgValueSetIt->first >= SegmentIt->start && - DbgValueSetIt->second->getDebugOperand(0).getReg() != 0 && - ShouldUndef(DbgValueSetIt->first)) { - // Mark undef, erase record of this DBG_VALUE to avoid revisiting. - DbgValueSetIt->second->setDebugValueUndef(); - continue; + if (DbgValueSetIt->first >= SegmentIt->start) { + bool HasReg = DbgValueSetIt->second->hasDebugOperandForReg(Reg); + bool ShouldUndefReg = ShouldUndef(DbgValueSetIt->first); + if (HasReg && ShouldUndefReg) { + // Mark undef, erase record of this DBG_VALUE to avoid revisiting. + DbgValueSetIt->second->setDebugValueUndef(); + continue; + } } ++DbgValueSetIt; } else { @@ -3857,21 +4007,20 @@ RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) { // are not inherently easier to resolve, but slightly preferable until we // have local live range splitting. In particular this is required by // cmp+jmp macro fusion. - for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end(); - MII != E; ++MII) { - if (!MII->isCopyLike()) + for (MachineInstr &MI : *MBB) { + if (!MI.isCopyLike()) continue; - bool ApplyTerminalRule = applyTerminalRule(*MII); - if (isLocalCopy(&(*MII), LIS)) { + bool ApplyTerminalRule = applyTerminalRule(MI); + if (isLocalCopy(&MI, LIS)) { if (ApplyTerminalRule) - LocalTerminals.push_back(&(*MII)); + LocalTerminals.push_back(&MI); else - LocalWorkList.push_back(&(*MII)); + LocalWorkList.push_back(&MI); } else { if (ApplyTerminalRule) - GlobalTerminals.push_back(&(*MII)); + GlobalTerminals.push_back(&MI); else - WorkList.push_back(&(*MII)); + WorkList.push_back(&MI); } } // Append the copies evicted by the terminal rule at the end of the list. 
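The rewritten copyCoalesceInMBB loop above is a four-way partition: local versus global copies, with copies hit by the terminal rule deferred to the back of their list so they are visited last. The same shape in a standalone sketch (the boolean fields are placeholders for the isLocalCopy and applyTerminalRule predicates):

#include <vector>

struct Copy {
  int Id;
  bool Local;    // stands in for isLocalCopy(...)
  bool Terminal; // stands in for applyTerminalRule(...)
};

void buildWorkLists(const std::vector<Copy> &Copies,
                    std::vector<int> &LocalWork,
                    std::vector<int> &GlobalWork) {
  std::vector<int> LocalTerminals, GlobalTerminals;
  for (const Copy &C : Copies) {
    std::vector<int> &List = C.Local
                                 ? (C.Terminal ? LocalTerminals : LocalWork)
                                 : (C.Terminal ? GlobalTerminals : GlobalWork);
    List.push_back(C.Id);
  }
  // Copies evicted by the terminal rule go last in their list.
  LocalWork.insert(LocalWork.end(), LocalTerminals.begin(),
                   LocalTerminals.end());
  GlobalWork.insert(GlobalWork.end(), GlobalTerminals.begin(),
                    GlobalTerminals.end());
}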
@@ -3915,10 +4064,9 @@ void RegisterCoalescer::joinAllIntervals() { std::vector<MBBPriorityInfo> MBBs; MBBs.reserve(MF->size()); - for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) { - MachineBasicBlock *MBB = &*I; - MBBs.push_back(MBBPriorityInfo(MBB, Loops->getLoopDepth(MBB), - JoinSplitEdges && isSplitEdge(MBB))); + for (MachineBasicBlock &MBB : *MF) { + MBBs.push_back(MBBPriorityInfo(&MBB, Loops->getLoopDepth(&MBB), + JoinSplitEdges && isSplitEdge(&MBB))); } array_pod_sort(MBBs.begin(), MBBs.end(), compareMBBPriority); @@ -3981,6 +4129,19 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { else JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE); + // If there are PHIs tracked by debug-info, they will need updating during + // coalescing. Build an index of those PHIs to ease updating. + SlotIndexes *Slots = LIS->getSlotIndexes(); + for (const auto &DebugPHI : MF->DebugPHIPositions) { + MachineBasicBlock *MBB = DebugPHI.second.MBB; + Register Reg = DebugPHI.second.Reg; + unsigned SubReg = DebugPHI.second.SubReg; + SlotIndex SI = Slots->getMBBStartIdx(MBB); + PHIValPos P = {SI, Reg, SubReg}; + PHIValToPos.insert(std::make_pair(DebugPHI.first, P)); + RegToPHIIdx[Reg].push_back(DebugPHI.first); + } + // The MachineScheduler does not currently require JoinSplitEdges. This will // either be enabled unconditionally or replaced by a more general live range // splitting optimization. @@ -4036,6 +4197,18 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { } } + // After coalescing, update any PHIs that are being tracked by debug-info + // with their new VReg locations. + for (auto &p : MF->DebugPHIPositions) { + auto it = PHIValToPos.find(p.first); + assert(it != PHIValToPos.end()); + p.second.Reg = it->second.Reg; + p.second.SubReg = it->second.SubReg; + } + + PHIValToPos.clear(); + RegToPHIIdx.clear(); + LLVM_DEBUG(dump()); if (VerifyCoalescing) MF->verify(this, "After register coalescing"); diff --git a/llvm/lib/CodeGen/RegisterPressure.cpp b/llvm/lib/CodeGen/RegisterPressure.cpp index 8f1fc103e869..62a459fca611 100644 --- a/llvm/lib/CodeGen/RegisterPressure.cpp +++ b/llvm/lib/CodeGen/RegisterPressure.cpp @@ -764,7 +764,7 @@ void RegPressureTracker::bumpDeadDefs(ArrayRef<RegisterMaskPair> DeadDefs) { /// instruction independent of liveness. void RegPressureTracker::recede(const RegisterOperands &RegOpers, SmallVectorImpl<RegisterMaskPair> *LiveUses) { - assert(!CurrPos->isDebugInstr()); + assert(!CurrPos->isDebugOrPseudoInstr()); // Boost pressure for all dead defs together. bumpDeadDefs(RegOpers.DeadDefs); @@ -863,7 +863,7 @@ void RegPressureTracker::recedeSkipDebugValues() { CurrPos = prev_nodbg(CurrPos, MBB->begin()); SlotIndex SlotIdx; - if (RequireIntervals && !CurrPos->isDebugInstr()) + if (RequireIntervals && !CurrPos->isDebugOrPseudoInstr()) SlotIdx = LIS->getInstructionIndex(*CurrPos).getRegSlot(); // Open the top of the region using slot indexes. @@ -873,9 +873,9 @@ void RegPressureTracker::recedeSkipDebugValues() { void RegPressureTracker::recede(SmallVectorImpl<RegisterMaskPair> *LiveUses) { recedeSkipDebugValues(); - if (CurrPos->isDebugValue()) { - // It's possible to only have debug_value instructions and hit the start of - // the block. + if (CurrPos->isDebugInstr() || CurrPos->isPseudoProbe()) { + // It's possible to only have debug_value and pseudo probe instructions and + // hit the start of the block. 
assert(CurrPos == MBB->begin()); return; } @@ -1041,7 +1041,7 @@ static void computeMaxPressureDelta(ArrayRef<unsigned> OldMaxPressureVec, /// This is intended for speculative queries. It leaves pressure inconsistent /// with the current position, so must be restored by the caller. void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) { - assert(!MI->isDebugInstr() && "Expect a nondebug instruction."); + assert(!MI->isDebugOrPseudoInstr() && "Expect a nondebug instruction."); SlotIndex SlotIdx; if (RequireIntervals) @@ -1282,7 +1282,7 @@ LaneBitmask RegPressureTracker::getLiveThroughAt(Register RegUnit, /// This is intended for speculative queries. It leaves pressure inconsistent /// with the current position, so must be restored by the caller. void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) { - assert(!MI->isDebugInstr() && "Expect a nondebug instruction."); + assert(!MI->isDebugOrPseudoInstr() && "Expect a nondebug instruction."); SlotIndex SlotIdx; if (RequireIntervals) diff --git a/llvm/lib/CodeGen/RegisterScavenging.cpp b/llvm/lib/CodeGen/RegisterScavenging.cpp index a833895c115d..e35cf7aa6958 100644 --- a/llvm/lib/CodeGen/RegisterScavenging.cpp +++ b/llvm/lib/CodeGen/RegisterScavenging.cpp @@ -119,7 +119,7 @@ void RegScavenger::determineKillsAndDefs() { DefRegUnits.reset(); for (const MachineOperand &MO : MI.operands()) { if (MO.isRegMask()) { - TmpRegUnits.clear(); + TmpRegUnits.reset(); for (unsigned RU = 0, RUEnd = TRI->getNumRegUnits(); RU != RUEnd; ++RU) { for (MCRegUnitRootIterator RURI(RU, TRI); RURI.isValid(); ++RURI) { if (MO.clobbersPhysReg(*RURI)) { @@ -167,16 +167,15 @@ void RegScavenger::forward() { MachineInstr &MI = *MBBI; - for (SmallVectorImpl<ScavengedInfo>::iterator I = Scavenged.begin(), - IE = Scavenged.end(); I != IE; ++I) { - if (I->Restore != &MI) + for (ScavengedInfo &I : Scavenged) { + if (I.Restore != &MI) continue; - I->Reg = 0; - I->Restore = nullptr; + I.Reg = 0; + I.Restore = nullptr; } - if (MI.isDebugInstr()) + if (MI.isDebugOrPseudoInstr()) return; determineKillsAndDefs(); @@ -299,7 +298,7 @@ Register RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI, bool inVirtLiveRange = false; for (++MI; InstrLimit > 0 && MI != ME; ++MI, --InstrLimit) { - if (MI->isDebugInstr()) { + if (MI->isDebugOrPseudoInstr()) { ++InstrLimit; // Don't count debug instructions continue; } @@ -370,6 +369,10 @@ findSurvivorBackwards(const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); LiveRegUnits Used(TRI); + assert(From->getParent() == To->getParent() && + "Target instruction is in other than current basic block, use " + "enterBasicBlockEnd first"); + for (MachineBasicBlock::iterator I = From;; --I) { const MachineInstr &MI = *I; @@ -424,6 +427,8 @@ findSurvivorBackwards(const MachineRegisterInfo &MRI, if (I == MBB.begin()) break; } + assert(I != MBB.begin() && "Did not find target instruction while " + "iterating backwards"); } return std::make_pair(Survivor, Pos); diff --git a/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp b/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp new file mode 100644 index 000000000000..de6129a912d3 --- /dev/null +++ b/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp @@ -0,0 +1,231 @@ +//===- RemoveRedundantDebugValues.cpp - Remove Redundant Debug Value MIs --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Function.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" + +/// \file RemoveRedundantDebugValues.cpp +/// +/// The RemoveRedundantDebugValues pass removes redundant DBG_VALUEs that +/// appear in MIR after the register allocator. + +#define DEBUG_TYPE "removeredundantdebugvalues" + +using namespace llvm; + +STATISTIC(NumRemovedBackward, "Number of DBG_VALUEs removed (backward scan)"); +STATISTIC(NumRemovedForward, "Number of DBG_VALUEs removed (forward scan)"); + +namespace { + +class RemoveRedundantDebugValues : public MachineFunctionPass { +public: + static char ID; + + RemoveRedundantDebugValues(); + + bool reduceDbgValues(MachineFunction &MF); + + /// Remove redundant debug value MIs for the given machine function. + bool runOnMachineFunction(MachineFunction &MF) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } +}; + +} // namespace + +//===----------------------------------------------------------------------===// +// Implementation +//===----------------------------------------------------------------------===// + +char RemoveRedundantDebugValues::ID = 0; + +char &llvm::RemoveRedundantDebugValuesID = RemoveRedundantDebugValues::ID; + +INITIALIZE_PASS(RemoveRedundantDebugValues, DEBUG_TYPE, + "Remove Redundant DEBUG_VALUE analysis", false, false) + +/// Default construct and initialize the pass. +RemoveRedundantDebugValues::RemoveRedundantDebugValues() + : MachineFunctionPass(ID) { + initializeRemoveRedundantDebugValuesPass(*PassRegistry::getPassRegistry()); +} + +// This analysis aims to remove redundant DBG_VALUEs by going forward +// through the basic block, treating the first DBG_VALUE as valid +// until its first (location) operand is clobbered/modified. +// For example: +// (1) DBG_VALUE $edi, !"var1", ... +// (2) <block of code that does not affect $edi> +// (3) DBG_VALUE $edi, !"var1", ... +// ... +// in this case, we can remove (3). +// TODO: Support DBG_VALUE_LIST and other debug instructions. +static bool reduceDbgValsForwardScan(MachineBasicBlock &MBB) { + LLVM_DEBUG(dbgs() << "\n == Forward Scan == \n"); + + SmallVector<MachineInstr *, 8> DbgValsToBeRemoved; + DenseMap<DebugVariable, std::pair<MachineOperand *, const DIExpression *>> + VariableMap; + const auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo(); + + for (auto &MI : MBB) { + if (MI.isDebugValue()) { + DebugVariable Var(MI.getDebugVariable(), NoneType(), + MI.getDebugLoc()->getInlinedAt()); + auto VMI = VariableMap.find(Var); + // Just stop tracking this variable, until we cover DBG_VALUE_LIST. + // 1 DBG_VALUE $rax, "x", DIExpression() + // ... + // 2 DBG_VALUE_LIST "x", DIExpression(...), $rax, $rbx + // ...
+ // 3 DBG_VALUE $rax, "x", DIExpression() + if (MI.isDebugValueList() && VMI != VariableMap.end()) { + VariableMap.erase(VMI); + continue; + } + + MachineOperand &Loc = MI.getDebugOperand(0); + if (!Loc.isReg()) { + // If it's not a register, just stop tracking such a variable. + if (VMI != VariableMap.end()) + VariableMap.erase(VMI); + continue; + } + + // We have found a new value for a variable. + if (VMI == VariableMap.end() || + VMI->second.first->getReg() != Loc.getReg() || + VMI->second.second != MI.getDebugExpression()) { + VariableMap[Var] = {&Loc, MI.getDebugExpression()}; + continue; + } + + // Found an identical DBG_VALUE, so it can be considered + // for later removal. + DbgValsToBeRemoved.push_back(&MI); + } + + if (MI.isMetaInstruction()) + continue; + + // Stop tracking any location that is clobbered by this instruction. + for (auto &Var : VariableMap) { + auto &LocOp = Var.second.first; + if (MI.modifiesRegister(LocOp->getReg(), TRI)) + VariableMap.erase(Var.first); + } + } + + for (auto &Instr : DbgValsToBeRemoved) { + LLVM_DEBUG(dbgs() << "removing "; Instr->dump()); + Instr->eraseFromParent(); + ++NumRemovedForward; + } + + return !DbgValsToBeRemoved.empty(); +} + +// This analysis aims to remove redundant DBG_VALUEs by going backward +// in the basic block and removing all but the last DBG_VALUE for any +// given variable in a set of consecutive DBG_VALUE instructions. +// For example: +// (1) DBG_VALUE $edi, !"var1", ... +// (2) DBG_VALUE $esi, !"var2", ... +// (3) DBG_VALUE $edi, !"var1", ... +// ... +// in this case, we can remove (1). +static bool reduceDbgValsBackwardScan(MachineBasicBlock &MBB) { + LLVM_DEBUG(dbgs() << "\n == Backward Scan == \n"); + SmallVector<MachineInstr *, 8> DbgValsToBeRemoved; + SmallDenseSet<DebugVariable> VariableSet; + + for (MachineBasicBlock::reverse_iterator I = MBB.rbegin(), E = MBB.rend(); + I != E; ++I) { + MachineInstr *MI = &*I; + + if (MI->isDebugValue()) { + DebugVariable Var(MI->getDebugVariable(), MI->getDebugExpression(), + MI->getDebugLoc()->getInlinedAt()); + auto R = VariableSet.insert(Var); + // If it is a DBG_VALUE describing a constant as: + // DBG_VALUE 0, ... + // we just don't consider such instructions as candidates + // for redundant removal. + if (MI->isNonListDebugValue()) { + MachineOperand &Loc = MI->getDebugOperand(0); + if (!Loc.isReg()) { + // If we have already encountered this variable, just stop + // tracking it. + if (!R.second) + VariableSet.erase(Var); + continue; + } + } + + // We have already encountered the value for this variable, + // so this one can be deleted. + if (!R.second) + DbgValsToBeRemoved.push_back(MI); + continue; + } + + // If we encountered a non-DBG_VALUE, try to find the next + // sequence with consecutive DBG_VALUE instructions. + VariableSet.clear(); + } + + for (auto &Instr : DbgValsToBeRemoved) { + LLVM_DEBUG(dbgs() << "removing "; Instr->dump()); + Instr->eraseFromParent(); + ++NumRemovedBackward; + } + + return !DbgValsToBeRemoved.empty(); +} + +bool RemoveRedundantDebugValues::reduceDbgValues(MachineFunction &MF) { + LLVM_DEBUG(dbgs() << "\nDebug Value Reduction\n"); + + bool Changed = false; + + for (auto &MBB : MF) { + Changed |= reduceDbgValsBackwardScan(MBB); + Changed |= reduceDbgValsForwardScan(MBB); + } + + return Changed; +} + +bool RemoveRedundantDebugValues::runOnMachineFunction(MachineFunction &MF) { + // Skip functions without debugging information. + if (!MF.getFunction().getSubprogram()) + return false; + + // Skip functions from NoDebug compilation units.
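Both scans in this pass are duplicate-elimination walks over a stream of (variable, location) pairs. A self-contained sketch of the backward scan on simplified data: within each run of consecutive debug values, keep only the last value seen per variable (the real pass keys on DebugVariable, including expression and inlined-at scope):

#include <set>
#include <vector>

struct DbgVal {
  int Var;      // identifies the (simplified) debug variable
  bool IsDebug; // false for ordinary instructions, which end a run
};

// Returns a mask of removable instructions: all but the last debug value
// per variable within each run of consecutive debug values.
std::vector<bool> backwardScanRemovable(const std::vector<DbgVal> &Block) {
  std::vector<bool> Remove(Block.size(), false);
  std::set<int> Seen;
  for (std::size_t I = Block.size(); I-- > 0;) {
    if (!Block[I].IsDebug) {
      Seen.clear(); // a non-debug instruction ends the current run
      continue;
    }
    if (!Seen.insert(Block[I].Var).second)
      Remove[I] = true; // superseded by a later value of the same variable
  }
  return Remove;
}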
+ if (MF.getFunction().getSubprogram()->getUnit()->getEmissionKind() == + DICompileUnit::NoDebug) + return false; + + bool Changed = reduceDbgValues(MF); + return Changed; +} diff --git a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp new file mode 100644 index 000000000000..1619381967c4 --- /dev/null +++ b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp @@ -0,0 +1,254 @@ +//=== ReplaceWithVeclib.cpp - Replace vector instrinsics with veclib calls ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Replaces calls to LLVM vector intrinsics (i.e., calls to LLVM intrinsics +// with vector operands) with matching calls to functions from a vector +// library (e.g., libmvec, SVML) according to TargetLibraryInfo. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/ReplaceWithVeclib.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/DemandedBits.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/VectorUtils.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" + +using namespace llvm; + +#define DEBUG_TYPE "replace-with-veclib" + +STATISTIC(NumCallsReplaced, + "Number of calls to intrinsics that have been replaced."); + +STATISTIC(NumTLIFuncDeclAdded, + "Number of vector library function declarations added."); + +STATISTIC(NumFuncUsedAdded, + "Number of functions added to `llvm.compiler.used`"); + +static bool replaceWithTLIFunction(CallInst &CI, const StringRef TLIName) { + Module *M = CI.getModule(); + + Function *OldFunc = CI.getCalledFunction(); + + // Check if the vector library function is already declared in this module, + // otherwise insert it. + Function *TLIFunc = M->getFunction(TLIName); + if (!TLIFunc) { + TLIFunc = Function::Create(OldFunc->getFunctionType(), + Function::ExternalLinkage, TLIName, *M); + TLIFunc->copyAttributesFrom(OldFunc); + + LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Added vector library function `" + << TLIName << "` of type `" << *(TLIFunc->getType()) + << "` to module.\n"); + + ++NumTLIFuncDeclAdded; + + // Add the freshly created function to llvm.compiler.used, + // similar to as it is done in InjectTLIMappings + appendToCompilerUsed(*M, {TLIFunc}); + + LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Adding `" << TLIName + << "` to `@llvm.compiler.used`.\n"); + ++NumFuncUsedAdded; + } + + // Replace the call to the vector intrinsic with a call + // to the corresponding function from the vector library. + IRBuilder<> IRBuilder(&CI); + SmallVector<Value *> Args(CI.arg_operands()); + // Preserve the operand bundles. + SmallVector<OperandBundleDef, 1> OpBundles; + CI.getOperandBundlesAsDefs(OpBundles); + CallInst *Replacement = IRBuilder.CreateCall(TLIFunc, Args, OpBundles); + assert(OldFunc->getFunctionType() == TLIFunc->getFunctionType() && + "Expecting function types to be identical"); + CI.replaceAllUsesWith(Replacement); + if (isa<FPMathOperator>(Replacement)) { + // Preserve fast math flags for FP math. 
+ Replacement->copyFastMathFlags(&CI); + } + + LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Replaced call to `" + << OldFunc->getName() << "` with call to `" << TLIName + << "`.\n"); + ++NumCallsReplaced; + return true; +} + +static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI, + CallInst &CI) { + if (!CI.getCalledFunction()) { + return false; + } + + auto IntrinsicID = CI.getCalledFunction()->getIntrinsicID(); + if (IntrinsicID == Intrinsic::not_intrinsic) { + // Replacement is only performed for intrinsic functions + return false; + } + + // Convert vector arguments to scalar type and check that + // all vector operands have identical vector width. + ElementCount VF = ElementCount::getFixed(0); + SmallVector<Type *> ScalarTypes; + for (auto Arg : enumerate(CI.arg_operands())) { + auto *ArgType = Arg.value()->getType(); + // Vector calls to intrinsics can still have + // scalar operands for specific arguments. + if (hasVectorInstrinsicScalarOpd(IntrinsicID, Arg.index())) { + ScalarTypes.push_back(ArgType); + } else { + // The argument in this place should be a vector if + // this is a call to a vector intrinsic. + auto *VectorArgTy = dyn_cast<VectorType>(ArgType); + if (!VectorArgTy) { + // The argument is not a vector, do not perform + // the replacement. + return false; + } + ElementCount NumElements = VectorArgTy->getElementCount(); + if (NumElements.isScalable()) { + // The current implementation does not support + // scalable vectors. + return false; + } + if (VF.isNonZero() && VF != NumElements) { + // The different arguments differ in vector size. + return false; + } else { + VF = NumElements; + } + ScalarTypes.push_back(VectorArgTy->getElementType()); + } + } + + // Try to reconstruct the name for the scalar version of this + // intrinsic using the intrinsic ID and the argument types + // converted to scalar above. + std::string ScalarName; + if (Intrinsic::isOverloaded(IntrinsicID)) { + ScalarName = Intrinsic::getName(IntrinsicID, ScalarTypes, CI.getModule()); + } else { + ScalarName = Intrinsic::getName(IntrinsicID).str(); + } + + if (!TLI.isFunctionVectorizable(ScalarName)) { + // The TargetLibraryInfo does not contain a vectorized version of + // the scalar function. + return false; + } + + // Try to find the mapping for the scalar version of this intrinsic + // and the exact vector width of the call operands in the + // TargetLibraryInfo. + const std::string TLIName = + std::string(TLI.getVectorizedFunction(ScalarName, VF)); + + LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Looking up TLI mapping for `" + << ScalarName << "` and vector width " << VF << ".\n"); + + if (!TLIName.empty()) { + // Found the correct mapping in the TargetLibraryInfo, + // replace the call to the intrinsic with a call to + // the vector library function. + LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Found TLI function `" << TLIName + << "`.\n"); + return replaceWithTLIFunction(CI, TLIName); + } + + return false; +} + +static bool runImpl(const TargetLibraryInfo &TLI, Function &F) { + bool Changed = false; + SmallVector<CallInst *> ReplacedCalls; + for (auto &I : instructions(F)) { + if (auto *CI = dyn_cast<CallInst>(&I)) { + if (replaceWithCallToVeclib(TLI, *CI)) { + ReplacedCalls.push_back(CI); + Changed = true; + } + } + } + // Erase the calls to the intrinsics that have been replaced + // with calls to the vector library. 
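Deferring eraseFromParent until after the instruction walk finishes is the standard collect-then-erase pattern for mutating a sequence while iterating it. A generic standalone sketch of the same two-phase shape:

#include <list>
#include <vector>

// Rewrite matching items, recording them during the scan and erasing them
// only afterwards so iterators stay valid throughout.
template <typename T, typename Pred, typename Rewrite>
bool replaceThenErase(std::list<T> &Items, Pred Matches, Rewrite Emit) {
  std::vector<typename std::list<T>::iterator> Replaced;
  for (auto It = Items.begin(); It != Items.end(); ++It)
    if (Matches(*It)) {
      Emit(*It);              // e.g. emit the veclib call next to the original
      Replaced.push_back(It); // defer removal of the original
    }
  for (auto It : Replaced)
    Items.erase(It);
  return !Replaced.empty();
}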
+ for (auto *CI : ReplacedCalls) { + CI->eraseFromParent(); + } + return Changed; +} + +//////////////////////////////////////////////////////////////////////////////// +// New pass manager implementation. +//////////////////////////////////////////////////////////////////////////////// +PreservedAnalyses ReplaceWithVeclib::run(Function &F, + FunctionAnalysisManager &AM) { + const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F); + auto Changed = runImpl(TLI, F); + if (Changed) { + PreservedAnalyses PA; + PA.preserveSet<CFGAnalyses>(); + PA.preserve<TargetLibraryAnalysis>(); + PA.preserve<ScalarEvolutionAnalysis>(); + PA.preserve<LoopAccessAnalysis>(); + PA.preserve<DemandedBitsAnalysis>(); + PA.preserve<OptimizationRemarkEmitterAnalysis>(); + return PA; + } else { + // The pass did not replace any calls, hence it preserves all analyses. + return PreservedAnalyses::all(); + } +} + +//////////////////////////////////////////////////////////////////////////////// +// Legacy PM Implementation. +//////////////////////////////////////////////////////////////////////////////// +bool ReplaceWithVeclibLegacy::runOnFunction(Function &F) { + const TargetLibraryInfo &TLI = + getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); + return runImpl(TLI, F); +} + +void ReplaceWithVeclibLegacy::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); + AU.addPreserved<TargetLibraryInfoWrapperPass>(); + AU.addPreserved<ScalarEvolutionWrapperPass>(); + AU.addPreserved<AAResultsWrapperPass>(); + AU.addPreserved<LoopAccessLegacyAnalysis>(); + AU.addPreserved<DemandedBitsWrapperPass>(); + AU.addPreserved<OptimizationRemarkEmitterWrapperPass>(); + AU.addPreserved<GlobalsAAWrapperPass>(); +} + +//////////////////////////////////////////////////////////////////////////////// +// Legacy Pass manager initialization +//////////////////////////////////////////////////////////////////////////////// +char ReplaceWithVeclibLegacy::ID = 0; + +INITIALIZE_PASS_BEGIN(ReplaceWithVeclibLegacy, DEBUG_TYPE, + "Replace intrinsics with calls to vector library", false, + false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(ReplaceWithVeclibLegacy, DEBUG_TYPE, + "Replace intrinsics with calls to vector library", false, + false) + +FunctionPass *llvm::createReplaceWithVeclibLegacyPass() { + return new ReplaceWithVeclibLegacy(); +} diff --git a/llvm/lib/CodeGen/SafeStack.cpp b/llvm/lib/CodeGen/SafeStack.cpp index 31797631c97b..94add920f284 100644 --- a/llvm/lib/CodeGen/SafeStack.cpp +++ b/llvm/lib/CodeGen/SafeStack.cpp @@ -23,6 +23,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolution.h" @@ -130,6 +131,7 @@ class SafeStack { Function &F; const TargetLoweringBase &TL; const DataLayout &DL; + DomTreeUpdater *DTU; ScalarEvolution &SE; Type *StackPtrTy; @@ -207,8 +209,8 @@ class SafeStack { public: SafeStack(Function &F, const TargetLoweringBase &TL, const DataLayout &DL, - ScalarEvolution &SE) - : F(F), TL(TL), DL(DL), SE(SE), + DomTreeUpdater *DTU, ScalarEvolution &SE) + : F(F), TL(TL), DL(DL), DTU(DTU), SE(SE), StackPtrTy(Type::getInt8PtrTy(F.getContext())), IntPtrTy(DL.getIntPtrType(F.getContext())), Int32Ty(Type::getInt32Ty(F.getContext())), @@ -371,9 +373,13 @@ bool 
SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) { Value *SafeStack::getStackGuard(IRBuilder<> &IRB, Function &F) { Value *StackGuardVar = TL.getIRStackGuard(IRB); - if (!StackGuardVar) - StackGuardVar = - F.getParent()->getOrInsertGlobal("__stack_chk_guard", StackPtrTy); + Module *M = F.getParent(); + + if (!StackGuardVar) { + TL.insertSSPDeclarations(*M); + return IRB.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::stackguard)); + } + return IRB.CreateLoad(StackPtrTy, StackGuardVar, "StackGuard"); } @@ -419,8 +425,7 @@ void SafeStack::findInsts(Function &F, for (Argument &Arg : F.args()) { if (!Arg.hasByValAttr()) continue; - uint64_t Size = - DL.getTypeStoreSize(Arg.getType()->getPointerElementType()); + uint64_t Size = DL.getTypeStoreSize(Arg.getParamByValType()); if (IsSafeStackAlloca(&Arg, Size)) continue; @@ -477,8 +482,7 @@ void SafeStack::checkStackGuard(IRBuilder<> &IRB, Function &F, Instruction &RI, .createBranchWeights(SuccessProb.getNumerator(), FailureProb.getNumerator()); Instruction *CheckTerm = - SplitBlockAndInsertIfThen(Cmp, &RI, - /* Unreachable */ true, Weights); + SplitBlockAndInsertIfThen(Cmp, &RI, /* Unreachable */ true, Weights, DTU); IRBuilder<> IRBFail(CheckTerm); // FIXME: respect -fsanitize-trap / -ftrap-function here? FunctionCallee StackChkFail = @@ -522,7 +526,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack( } for (Argument *Arg : ByValArguments) { - Type *Ty = Arg->getType()->getPointerElementType(); + Type *Ty = Arg->getParamByValType(); uint64_t Size = DL.getTypeStoreSize(Ty); if (Size == 0) Size = 1; // Don't create zero-sized stack objects. @@ -579,7 +583,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack( for (Argument *Arg : ByValArguments) { unsigned Offset = SSL.getObjectOffset(Arg); MaybeAlign Align(SSL.getObjectAlignment(Arg)); - Type *Ty = Arg->getType()->getPointerElementType(); + Type *Ty = Arg->getParamByValType(); uint64_t Size = DL.getTypeStoreSize(Ty); if (Size == 0) @@ -864,6 +868,7 @@ public: AU.addRequired<TargetPassConfig>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); AU.addRequired<AssumptionCacheTracker>(); + AU.addPreserved<DominatorTreeWrapperPass>(); } bool runOnFunction(Function &F) override { @@ -893,15 +898,34 @@ public: // Compute DT and LI only for functions that have the attribute. // This is only useful because the legacy pass manager doesn't let us // compute analyses lazily. - // In the backend pipeline, nothing preserves DT before SafeStack, so we - // would otherwise always compute it wastefully, even if there is no - // function with the safestack attribute. - DominatorTree DT(F); - LoopInfo LI(DT); - ScalarEvolution SE(F, TLI, ACT, DT, LI); + DominatorTree *DT; + bool ShouldPreserveDominatorTree; + Optional<DominatorTree> LazilyComputedDomTree; + + // Do we already have a DominatorTree available from the previous pass? + // Note that we should *NOT* require it, to avoid the case where we end up + // not needing it, but the legacy PM would have computed it for us anyway. + if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>()) { + DT = &DTWP->getDomTree(); + ShouldPreserveDominatorTree = true; + } else { + // Otherwise, we need to compute it. + LazilyComputedDomTree.emplace(F); + DT = LazilyComputedDomTree.getPointer(); + ShouldPreserveDominatorTree = false; + } + + // Likewise, lazily compute loop info.
+ LoopInfo LI(*DT); + + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); + + ScalarEvolution SE(F, TLI, ACT, *DT, LI); - return SafeStack(F, *TL, *DL, SE).run(); + return SafeStack(F, *TL, *DL, ShouldPreserveDominatorTree ? &DTU : nullptr, + SE) + .run(); } }; @@ -912,6 +936,7 @@ char SafeStackLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(SafeStackLegacyPass, DEBUG_TYPE, "Safe Stack instrumentation pass", false, false) INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_END(SafeStackLegacyPass, DEBUG_TYPE, "Safe Stack instrumentation pass", false, false) diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index 5899da777fe9..daff3af3bc3c 100644 --- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -514,7 +514,7 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { /// TODO: Handle ExitSU "uses" properly. void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { const MachineInstr *MI = SU->getInstr(); - assert(!MI->isDebugInstr()); + assert(!MI->isDebugOrPseudoInstr()); const MachineOperand &MO = MI->getOperand(OperIdx); Register Reg = MO.getReg(); @@ -572,7 +572,7 @@ void ScheduleDAGInstrs::initSUnits() { SUnits.reserve(NumRegionInstrs); for (MachineInstr &MI : make_range(RegionBegin, RegionEnd)) { - if (MI.isDebugInstr()) + if (MI.isDebugOrPseudoInstr()) continue; SUnit *SU = newSUnit(&MI); @@ -807,11 +807,12 @@ void ScheduleDAGInstrs::buildSchedGraph(AAResults *AA, DbgMI = nullptr; } - if (MI.isDebugValue() || MI.isDebugRef()) { + if (MI.isDebugValue() || MI.isDebugPHI()) { DbgMI = &MI; continue; } - if (MI.isDebugLabel()) + + if (MI.isDebugLabel() || MI.isDebugRef() || MI.isPseudoProbe()) continue; SUnit *SU = MISUnitMap[&MI]; @@ -1117,7 +1118,7 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) { // Examine block from end to start... for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) { - if (MI.isDebugInstr()) + if (MI.isDebugOrPseudoInstr()) continue; // Update liveness. 
Registers that are defed but not used in this
@@ -1152,7 +1153,7 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) {
     while (I->isBundledWithSucc())
       ++I;
     do {
-      if (!I->isDebugInstr())
+      if (!I->isDebugOrPseudoInstr())
         toggleKills(MRI, LiveRegs, *I, true);
       --I;
     } while (I != Bundle);
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 615bea2a4905..b104e995019f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -461,8 +461,7 @@ namespace {
     SDValue visitAssertExt(SDNode *N);
     SDValue visitAssertAlign(SDNode *N);
     SDValue visitSIGN_EXTEND_INREG(SDNode *N);
-    SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
-    SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
+    SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
     SDValue visitTRUNCATE(SDNode *N);
     SDValue visitBITCAST(SDNode *N);
     SDValue visitFREEZE(SDNode *N);
@@ -547,8 +546,11 @@ namespace {
     SDValue foldSignChangeInBitcast(SDNode *N);
     SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
                                    SDValue N2, SDValue N3, ISD::CondCode CC);
+    SDValue foldSelectOfBinops(SDNode *N);
+    SDValue foldSextSetcc(SDNode *N);
     SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                               const SDLoc &DL);
+    SDValue foldSubToUSubSat(EVT DstVT, SDNode *N);
     SDValue unfoldMaskedMerge(SDNode *N);
     SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
     SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
@@ -1673,8 +1675,8 @@ SDValue DAGCombiner::visit(SDNode *N) {
   case ISD::AssertZext:         return visitAssertExt(N);
   case ISD::AssertAlign:        return visitAssertAlign(N);
   case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
-  case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
-  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
+  case ISD::SIGN_EXTEND_VECTOR_INREG:
+  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
   case ISD::TRUNCATE:           return visitTRUNCATE(N);
   case ISD::BITCAST:            return visitBITCAST(N);
   case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
@@ -2259,9 +2261,9 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
       return FoldedVOp;
     // fold (add x, 0) -> x, vector edition
-    if (ISD::isBuildVectorAllZeros(N1.getNode()))
+    if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
       return N0;
-    if (ISD::isBuildVectorAllZeros(N0.getNode()))
+    if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
       return N1;
   }
@@ -2337,6 +2339,23 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
   if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
     if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
       return RADD;
+
+    // Reassociate (add (or x, c), y) -> (add (add x, y), c) if (or x, c) is
+    // equivalent to (add x, c).
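The ReassociateAddOr helper that follows implements this on the DAG. As a hedged aside, here is a standalone scalar sketch (hypothetical code, not part of the patch) of the identity the fold relies on: when x and c share no set bits, which is what DAG.haveNoCommonBitsSet establishes, (or x, c) and (add x, c) agree, so the constant can be pulled out of the sum.

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x = 0; x < 256; ++x)
    for (uint32_t c = 0; c < 256; ++c) {
      if ((x & c) != 0)
        continue; // precondition: no common bits, as haveNoCommonBitsSet checks
      for (uint32_t y = 0; y < 256; y += 17) {
        assert((x | c) == x + c);               // or == add without shared bits
        assert(((x | c) + y) == ((x + y) + c)); // the reassociation above
      }
    }
  return 0;
}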
+    auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
+      if (N0.getOpcode() == ISD::OR && N0.hasOneUse() &&
+          isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
+          DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
+        return DAG.getNode(ISD::ADD, DL, VT,
+                           DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
+                           N0.getOperand(1));
+      }
+      return SDValue();
+    };
+    if (SDValue Add = ReassociateAddOr(N0, N1))
+      return Add;
+    if (SDValue Add = ReassociateAddOr(N1, N0))
+      return Add;
   }
   // fold ((0-A) + B) -> B-A
   if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
@@ -2502,6 +2521,26 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
   }
+  // Fold (add step_vector(c1), step_vector(c2)) to step_vector(c1+c2)
+  if (N0.getOpcode() == ISD::STEP_VECTOR &&
+      N1.getOpcode() == ISD::STEP_VECTOR) {
+    const APInt &C0 = N0->getConstantOperandAPInt(0);
+    const APInt &C1 = N1->getConstantOperandAPInt(0);
+    APInt NewStep = C0 + C1;
+    return DAG.getStepVector(DL, VT, NewStep);
+  }
+
+  // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
+  if ((N0.getOpcode() == ISD::ADD) &&
+      (N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR) &&
+      (N1.getOpcode() == ISD::STEP_VECTOR)) {
+    const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
+    const APInt &SV1 = N1->getConstantOperandAPInt(0);
+    APInt NewStep = SV0 + SV1;
+    SDValue SV = DAG.getStepVector(DL, VT, NewStep);
+    return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
+  }
+
   return SDValue();
 }
@@ -2517,9 +2556,9 @@ SDValue DAGCombiner::visitADDSAT(SDNode *N) {
     // TODO SimplifyVBinOp
     // fold (add_sat x, 0) -> x, vector edition
-    if (ISD::isBuildVectorAllZeros(N1.getNode()))
+    if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
       return N0;
-    if (ISD::isBuildVectorAllZeros(N0.getNode()))
+    if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
       return N1;
   }
@@ -3125,6 +3164,82 @@ SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
   return SDValue();
 }
+// Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
+// clamp/truncation if necessary.
+static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
+                                   SDValue RHS, SelectionDAG &DAG,
+                                   const SDLoc &DL) {
+  assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
+         "Illegal truncation");
+
+  if (DstVT == SrcVT)
+    return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
+
+  // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
+  // clamping RHS.
+  APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
+                                          DstVT.getScalarSizeInBits());
+  if (!DAG.MaskedValueIsZero(LHS, UpperBits))
+    return SDValue();
+
+  SDValue SatLimit =
+      DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
+                                           DstVT.getScalarSizeInBits()),
+                      DL, SrcVT);
+  RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
+  RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
+  LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
+  return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
+}
+
+// Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
+// usubsat(a,b), optionally as a truncated type.
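The implementation follows. As a sanity check, here is a scalar model (an illustrative sketch, not DAG code) of getTruncatedUSUBSAT above, with SrcVT as i16 and DstVT as i8: when the wide LHS is known to fit in 8 bits, clamping RHS to the i8 saturation limit before truncating preserves the saturating subtract.

#include <algorithm>
#include <cassert>
#include <cstdint>

static uint16_t usubsat16(uint16_t a, uint16_t b) { return a > b ? a - b : 0; }
static uint8_t usubsat8(uint8_t a, uint8_t b) { return a > b ? a - b : 0; }

int main() {
  for (uint32_t l = 0; l <= 0xFF; ++l)     // LHS upper bits are zero
    for (uint32_t r = 0; r <= 0xFFFF; ++r) {
      uint8_t Wide = (uint8_t)usubsat16((uint16_t)l, (uint16_t)r);
      uint8_t Narrow =
          usubsat8((uint8_t)l, (uint8_t)std::min<uint32_t>(r, 0xFF)); // clamp RHS
      assert(Wide == Narrow);
    }
  return 0;
}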
+SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N) {
+  if (N->getOpcode() != ISD::SUB ||
+      !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
+    return SDValue();
+
+  EVT SubVT = N->getValueType(0);
+  SDValue Op0 = N->getOperand(0);
+  SDValue Op1 = N->getOperand(1);
+
+  // Try to find umax(a,b) - b or a - umin(a,b) patterns;
+  // they may be converted to usubsat(a,b).
+  if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
+    SDValue MaxLHS = Op0.getOperand(0);
+    SDValue MaxRHS = Op0.getOperand(1);
+    if (MaxLHS == Op1)
+      return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, SDLoc(N));
+    if (MaxRHS == Op1)
+      return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, SDLoc(N));
+  }
+
+  if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
+    SDValue MinLHS = Op1.getOperand(0);
+    SDValue MinRHS = Op1.getOperand(1);
+    if (MinLHS == Op0)
+      return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, SDLoc(N));
+    if (MinRHS == Op0)
+      return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, SDLoc(N));
+  }
+
+  // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
+  if (Op1.getOpcode() == ISD::TRUNCATE &&
+      Op1.getOperand(0).getOpcode() == ISD::UMIN &&
+      Op1.getOperand(0).hasOneUse()) {
+    SDValue MinLHS = Op1.getOperand(0).getOperand(0);
+    SDValue MinRHS = Op1.getOperand(0).getOperand(1);
+    if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
+      return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
+                                 DAG, SDLoc(N));
+    if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
+      return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
+                                 DAG, SDLoc(N));
+  }
+
+  return SDValue();
+}
+
 // Since it may not be valid to emit a fold to zero for vector initializers,
 // check if we can before folding.
 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
@@ -3148,7 +3263,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
       return FoldedVOp;
     // fold (sub x, 0) -> x, vector edition
-    if (ISD::isBuildVectorAllZeros(N1.getNode()))
+    if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
       return N0;
   }
@@ -3207,6 +3322,17 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
         !TLI.isOperationLegalOrCustom(ISD::ABS, VT) &&
         TLI.expandABS(N1.getNode(), Result, DAG, true))
       return Result;
+
+    // Fold neg(splat(neg(x))) -> splat(x)
+    if (VT.isVector()) {
+      SDValue N1S = DAG.getSplatValue(N1, true);
+      if (N1S && N1S.getOpcode() == ISD::SUB &&
+          isNullConstant(N1S.getOperand(0))) {
+        if (VT.isScalableVector())
+          return DAG.getSplatVector(VT, DL, N1S.getOperand(1));
+        return DAG.getSplatBuildVector(VT, DL, N1S.getOperand(1));
+      }
+    }
   }
   // Canonicalize (sub -1, x) -> ~x, i.e.
(xor x, -1) @@ -3343,6 +3469,9 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N))) return V; + if (SDValue V = foldSubToUSubSat(VT, N)) + return V; + // (x - y) - 1 -> add (xor y, -1), x if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) { SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), @@ -3434,12 +3563,19 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { } } - // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C)) + // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C)) if (N1.getOpcode() == ISD::VSCALE) { const APInt &IntVal = N1.getConstantOperandAPInt(0); return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal)); } + // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C)) + if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) { + APInt NewStep = -N1.getConstantOperandAPInt(0); + return DAG.getNode(ISD::ADD, DL, VT, N0, + DAG.getStepVector(DL, VT, NewStep)); + } + // Prefer an add for more folding potential and possibly better codegen: // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1) if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) { @@ -3478,7 +3614,7 @@ SDValue DAGCombiner::visitSUBSAT(SDNode *N) { // TODO SimplifyVBinOp // fold (sub_sat x, 0) -> x, vector edition - if (ISD::isBuildVectorAllZeros(N1.getNode())) + if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) return N0; } @@ -3814,6 +3950,15 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { return DAG.getVScale(SDLoc(N), VT, C0 * C1); } + // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)). + APInt MulVal; + if (N0.getOpcode() == ISD::STEP_VECTOR) + if (ISD::isConstantSplatVector(N1.getNode(), MulVal)) { + const APInt &C0 = N0.getConstantOperandAPInt(0); + APInt NewStep = C0 * MulVal; + return DAG.getStepVector(SDLoc(N), VT, NewStep); + } + // Fold ((mul x, 0/undef) -> 0, // (mul x, 1) -> x) -> x) // -> and(x, mask) @@ -4323,11 +4468,15 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { if (VT.isVector()) { // fold (mulhs x, 0) -> 0 // do not return N0/N1, because undef node may exist. - if (ISD::isBuildVectorAllZeros(N0.getNode()) || - ISD::isBuildVectorAllZeros(N1.getNode())) + if (ISD::isConstantSplatVectorAllZeros(N0.getNode()) || + ISD::isConstantSplatVectorAllZeros(N1.getNode())) return DAG.getConstant(0, DL, VT); } + // fold (mulhs c1, c2) + if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1})) + return C; + // fold (mulhs x, 0) -> 0 if (isNullConstant(N1)) return N1; @@ -4371,11 +4520,15 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { if (VT.isVector()) { // fold (mulhu x, 0) -> 0 // do not return N0/N1, because undef node may exist. - if (ISD::isBuildVectorAllZeros(N0.getNode()) || - ISD::isBuildVectorAllZeros(N1.getNode())) + if (ISD::isConstantSplatVectorAllZeros(N0.getNode()) || + ISD::isConstantSplatVectorAllZeros(N1.getNode())) return DAG.getConstant(0, DL, VT); } + // fold (mulhu c1, c2) + if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1})) + return C; + // fold (mulhu x, 0) -> 0 if (isNullConstant(N1)) return N1; @@ -4551,6 +4704,21 @@ SDValue DAGCombiner::visitMULO(SDNode *N) { EVT CarryVT = N->getValueType(1); SDLoc DL(N); + ConstantSDNode *N0C = isConstOrConstSplat(N0); + ConstantSDNode *N1C = isConstOrConstSplat(N1); + + // fold operation with constant operands. + // TODO: Move this to FoldConstantArithmetic when it supports nodes with + // multiple results. 
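The constant fold that follows uses APInt::smul_ov/umul_ov to compute the product and the overflow bit at compile time. A scalar analogue (a sketch using the GCC/Clang overflow builtins, not the DAG code itself):

#include <cassert>
#include <cstdint>

int main() {
  int32_t Prod;
  // smulo on constants: 2^30 * 2 == 2^31, which overflows a signed i32.
  bool Ov = __builtin_mul_overflow(INT32_C(0x40000000), 2, &Prod);
  assert(Ov);
  // No overflow: the fold produces the plain product and a false carry.
  Ov = __builtin_mul_overflow(INT32_C(12345), 678, &Prod);
  assert(!Ov && Prod == 12345 * 678);
  return 0;
}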
+ if (N0C && N1C) { + bool Overflow; + APInt Result = + IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow) + : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow); + return CombineTo(N, DAG.getConstant(Result, DL, VT), + DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT)); + } + // canonicalize constant to RHS. if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && !DAG.isConstantIntBuildVectorOrConstantInt(N1)) @@ -4562,10 +4730,37 @@ SDValue DAGCombiner::visitMULO(SDNode *N) { DAG.getConstant(0, DL, CarryVT)); // (mulo x, 2) -> (addo x, x) - if (ConstantSDNode *C2 = isConstOrConstSplat(N1)) - if (C2->getAPIntValue() == 2) - return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL, - N->getVTList(), N0, N0); + if (N1C && N1C->getAPIntValue() == 2) + return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL, + N->getVTList(), N0, N0); + + if (IsSigned) { + // A 1 bit SMULO overflows if both inputs are 1. + if (VT.getScalarSizeInBits() == 1) { + SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1); + return CombineTo(N, And, + DAG.getSetCC(DL, CarryVT, And, + DAG.getConstant(0, DL, VT), ISD::SETNE)); + } + + // Multiplying n * m significant bits yields a result of n + m significant + // bits. If the total number of significant bits does not exceed the + // result bit width (minus 1), there is no overflow. + unsigned SignBits = DAG.ComputeNumSignBits(N0); + if (SignBits > 1) + SignBits += DAG.ComputeNumSignBits(N1); + if (SignBits > VT.getScalarSizeInBits() + 1) + return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1), + DAG.getConstant(0, DL, CarryVT)); + } else { + KnownBits N1Known = DAG.computeKnownBits(N1); + KnownBits N0Known = DAG.computeKnownBits(N0); + bool Overflow; + (void)N0Known.getMaxValue().umul_ov(N1Known.getMaxValue(), Overflow); + if (!Overflow) + return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1), + DAG.getConstant(0, DL, CarryVT)); + } return SDValue(); } @@ -4883,20 +5078,20 @@ SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1, ConstantSDNode *C0 = isConstOrConstSplat(LR); ConstantSDNode *C1 = isConstOrConstSplat(RR); if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) { - // Canonicalize larger constant as C0. - if (C1->getAPIntValue().ugt(C0->getAPIntValue())) - std::swap(C0, C1); - + const APInt &CMax = + APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue()); + const APInt &CMin = + APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue()); // The difference of the constants must be a single bit. 
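The rewrite below merges the two compares into one. As a scalar check (illustrative only) of the ne/ne case: when CMax - CMin is a single bit, x differs from both constants exactly when (x - CMin), masked with ~(CMax - CMin), is nonzero.

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t CMin = 20, CMax = 28; // CMax - CMin == 8, a single bit
  for (uint32_t xi = 0; xi <= 0xFF; ++xi) {
    uint8_t x = (uint8_t)xi;
    bool TwoSetCCs = (x != CMax) && (x != CMin);
    uint8_t Masked = (uint8_t)((uint8_t)(x - CMin) & (uint8_t)~(CMax - CMin));
    assert(TwoSetCCs == (Masked != 0));
  }
  return 0;
}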
- const APInt &C0Val = C0->getAPIntValue(); - const APInt &C1Val = C1->getAPIntValue(); - if ((C0Val - C1Val).isPowerOf2()) { - // and/or (setcc X, C0, ne), (setcc X, C1, ne/eq) --> - // setcc ((add X, -C1), ~(C0 - C1)), 0, ne/eq - SDValue OffsetC = DAG.getConstant(-C1Val, DL, OpVT); - SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LL, OffsetC); - SDValue MaskC = DAG.getConstant(~(C0Val - C1Val), DL, OpVT); - SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Add, MaskC); + if ((CMax - CMin).isPowerOf2()) { + // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) --> + // setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq + SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR); + SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR); + SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min); + SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min); + SDValue Mask = DAG.getNOT(DL, Diff, OpVT); + SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask); SDValue Zero = DAG.getConstant(0, DL, OpVT); return DAG.getSetCC(DL, VT, And, Zero, CC0); } @@ -5428,19 +5623,19 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return FoldedVOp; // fold (and x, 0) -> 0, vector edition - if (ISD::isBuildVectorAllZeros(N0.getNode())) + if (ISD::isConstantSplatVectorAllZeros(N0.getNode())) // do not return N0, because undef node may exist in N0 return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()), SDLoc(N), N0.getValueType()); - if (ISD::isBuildVectorAllZeros(N1.getNode())) + if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) // do not return N1, because undef node may exist in N1 return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()), SDLoc(N), N1.getValueType()); // fold (and x, -1) -> x, vector edition - if (ISD::isBuildVectorAllOnes(N0.getNode())) + if (ISD::isConstantSplatVectorAllOnes(N0.getNode())) return N1; - if (ISD::isBuildVectorAllOnes(N1.getNode())) + if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) return N0; // fold (and (masked_load) (build_vec (x, ...))) to zext_masked_load @@ -6194,16 +6389,16 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return FoldedVOp; // fold (or x, 0) -> x, vector edition - if (ISD::isBuildVectorAllZeros(N0.getNode())) + if (ISD::isConstantSplatVectorAllZeros(N0.getNode())) return N1; - if (ISD::isBuildVectorAllZeros(N1.getNode())) + if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) return N0; // fold (or x, -1) -> -1, vector edition - if (ISD::isBuildVectorAllOnes(N0.getNode())) + if (ISD::isConstantSplatVectorAllOnes(N0.getNode())) // do not return N0, because undef node may exist in N0 return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType()); - if (ISD::isBuildVectorAllOnes(N1.getNode())) + if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) // do not return N1, because undef node may exist in N1 return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType()); @@ -6517,8 +6712,11 @@ static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift, // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate // in direction shift1 by Neg. The range [0, EltSize) means that we only need // to consider shift amounts with defined behavior. +// +// The IsRotate flag should be set when the LHS of both shifts is the same. +// Otherwise if matching a general funnel shift, it should be clear. static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, - SelectionDAG &DAG) { + SelectionDAG &DAG, bool IsRotate) { // If EltSize is a power of 2 then: // // (a) (Pos == 0 ? 
0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1) @@ -6550,8 +6748,11 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, // always invokes undefined behavior for 32-bit X. // // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise. + // + // NOTE: We can only do this when matching an AND and not a general + // funnel shift. unsigned MaskLoBits = 0; - if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) { + if (IsRotate && Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) { if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) { KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0)); unsigned Bits = Log2_64(EltSize); @@ -6641,7 +6842,8 @@ SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, // (srl x, (*ext y))) -> // (rotr x, y) or (rotl x, (sub 32, y)) EVT VT = Shifted.getValueType(); - if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) { + if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG, + /*IsRotate*/ true)) { bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT); return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted, HasPos ? Pos : Neg); @@ -6670,7 +6872,7 @@ SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, // fold (or (shl x0, (*ext (sub 32, y))), // (srl x1, (*ext y))) -> // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y)) - if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG)) { + if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) { bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT); return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1, HasPos ? Pos : Neg); @@ -7098,14 +7300,22 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) { if (LegalOperations) return SDValue(); - // Collect all the stores in the chain. - SDValue Chain; - SmallVector<StoreSDNode *, 8> Stores; - for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) { - // TODO: Allow unordered atomics when wider type is legal (see D66309) - EVT MemVT = Store->getMemoryVT(); - if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) || - !Store->isSimple() || Store->isIndexed()) + // We only handle merging simple stores of 1-4 bytes. + // TODO: Allow unordered atomics when wider type is legal (see D66309) + EVT MemVT = N->getMemoryVT(); + if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) || + !N->isSimple() || N->isIndexed()) + return SDValue(); + + // Collect all of the stores in the chain. + SDValue Chain = N->getChain(); + SmallVector<StoreSDNode *, 8> Stores = {N}; + while (auto *Store = dyn_cast<StoreSDNode>(Chain)) { + // All stores must be the same size to ensure that we are writing all of the + // bytes in the wide value. + // TODO: We could allow multiple sizes by tracking each stored byte. 
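The size check that follows enforces this. For orientation, a plain-C++ picture (a sketch; the real combine works on SelectionDAG store chains, not on source code) of the pattern being merged: several single-byte truncating stores of one wide value collapse into a single wide store, here assuming a little-endian host.

#include <cassert>
#include <cstdint>
#include <cstring>

static void narrowStores(uint8_t *p, uint32_t v) {
  p[0] = (uint8_t)v;        // four i8 truncating stores...
  p[1] = (uint8_t)(v >> 8);
  p[2] = (uint8_t)(v >> 16);
  p[3] = (uint8_t)(v >> 24);
}

int main() {
  uint32_t v = 0xDEADBEEF;
  uint8_t a[4], b[4];
  narrowStores(a, v);
  std::memcpy(b, &v, sizeof(v)); // ...equivalent to one i32 store on LE hosts
  assert(std::memcmp(a, b, sizeof(v)) == 0);
  return 0;
}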
+    if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
+        Store->isIndexed())
       return SDValue();
     Stores.push_back(Store);
     Chain = Store->getChain();
   }
@@ -7548,9 +7758,9 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
       return FoldedVOp;
     // fold (xor x, 0) -> x, vector edition
-    if (ISD::isBuildVectorAllZeros(N0.getNode()))
+    if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
       return N1;
-    if (ISD::isBuildVectorAllZeros(N1.getNode()))
+    if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
       return N0;
   }
@@ -8253,6 +8463,17 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
     return DAG.getVScale(SDLoc(N), VT, C0 << C1);
   }
+  // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
+  APInt ShlVal;
+  if (N0.getOpcode() == ISD::STEP_VECTOR)
+    if (ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
+      const APInt &C0 = N0.getConstantOperandAPInt(0);
+      if (ShlVal.ult(C0.getBitWidth())) {
+        APInt NewStep = C0 << ShlVal;
+        return DAG.getStepVector(SDLoc(N), VT, NewStep);
+      }
+    }
+
   return SDValue();
 }
@@ -8361,13 +8582,17 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
       unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
       EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
       if (VT.isVector())
-        ExtVT = EVT::getVectorVT(*DAG.getContext(),
-                                 ExtVT, VT.getVectorNumElements());
+        ExtVT = EVT::getVectorVT(*DAG.getContext(), ExtVT,
+                                 VT.getVectorElementCount());
       if (!LegalOperations ||
           TLI.getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) ==
           TargetLowering::Legal)
         return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                            N0.getOperand(0), DAG.getValueType(ExtVT));
+      // Even if we can't convert to sext_inreg, we might be able to remove
+      // this shift pair if the input is already sign extended.
+      if (DAG.ComputeNumSignBits(N0.getOperand(0)) > N1C->getZExtValue())
+        return N0.getOperand(0);
     }
   // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
@@ -8390,9 +8615,14 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
     };
     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
       SDValue ShiftValue;
-      if (VT.isVector())
+      if (N1.getOpcode() == ISD::BUILD_VECTOR)
         ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
-      else
+      else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
+        assert(ShiftValues.size() == 1 &&
+               "Expected matchBinaryPredicate to return one element for "
+               "SPLAT_VECTORs");
+        ShiftValue = DAG.getSplatVector(ShiftVT, DL, ShiftValues[0]);
+      } else
         ShiftValue = ShiftValues[0];
       return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
     }
@@ -8412,7 +8642,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
       EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
       if (VT.isVector())
-        TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
+        TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
       // Determine the residual right-shift amount.
       int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
@@ -8452,7 +8682,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
     unsigned ShiftAmt = N1C->getZExtValue();
     EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
     if (VT.isVector())
-      TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
+      TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
     // TODO: The simple type check probably belongs in the default hook
     // implementation and/or target-specific overrides (because
@@ -8865,6 +9095,40 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
   return SDValue();
 }
+// Given an ABS node, detect the following pattern:
+// (ABS (SUB (EXTEND a), (EXTEND b))).
+// Generates UABD/SABD instruction. +static SDValue combineABSToABD(SDNode *N, SelectionDAG &DAG, + const TargetLowering &TLI) { + SDValue AbsOp1 = N->getOperand(0); + SDValue Op0, Op1; + + if (AbsOp1.getOpcode() != ISD::SUB) + return SDValue(); + + Op0 = AbsOp1.getOperand(0); + Op1 = AbsOp1.getOperand(1); + + unsigned Opc0 = Op0.getOpcode(); + // Check if the operands of the sub are (zero|sign)-extended. + if (Opc0 != Op1.getOpcode() || + (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND)) + return SDValue(); + + EVT VT1 = Op0.getOperand(0).getValueType(); + EVT VT2 = Op1.getOperand(0).getValueType(); + // Check if the operands are of same type and valid size. + unsigned ABDOpcode = (Opc0 == ISD::SIGN_EXTEND) ? ISD::ABDS : ISD::ABDU; + if (VT1 != VT2 || !TLI.isOperationLegalOrCustom(ABDOpcode, VT1)) + return SDValue(); + + Op0 = Op0.getOperand(0); + Op1 = Op1.getOperand(0); + SDValue ABD = + DAG.getNode(ABDOpcode, SDLoc(N), Op0->getValueType(0), Op0, Op1); + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), ABD); +} + SDValue DAGCombiner::visitABS(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -8878,6 +9142,10 @@ SDValue DAGCombiner::visitABS(SDNode *N) { // fold (abs x) -> x iff not-negative if (DAG.SignBitIsZero(N0)) return N0; + + if (SDValue ABD = combineABSToABD(N, DAG, TLI)) + return ABD; + return SDValue(); } @@ -9038,8 +9306,8 @@ static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) { SDValue Cond = N->getOperand(0); SDValue C1 = N->getOperand(1); SDValue C2 = N->getOperand(2); - assert(isConstantOrConstantVector(C1) && isConstantOrConstantVector(C2) && - "Expected select-of-constants"); + if (!isConstantOrConstantVector(C1) || !isConstantOrConstantVector(C2)) + return SDValue(); EVT VT = N->getValueType(0); if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() || @@ -9177,6 +9445,40 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { return SDValue(); } +static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) { + assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT) && + "Expected a (v)select"); + SDValue Cond = N->getOperand(0); + SDValue T = N->getOperand(1), F = N->getOperand(2); + EVT VT = N->getValueType(0); + if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1) + return SDValue(); + + // select Cond, Cond, F --> or Cond, F + // select Cond, 1, F --> or Cond, F + if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true)) + return DAG.getNode(ISD::OR, SDLoc(N), VT, Cond, F); + + // select Cond, T, Cond --> and Cond, T + // select Cond, T, 0 --> and Cond, T + if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true)) + return DAG.getNode(ISD::AND, SDLoc(N), VT, Cond, T); + + // select Cond, T, 1 --> or (not Cond), T + if (isOneOrOneSplat(F, /* AllowUndefs */ true)) { + SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT); + return DAG.getNode(ISD::OR, SDLoc(N), VT, NotCond, T); + } + + // select Cond, 0, F --> and (not Cond), F + if (isNullOrNullSplat(T, /* AllowUndefs */ true)) { + SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT); + return DAG.getNode(ISD::AND, SDLoc(N), VT, NotCond, F); + } + + return SDValue(); +} + SDValue DAGCombiner::visitSELECT(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -9189,30 +9491,11 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { if (SDValue V = DAG.simplifySelect(N0, N1, N2)) return V; - // fold (select X, X, Y) -> (or X, Y) - // fold (select X, 1, Y) -> (or C, Y) - if (VT == VT0 && VT == 
MVT::i1 && (N0 == N1 || isOneConstant(N1)))
-    return DAG.getNode(ISD::OR, DL, VT, N0, N2);
-
   if (SDValue V = foldSelectOfConstants(N))
     return V;
-  // fold (select C, 0, X) -> (and (not C), X)
-  if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
-    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
-    AddToWorklist(NOTNode.getNode());
-    return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
-  }
-  // fold (select C, X, 1) -> (or (not C), X)
-  if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
-    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
-    AddToWorklist(NOTNode.getNode());
-    return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
-  }
-  // fold (select X, Y, X) -> (and X, Y)
-  // fold (select X, Y, 0) -> (and X, Y)
-  if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
-    return DAG.getNode(ISD::AND, DL, VT, N0, N1);
+  if (SDValue V = foldBoolSelectToLogic(N, DAG))
+    return V;
   // If we can fold this based on the true/false value, do so.
   if (SimplifySelectOps(N, N1, N2))
@@ -9358,9 +9641,14 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
       return SelectNode;
     }
-    return SimplifySelect(DL, N0, N1, N2);
+    if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
+      return NewSel;
   }
+  if (!VT.isVector())
+    if (SDValue BinOp = foldSelectOfBinops(N))
+      return BinOp;
+
   return SDValue();
 }
@@ -9471,20 +9759,20 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
   SDLoc DL(N);
   // Zap scatters with a zero mask.
-  if (ISD::isBuildVectorAllZeros(Mask.getNode()))
+  if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
     return Chain;
   if (refineUniformBase(BasePtr, Index, DAG)) {
     SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
     return DAG.getMaskedScatter(
-        DAG.getVTList(MVT::Other), StoreVal.getValueType(), DL, Ops,
+        DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, Ops,
         MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
   }
   if (refineIndexType(MSC, Index, MSC->isIndexScaled(), DAG)) {
     SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
     return DAG.getMaskedScatter(
-        DAG.getVTList(MVT::Other), StoreVal.getValueType(), DL, Ops,
+        DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, Ops,
         MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
   }
@@ -9498,12 +9786,12 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
   SDLoc DL(N);
   // Zap masked stores with a zero mask.
-  if (ISD::isBuildVectorAllZeros(Mask.getNode()))
+  if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
     return Chain;
   // If this is a masked store with an all-ones mask, we can use an unmasked store.
   // FIXME: Can we do this for indexed, compressing, or truncating stores?
-  if (ISD::isBuildVectorAllOnes(Mask.getNode()) &&
+  if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) &&
       MST->isUnindexed() && !MST->isCompressingStore() &&
       !MST->isTruncatingStore())
     return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
@@ -9527,13 +9815,13 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) {
   SDLoc DL(N);
   // Zap gathers with a zero mask.
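The gather fold below mirrors the store case above and the load case that follows: a zero mask touches no lanes, so the operation is "zapped" to its chain or pass-through, while an all-ones mask touches every lane and degenerates to the unmasked form. A scalar model (illustrative only) of the two degenerate masks:

#include <array>
#include <cassert>
#include <cstddef>

template <std::size_t N>
static void maskedStore(std::array<int, N> &Mem, const std::array<int, N> &Val,
                        const std::array<bool, N> &Mask) {
  for (std::size_t i = 0; i < N; ++i)
    if (Mask[i])
      Mem[i] = Val[i];
}

int main() {
  std::array<int, 4> Mem = {0, 0, 0, 0}, Val = {1, 2, 3, 4};
  maskedStore(Mem, Val, {true, true, true, true}); // all-ones mask
  assert(Mem == Val);                              // behaves as a plain store
  std::array<int, 4> Snapshot = Mem;
  maskedStore(Mem, Val, {false, false, false, false}); // zero mask
  assert(Mem == Snapshot);                             // no effect: "zapped"
  return 0;
}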
-  if (ISD::isBuildVectorAllZeros(Mask.getNode()))
+  if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
     return CombineTo(N, PassThru, MGT->getChain());
   if (refineUniformBase(BasePtr, Index, DAG)) {
     SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
     return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
-                               PassThru.getValueType(), DL, Ops,
+                               MGT->getMemoryVT(), DL, Ops,
                                MGT->getMemOperand(), MGT->getIndexType(),
                                MGT->getExtensionType());
   }
   if (refineIndexType(MGT, Index, MGT->isIndexScaled(), DAG)) {
     SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
     return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
-                               PassThru.getValueType(), DL, Ops,
+                               MGT->getMemoryVT(), DL, Ops,
                                MGT->getMemOperand(), MGT->getIndexType(),
                                MGT->getExtensionType());
   }
@@ -9555,12 +9843,12 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
   SDLoc DL(N);
   // Zap masked loads with a zero mask.
-  if (ISD::isBuildVectorAllZeros(Mask.getNode()))
+  if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
     return CombineTo(N, MLD->getPassThru(), MLD->getChain());
   // If this is a masked load with an all-ones mask, we can use an unmasked load.
   // FIXME: Can we do this for indexed, expanding, or extending loads?
-  if (ISD::isBuildVectorAllOnes(Mask.getNode()) &&
+  if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) &&
      MLD->isUnindexed() && !MLD->isExpandingLoad() &&
      MLD->getExtensionType() == ISD::NON_EXTLOAD) {
     SDValue NewLd = DAG.getLoad(N->getValueType(0), SDLoc(N), MLD->getChain(),
@@ -9650,6 +9938,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
   if (SDValue V = DAG.simplifySelect(N0, N1, N2))
     return V;
+  if (SDValue V = foldBoolSelectToLogic(N, DAG))
+    return V;
+
   // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
   if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
     return DAG.getSelect(DL, VT, F, N2, N1);
@@ -9734,10 +10025,10 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
     // If it's on the left side invert the predicate to simplify logic below.
     SDValue Other;
     ISD::CondCode SatCC = CC;
-    if (ISD::isBuildVectorAllOnes(N1.getNode())) {
+    if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) {
       Other = N2;
       SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
-    } else if (ISD::isBuildVectorAllOnes(N2.getNode())) {
+    } else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) {
       Other = N1;
     }
@@ -9758,7 +10049,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
           (OpLHS == CondLHS || OpRHS == CondLHS))
         return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
-      if (isa<BuildVectorSDNode>(OpRHS) && isa<BuildVectorSDNode>(CondRHS) &&
+      if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
+          (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
+           OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
           CondLHS == OpLHS) {
         // If the RHS is a constant we have to reverse the const
         // canonicalization.
@@ -9779,54 +10072,71 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
     // the left side invert the predicate to simplify logic below.
SDValue Other; ISD::CondCode SatCC = CC; - if (ISD::isBuildVectorAllZeros(N1.getNode())) { + if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) { Other = N2; SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType()); - } else if (ISD::isBuildVectorAllZeros(N2.getNode())) { + } else if (ISD::isConstantSplatVectorAllZeros(N2.getNode())) { Other = N1; } - if (Other && Other.getNumOperands() == 2 && Other.getOperand(0) == LHS) { + if (Other && Other.getNumOperands() == 2) { SDValue CondRHS = RHS; SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1); - // Look for a general sub with unsigned saturation first. - // x >= y ? x-y : 0 --> usubsat x, y - // x > y ? x-y : 0 --> usubsat x, y - if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) && - Other.getOpcode() == ISD::SUB && OpRHS == CondRHS) - return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS); - - if (auto *OpRHSBV = dyn_cast<BuildVectorSDNode>(OpRHS)) { - if (isa<BuildVectorSDNode>(CondRHS)) { - // If the RHS is a constant we have to reverse the const - // canonicalization. - // x > C-1 ? x+-C : 0 --> usubsat x, C - auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) { - return (!Op && !Cond) || - (Op && Cond && - Cond->getAPIntValue() == (-Op->getAPIntValue() - 1)); - }; - if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD && - ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT, - /*AllowUndefs*/ true)) { - OpRHS = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), - OpRHS); - return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS); - } + if (Other.getOpcode() == ISD::SUB && + LHS.getOpcode() == ISD::ZERO_EXTEND && LHS.getOperand(0) == OpLHS && + OpRHS.getOpcode() == ISD::TRUNCATE && OpRHS.getOperand(0) == RHS) { + // Look for a general sub with unsigned saturation first. + // zext(x) >= y ? x - trunc(y) : 0 + // --> usubsat(x,trunc(umin(y,SatLimit))) + // zext(x) > y ? x - trunc(y) : 0 + // --> usubsat(x,trunc(umin(y,SatLimit))) + if (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) + return getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS, DAG, + DL); + } + + if (OpLHS == LHS) { + // Look for a general sub with unsigned saturation first. + // x >= y ? x-y : 0 --> usubsat x, y + // x > y ? x-y : 0 --> usubsat x, y + if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) && + Other.getOpcode() == ISD::SUB && OpRHS == CondRHS) + return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS); + + if (OpRHS.getOpcode() == ISD::BUILD_VECTOR || + OpRHS.getOpcode() == ISD::SPLAT_VECTOR) { + if (CondRHS.getOpcode() == ISD::BUILD_VECTOR || + CondRHS.getOpcode() == ISD::SPLAT_VECTOR) { + // If the RHS is a constant we have to reverse the const + // canonicalization. + // x > C-1 ? x+-C : 0 --> usubsat x, C + auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) { + return (!Op && !Cond) || + (Op && Cond && + Cond->getAPIntValue() == (-Op->getAPIntValue() - 1)); + }; + if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD && + ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT, + /*AllowUndefs*/ true)) { + OpRHS = DAG.getNode(ISD::SUB, DL, VT, + DAG.getConstant(0, DL, VT), OpRHS); + return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS); + } - // Another special case: If C was a sign bit, the sub has been - // canonicalized into a xor. - // FIXME: Would it be better to use computeKnownBits to determine - // whether it's safe to decanonicalize the xor? - // x s< 0 ? 
x^C : 0 --> usubsat x, C - if (auto *OpRHSConst = OpRHSBV->getConstantSplatNode()) { + // Another special case: If C was a sign bit, the sub has been + // canonicalized into a xor. + // FIXME: Would it be better to use computeKnownBits to determine + // whether it's safe to decanonicalize the xor? + // x s< 0 ? x^C : 0 --> usubsat x, C + APInt SplatValue; if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR && - ISD::isBuildVectorAllZeros(CondRHS.getNode()) && - OpRHSConst->getAPIntValue().isSignMask()) { - // Note that we have to rebuild the RHS constant here to ensure - // we don't rely on particular values of undef lanes. - OpRHS = DAG.getConstant(OpRHSConst->getAPIntValue(), DL, VT); + ISD::isConstantSplatVector(OpRHS.getNode(), SplatValue) && + ISD::isConstantSplatVectorAllZeros(CondRHS.getNode()) && + SplatValue.isSignMask()) { + // Note that we have to rebuild the RHS constant here to + // ensure we don't rely on particular values of undef lanes. + OpRHS = DAG.getConstant(SplatValue, DL, VT); return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS); } } @@ -9839,11 +10149,11 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { if (SimplifySelectOps(N, N1, N2)) return SDValue(N, 0); // Don't revisit N. - // Fold (vselect (build_vector all_ones), N1, N2) -> N1 - if (ISD::isBuildVectorAllOnes(N0.getNode())) + // Fold (vselect all_ones, N1, N2) -> N1 + if (ISD::isConstantSplatVectorAllOnes(N0.getNode())) return N1; - // Fold (vselect (build_vector all_zeros), N1, N2) -> N2 - if (ISD::isBuildVectorAllZeros(N0.getNode())) + // Fold (vselect all_zeros, N1, N2) -> N2 + if (ISD::isConstantSplatVectorAllZeros(N0.getNode())) return N2; // The ConvertSelectToConcatVector function is assuming both the above @@ -9913,9 +10223,62 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) { bool PreferSetCC = N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND; - SDValue Combined = SimplifySetCC( - N->getValueType(0), N->getOperand(0), N->getOperand(1), - cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC); + ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get(); + EVT VT = N->getValueType(0); + + // SETCC(FREEZE(X), CONST, Cond) + // => + // FREEZE(SETCC(X, CONST, Cond)) + // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond) + // isn't equivalent to true or false. + // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to + // FREEZE(SETCC(X, -128, SETULT)) because X can be poison. + // + // This transformation is beneficial because visitBRCOND can fold + // BRCOND(FREEZE(X)) to BRCOND(X). + + // Conservatively optimize integer comparisons only. + if (PreferSetCC) { + // Do this only when SETCC is going to be used by BRCOND. + + SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + bool Updated = false; + + // Is 'X Cond C' always true or false? 
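The lambda that follows encodes when a comparison against a constant is a tautology. The guard matters because, as the comment above notes, poison propagates through setcc: if the compare is always true, setcc(freeze(X), C) is the constant true even for poison X, while freeze(setcc(X, C)) may be an arbitrary value, so hoisting the freeze would be unsound. A scalar reminder (illustrative only) of two tautological cases the lambda catches:

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t xi = 0; xi <= 0xFF; ++xi) {
    uint8_t x = (uint8_t)xi;
    assert(!(x < 0u));      // SETULT 0: always false for unsigned x
    assert(x <= UINT8_MAX); // SETULE all-ones: always true
  }
  return 0;
}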
+    auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
+      bool False = (Cond == ISD::SETULT && C->isNullValue()) ||
+                   (Cond == ISD::SETLT && C->isMinSignedValue()) ||
+                   (Cond == ISD::SETUGT && C->isAllOnesValue()) ||
+                   (Cond == ISD::SETGT && C->isMaxSignedValue());
+      bool True = (Cond == ISD::SETULE && C->isAllOnesValue()) ||
+                  (Cond == ISD::SETLE && C->isMaxSignedValue()) ||
+                  (Cond == ISD::SETUGE && C->isNullValue()) ||
+                  (Cond == ISD::SETGE && C->isMinSignedValue());
+      return True || False;
+    };
+
+    if (N0->getOpcode() == ISD::FREEZE && N0.hasOneUse() && N1C) {
+      if (!IsAlwaysTrueOrFalse(Cond, N1C)) {
+        N0 = N0->getOperand(0);
+        Updated = true;
+      }
+    }
+    if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse() && N0C) {
+      if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond),
+                               N0C)) {
+        N1 = N1->getOperand(0);
+        Updated = true;
+      }
+    }
+
+    if (Updated)
+      return DAG.getFreeze(DAG.getSetCC(SDLoc(N), VT, N0, N1, Cond));
+  }
+
+  SDValue Combined = SimplifySetCC(VT, N->getOperand(0), N->getOperand(1), Cond,
+                                   SDLoc(N), !PreferSetCC);
   if (!Combined)
     return SDValue();
@@ -9949,6 +10312,77 @@ SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
   return SDValue();
 }
+/// Check if N satisfies:
+///   N is used once.
+///   N is a Load.
+///   The load is compatible with ExtOpcode. It means:
+///     if the load has an explicit zero/sign extension, ExtOpcode must have
+///     the same extension;
+///     otherwise the load is compatible.
+static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
+  if (!N.hasOneUse())
+    return false;
+
+  if (!isa<LoadSDNode>(N))
+    return false;
+
+  LoadSDNode *Load = cast<LoadSDNode>(N);
+  ISD::LoadExtType LoadExt = Load->getExtensionType();
+  if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
+    return true;
+
+  // Now LoadExt is either SEXTLOAD or ZEXTLOAD, ExtOpcode must have the same
+  // extension.
+  if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
+      (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
+    return false;
+
+  return true;
+}
+
+/// Fold
+///   (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
+///   (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
+///   (aext (select c, load x, load y)) -> (select c, extload x, extload y)
+/// This function is called by the DAGCombiner when visiting sext/zext/aext
+/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
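The implementation follows. At the source level the fold corresponds to a simple commutation of the extension with the select, sketched here in plain C++ (assumption: both select operands are simple loads, which is what isCompatibleLoad verifies):

#include <cassert>
#include <cstdint>
#include <initializer_list>

// sext (select c, load x, load y)
static int32_t extendAfterSelect(bool c, const int8_t *x, const int8_t *y) {
  return (int32_t)(c ? *x : *y);
}

// select c, sextload x, sextload y
static int32_t selectOfExtLoads(bool c, const int8_t *x, const int8_t *y) {
  return c ? (int32_t)*x : (int32_t)*y;
}

int main() {
  int8_t a = -5, b = 7;
  for (bool c : {false, true})
    assert(extendAfterSelect(c, &a, &b) == selectOfExtLoads(c, &a, &b));
  return 0;
}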
+static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI, + SelectionDAG &DAG) { + unsigned Opcode = N->getOpcode(); + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + SDLoc DL(N); + + assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND || + Opcode == ISD::ANY_EXTEND) && + "Expected EXTEND dag node in input!"); + + if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) || + !N0.hasOneUse()) + return SDValue(); + + SDValue Op1 = N0->getOperand(1); + SDValue Op2 = N0->getOperand(2); + if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode)) + return SDValue(); + + auto ExtLoadOpcode = ISD::EXTLOAD; + if (Opcode == ISD::SIGN_EXTEND) + ExtLoadOpcode = ISD::SEXTLOAD; + else if (Opcode == ISD::ZERO_EXTEND) + ExtLoadOpcode = ISD::ZEXTLOAD; + + LoadSDNode *Load1 = cast<LoadSDNode>(Op1); + LoadSDNode *Load2 = cast<LoadSDNode>(Op2); + if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) || + !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT())) + return SDValue(); + + SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1); + SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2); + return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2); +} + /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or /// a build_vector of constants. /// This function is called by the DAGCombiner when visiting sext/zext/aext @@ -10481,6 +10915,128 @@ static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG, return SDValue(); } +SDValue DAGCombiner::foldSextSetcc(SDNode *N) { + SDValue N0 = N->getOperand(0); + if (N0.getOpcode() != ISD::SETCC) + return SDValue(); + + SDValue N00 = N0.getOperand(0); + SDValue N01 = N0.getOperand(1); + ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); + EVT VT = N->getValueType(0); + EVT N00VT = N00.getValueType(); + SDLoc DL(N); + + // On some architectures (such as SSE/NEON/etc) the SETCC result type is + // the same size as the compared operands. Try to optimize sext(setcc()) + // if this is the case. + if (VT.isVector() && !LegalOperations && + TLI.getBooleanContents(N00VT) == + TargetLowering::ZeroOrNegativeOneBooleanContent) { + EVT SVT = getSetCCResultType(N00VT); + + // If we already have the desired type, don't change it. + if (SVT != N0.getValueType()) { + // We know that the # elements of the results is the same as the + // # elements of the compare (and the # elements of the compare result + // for that matter). Check to see that they are the same size. If so, + // we know that the element size of the sext'd result matches the + // element size of the compare operands. + if (VT.getSizeInBits() == SVT.getSizeInBits()) + return DAG.getSetCC(DL, VT, N00, N01, CC); + + // If the desired elements are smaller or larger than the source + // elements, we can use a matching integer vector type and then + // truncate/sign extend. + EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger(); + if (SVT == MatchingVecType) { + SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC); + return DAG.getSExtOrTrunc(VsetCC, DL, VT); + } + } + + // Try to eliminate the sext of a setcc by zexting the compare operands. + if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) && + !TLI.isOperationLegalOrCustom(ISD::SETCC, SVT)) { + bool IsSignedCmp = ISD::isSignedIntSetCC(CC); + unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD; + unsigned ExtOpcode = IsSignedCmp ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + + // We have an unsupported narrow vector compare op that would be legal + // if extended to the destination type. See if the compare operands + // can be freely extended to the destination type. + auto IsFreeToExtend = [&](SDValue V) { + if (isConstantOrConstantVector(V, /*NoOpaques*/ true)) + return true; + // Match a simple, non-extended load that can be converted to a + // legal {z/s}ext-load. + // TODO: Allow widening of an existing {z/s}ext-load? + if (!(ISD::isNON_EXTLoad(V.getNode()) && + ISD::isUNINDEXEDLoad(V.getNode()) && + cast<LoadSDNode>(V)->isSimple() && + TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType()))) + return false; + + // Non-chain users of this value must either be the setcc in this + // sequence or extends that can be folded into the new {z/s}ext-load. + for (SDNode::use_iterator UI = V->use_begin(), UE = V->use_end(); + UI != UE; ++UI) { + // Skip uses of the chain and the setcc. + SDNode *User = *UI; + if (UI.getUse().getResNo() != 0 || User == N0.getNode()) + continue; + // Extra users must have exactly the same cast we are about to create. + // TODO: This restriction could be eased if ExtendUsesToFormExtLoad() + // is enhanced similarly. + if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT) + return false; + } + return true; + }; + + if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) { + SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00); + SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01); + return DAG.getSetCC(DL, VT, Ext0, Ext1, CC); + } + } + } + + // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0) + // Here, T can be 1 or -1, depending on the type of the setcc and + // getBooleanContents(). + unsigned SetCCWidth = N0.getScalarValueSizeInBits(); + + // To determine the "true" side of the select, we need to know the high bit + // of the value returned by the setcc if it evaluates to true. + // If the type of the setcc is i1, then the true case of the select is just + // sext(i1 1), that is, -1. + // If the type of the setcc is larger (say, i8) then the value of the high + // bit depends on getBooleanContents(), so ask TLI for a real "true" value + // of the appropriate width. + SDValue ExtTrueVal = (SetCCWidth == 1) + ? DAG.getAllOnesConstant(DL, VT) + : DAG.getBoolConstant(true, DL, VT, N00VT); + SDValue Zero = DAG.getConstant(0, DL, VT); + if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true)) + return SCC; + + if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) { + EVT SetCCVT = getSetCCResultType(N00VT); + // Don't do this transform for i1 because there's a select transform + // that would reverse it. + // TODO: We should not do this transform at all without a target hook + // because a sext is likely cheaper than a select? 
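A scalar reminder (illustrative, not part of the patch) of why ExtTrueVal above is all-ones when the setcc produces an i1: sign-extending the 1-bit value 1 gives -1, so the sext of a compare is exactly a select between -1 and 0.

#include <cassert>
#include <cstdint>

int main() {
  for (int a = -2; a <= 2; ++a)
    for (int b = -2; b <= 2; ++b) {
      int32_t SextOfCmp = -(int32_t)(a < b); // sext i1 -> 0 or all-ones
      int32_t Select = (a < b) ? -1 : 0;     // (select (setcc a, b), -1, 0)
      assert(SextOfCmp == Select);
    }
  return 0;
}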
+ if (SetCCVT.getScalarSizeInBits() != 1 && + (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) { + SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC); + return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero); + } + } + + return SDValue(); +} + SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -10612,76 +11168,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations)) return V; - if (N0.getOpcode() == ISD::SETCC) { - SDValue N00 = N0.getOperand(0); - SDValue N01 = N0.getOperand(1); - ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); - EVT N00VT = N00.getValueType(); - - // sext(setcc) -> sext_in_reg(vsetcc) for vectors. - // Only do this before legalize for now. - if (VT.isVector() && !LegalOperations && - TLI.getBooleanContents(N00VT) == - TargetLowering::ZeroOrNegativeOneBooleanContent) { - // On some architectures (such as SSE/NEON/etc) the SETCC result type is - // of the same size as the compared operands. Only optimize sext(setcc()) - // if this is the case. - EVT SVT = getSetCCResultType(N00VT); - - // If we already have the desired type, don't change it. - if (SVT != N0.getValueType()) { - // We know that the # elements of the results is the same as the - // # elements of the compare (and the # elements of the compare result - // for that matter). Check to see that they are the same size. If so, - // we know that the element size of the sext'd result matches the - // element size of the compare operands. - if (VT.getSizeInBits() == SVT.getSizeInBits()) - return DAG.getSetCC(DL, VT, N00, N01, CC); - - // If the desired elements are smaller or larger than the source - // elements, we can use a matching integer vector type and then - // truncate/sign extend. - EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger(); - if (SVT == MatchingVecType) { - SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC); - return DAG.getSExtOrTrunc(VsetCC, DL, VT); - } - } - } - - // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0) - // Here, T can be 1 or -1, depending on the type of the setcc and - // getBooleanContents(). - unsigned SetCCWidth = N0.getScalarValueSizeInBits(); - - // To determine the "true" side of the select, we need to know the high bit - // of the value returned by the setcc if it evaluates to true. - // If the type of the setcc is i1, then the true case of the select is just - // sext(i1 1), that is, -1. - // If the type of the setcc is larger (say, i8) then the value of the high - // bit depends on getBooleanContents(), so ask TLI for a real "true" value - // of the appropriate width. - SDValue ExtTrueVal = (SetCCWidth == 1) - ? DAG.getAllOnesConstant(DL, VT) - : DAG.getBoolConstant(true, DL, VT, N00VT); - SDValue Zero = DAG.getConstant(0, DL, VT); - if (SDValue SCC = - SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true)) - return SCC; - - if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) { - EVT SetCCVT = getSetCCResultType(N00VT); - // Don't do this transform for i1 because there's a select transform - // that would reverse it. - // TODO: We should not do this transform at all without a target hook - // because a sext is likely cheaper than a select? 
- if (SetCCVT.getScalarSizeInBits() != 1 && - (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) { - SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC); - return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero); - } - } - } + if (SDValue V = foldSextSetcc(N)) + return V; // fold (sext x) -> (zext x) if the sign bit is known zero. if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) && @@ -10733,6 +11221,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT)); } + if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG)) + return Res; + return SDValue(); } @@ -11045,6 +11536,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if (SDValue NewCtPop = widenCtPop(N, DAG)) return NewCtPop; + if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG)) + return Res; + return SDValue(); } @@ -11197,6 +11691,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { if (SDValue NewCtPop = widenCtPop(N, DAG)) return NewCtPop; + if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG)) + return Res; + return SDValue(); } @@ -11542,14 +12039,24 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { } // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x) - if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG || - N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG || - N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) && - N0.getOperand(0).getScalarValueSizeInBits() == ExtVTBits) { - if (!LegalOperations || - TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)) - return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, - N0.getOperand(0)); + // if x is small enough or if we know that x has more than 1 sign bit and the + // sign_extend_inreg is extending from one of them. + if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG || + N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG || + N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) { + SDValue N00 = N0.getOperand(0); + unsigned N00Bits = N00.getScalarValueSizeInBits(); + unsigned DstElts = N0.getValueType().getVectorMinNumElements(); + unsigned SrcElts = N00.getValueType().getVectorMinNumElements(); + bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG; + APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts); + if ((N00Bits == ExtVTBits || + (!IsZext && (N00Bits < ExtVTBits || + (N00Bits - DAG.ComputeNumSignBits(N00, DemandedSrcElts)) < + ExtVTBits))) && + (!LegalOperations || + TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))) + return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00); } // fold (sext_in_reg (zext x)) -> (sext x) @@ -11610,6 +12117,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { AddToWorklist(ExtLoad.getNode()); return SDValue(N, 0); // Return N so it doesn't get rechecked! } + // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse() && @@ -11671,28 +12179,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { return SDValue(); } -SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) { - SDValue N0 = N->getOperand(0); - EVT VT = N->getValueType(0); - - // sext_vector_inreg(undef) = 0 because the top bit will all be the same. 
- if (N0.isUndef()) - return DAG.getConstant(0, SDLoc(N), VT); - - if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes)) - return Res; - - if (SimplifyDemandedVectorElts(SDValue(N, 0))) - return SDValue(N, 0); - - return SDValue(); -} - -SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) { +SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - // zext_vector_inreg(undef) = 0 because the top bits will be zero. + // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same. if (N0.isUndef()) return DAG.getConstant(0, SDLoc(N), VT); @@ -11812,6 +12303,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { } } + if (SDValue V = foldSubToUSubSat(VT, N0.getNode())) + return V; + // Attempt to pre-truncate BUILD_VECTOR sources. if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations && TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) && @@ -12013,6 +12507,20 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR); } } + break; + case ISD::USUBSAT: + // Truncate the USUBSAT only if LHS is a known zero-extension, its not + // enough to know that the upper bits are zero we must ensure that we don't + // introduce an extra truncate. + if (!LegalOperations && N0.hasOneUse() && + N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND && + N0.getOperand(0).getOperand(0).getScalarValueSizeInBits() <= + VT.getScalarSizeInBits() && + hasOperation(N0.getOpcode(), VT)) { + return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1), + DAG, SDLoc(N)); + } + break; } return SDValue(); @@ -12141,7 +12649,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { VT.getVectorElementType()); // If the input is a constant, let getNode fold it. - if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) { + if (isIntOrFPConstant(N0)) { // If we can't allow illegal operations, we need to check that this is just // a fp -> int or int -> conversion and that the resulting operation will // be legal. @@ -12374,12 +12882,7 @@ SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) { SDValue DAGCombiner::visitFREEZE(SDNode *N) { SDValue N0 = N->getOperand(0); - // (freeze (freeze x)) -> (freeze x) - if (N0.getOpcode() == ISD::FREEZE) - return N0; - - // If the input is a constant, return it. - if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) + if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false)) return N0; return SDValue(); @@ -12500,11 +13003,6 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { return DAG.getBuildVector(VT, DL, Ops); } -static bool isContractable(SDNode *N) { - SDNodeFlags F = N->getFlags(); - return F.hasAllowContract() || F.hasAllowReassociation(); -} - /// Try to perform FMA combining on a given FADD node. SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { SDValue N0 = N->getOperand(0); @@ -12526,16 +13024,15 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { if (!HasFMAD && !HasFMA) return SDValue(); - bool CanFuse = Options.UnsafeFPMath || isContractable(N); bool CanReassociate = Options.UnsafeFPMath || N->getFlags().hasAllowReassociation(); bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast || - CanFuse || HasFMAD); + Options.UnsafeFPMath || HasFMAD); // If the addition is not contractable, do not combine. 
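A small host-side check (not from the patch) of why the FMA combine above gates on the contract flag: fusing fadd(fmul(x, y), z) into fma(x, y, z) keeps the product unrounded and can change the result. Values are chosen to expose the double rounding; hex-float literals need C++17:

#include <cassert>
#include <cmath>

int main() {
  // fadd(fmul(x, y), z) -> fma(x, y, z) is not value-preserving:
  // the separate form rounds x*y before the add, fma does not.
  double x = 1.0 + 0x1p-30, y = 1.0 + 0x1p-30, z = -(1.0 + 0x1p-29);
  double separate = x * y + z;      // x*y rounds to 1 + 2^-29, sum is 0
  double fused = std::fma(x, y, z); // exact product keeps the 2^-60 term
  assert(separate == 0.0);
  assert(fused == 0x1p-60);
  return 0;
}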
- if (!AllowFusionGlobally && !isContractable(N)) + if (!AllowFusionGlobally && !N->getFlags().hasAllowContract()) return SDValue(); - if (STI && STI->generateFMAsInMachineCombiner(OptLevel)) + if (TLI.generateFMAsInMachineCombiner(VT, OptLevel)) return SDValue(); // Always prefer FMAD to FMA for precision. @@ -12547,7 +13044,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { auto isContractableFMUL = [AllowFusionGlobally](SDValue N) { if (N.getOpcode() != ISD::FMUL) return false; - return AllowFusionGlobally || isContractable(N.getNode()); + return AllowFusionGlobally || N->getFlags().hasAllowContract(); }; // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)), // prefer to fold the multiply with fewer uses. @@ -12736,15 +13233,14 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { return SDValue(); const SDNodeFlags Flags = N->getFlags(); - bool CanFuse = Options.UnsafeFPMath || isContractable(N); bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast || - CanFuse || HasFMAD); + Options.UnsafeFPMath || HasFMAD); // If the subtraction is not contractable, do not combine. - if (!AllowFusionGlobally && !isContractable(N)) + if (!AllowFusionGlobally && !N->getFlags().hasAllowContract()) return SDValue(); - if (STI && STI->generateFMAsInMachineCombiner(OptLevel)) + if (TLI.generateFMAsInMachineCombiner(VT, OptLevel)) return SDValue(); // Always prefer FMAD to FMA for precision. @@ -12757,7 +13253,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { auto isContractableFMUL = [AllowFusionGlobally](SDValue N) { if (N.getOpcode() != ISD::FMUL) return false; - return AllowFusionGlobally || isContractable(N.getNode()); + return AllowFusionGlobally || N->getFlags().hasAllowContract(); }; // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) @@ -12887,13 +13383,23 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { } } + auto isReassociable = [Options](SDNode *N) { + return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation(); + }; + + auto isContractableAndReassociableFMUL = [isContractableFMUL, + isReassociable](SDValue N) { + return isContractableFMUL(N) && isReassociable(N.getNode()); + }; + // More folding opportunities when target permits. 
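For reference, the basic pattern visitFSUBForFMACombine targets is fsub(fmul(x, y), z) -> fma(x, y, fneg(z)). A trivial scalar check with inputs where both forms are exact; this is a sketch, not code from the patch:

#include <cassert>
#include <cmath>

int main() {
  double x = 1.5, y = 2.0, z = 0.25;
  // fsub(fmul(x, y), z) -> fma(x, y, fneg(z)); exact for these inputs.
  // In general the fused form is more precise, hence the contract-flag
  // gate above.
  assert(std::fma(x, y, -z) == x * y - z);
  return 0;
}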
- if (Aggressive) { + if (Aggressive && isReassociable(N)) { + bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract(); // fold (fsub (fma x, y, (fmul u, v)), z) // -> (fma x, y (fma u, v, (fneg z))) if (CanFuse && N0.getOpcode() == PreferredFusedOpcode && - isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() && - N0.getOperand(2)->hasOneUse()) { + isContractableAndReassociableFMUL(N0.getOperand(2)) && + N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) { return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1), DAG.getNode(PreferredFusedOpcode, SL, VT, @@ -12905,7 +13411,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { // fold (fsub x, (fma y, z, (fmul u, v))) // -> (fma (fneg y), z, (fma (fneg u), v, x)) if (CanFuse && N1.getOpcode() == PreferredFusedOpcode && - isContractableFMUL(N1.getOperand(2)) && + isContractableAndReassociableFMUL(N1.getOperand(2)) && N1->hasOneUse() && NoSignedZero) { SDValue N20 = N1.getOperand(2).getOperand(0); SDValue N21 = N1.getOperand(2).getOperand(1); @@ -12916,7 +13422,6 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { DAG.getNode(ISD::FNEG, SL, VT, N20), N21, N0)); } - // fold (fsub (fma x, y, (fpext (fmul u, v))), z) // -> (fma x, y (fma (fpext u), (fpext v), (fneg z))) if (N0.getOpcode() == PreferredFusedOpcode && @@ -12924,7 +13429,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { SDValue N02 = N0.getOperand(2); if (N02.getOpcode() == ISD::FP_EXTEND) { SDValue N020 = N02.getOperand(0); - if (isContractableFMUL(N020) && + if (isContractableAndReassociableFMUL(N020) && TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, N020.getValueType())) { return DAG.getNode( @@ -12948,7 +13453,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { SDValue N00 = N0.getOperand(0); if (N00.getOpcode() == PreferredFusedOpcode) { SDValue N002 = N00.getOperand(2); - if (isContractableFMUL(N002) && + if (isContractableAndReassociableFMUL(N002) && TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, N00.getValueType())) { return DAG.getNode( @@ -12970,7 +13475,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { N1.getOperand(2).getOpcode() == ISD::FP_EXTEND && N1->hasOneUse()) { SDValue N120 = N1.getOperand(2).getOperand(0); - if (isContractableFMUL(N120) && + if (isContractableAndReassociableFMUL(N120) && TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, N120.getValueType())) { SDValue N1200 = N120.getOperand(0); @@ -12997,7 +13502,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { SDValue N100 = CvtSrc.getOperand(0); SDValue N101 = CvtSrc.getOperand(1); SDValue N102 = CvtSrc.getOperand(2); - if (isContractableFMUL(N102) && + if (isContractableAndReassociableFMUL(N102) && TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, CvtSrc.getValueType())) { SDValue N1020 = N102.getOperand(0); @@ -13933,13 +14438,25 @@ static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) { SDValue N1 = N->getOperand(1); if ((N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND)) { + EVT N1VT = N1->getValueType(0); + EVT N1Op0VT = N1->getOperand(0).getValueType(); + + // Always fold no-op FP casts. + if (N1VT == N1Op0VT) + return true; + // Do not optimize out type conversion of f128 type yet. // For some targets like x86_64, configuration is changed to keep one f128 // value in one SSE register, but instruction selection cannot handle // FCOPYSIGN on SSE registers yet. 
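The Aggressive block above additionally reassociates, e.g. rewriting fsub(fma(x, y, u*v), z) as fma(x, y, fma(u, v, -z)). A scalar check with values small enough that both associations are exact (illustrative only):

#include <cassert>
#include <cmath>

int main() {
  // fold (fsub (fma x, y, (fmul u, v)), z)
  //   -> (fma x, y, (fma u, v, (fneg z)))
  double x = 2, y = 3, u = 4, v = 5, z = 7;
  double before = std::fma(x, y, u * v) - z;         // 26 - 7
  double after = std::fma(x, y, std::fma(u, v, -z)); // fma(2, 3, 13)
  assert(before == 19 && after == 19);
  // The two associations only agree in exact arithmetic, which is why
  // the fold now also requires the reassociation flag (isReassociable).
  return 0;
}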
- EVT N1VT = N1->getValueType(0); - EVT N1Op0VT = N1->getOperand(0).getValueType(); - return (N1VT == N1Op0VT || N1Op0VT != MVT::f128); + if (N1Op0VT == MVT::f128) + return false; + + // Avoid mismatched vector operand types, for better instruction selection. + if (N1Op0VT.isVector()) + return false; + + return true; } return false; } @@ -15971,12 +16488,9 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) { // Prepare the argument for the new token factor for all the slices. SmallVector<SDValue, 8> ArgChains; - for (SmallVectorImpl<LoadedSlice>::const_iterator - LSIt = LoadedSlices.begin(), - LSItEnd = LoadedSlices.end(); - LSIt != LSItEnd; ++LSIt) { - SDValue SliceInst = LSIt->loadSlice(); - CombineTo(LSIt->Inst, SliceInst, true); + for (const LoadedSlice &LS : LoadedSlices) { + SDValue SliceInst = LS.loadSlice(); + CombineTo(LS.Inst, SliceInst, true); if (SliceInst.getOpcode() != ISD::LOAD) SliceInst = SliceInst.getOperand(0); assert(SliceInst->getOpcode() == ISD::LOAD && @@ -16408,6 +16922,9 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts( if (NumStores < 2) return false; + assert((!UseTrunc || !UseVector) && + "This optimization cannot emit a vector truncating store"); + // The latest Node in the DAG. SDLoc DL(StoreNodes[0].MemNode); @@ -16631,7 +17148,7 @@ void DAGCombiner::getStoreMergeCandidates( case StoreSource::Constant: if (NoTypeMatch) return false; - if (!(isa<ConstantSDNode>(OtherBC) || isa<ConstantFPSDNode>(OtherBC))) + if (!isIntOrFPConstant(OtherBC)) return false; break; case StoreSource::Extract: @@ -16903,6 +17420,7 @@ bool DAGCombiner::tryStoreMergeOfConstants( bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors; unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType; + bool UseTrunc = LastIntegerTrunc && !UseVector; // Check if we found a legal integer type that creates a meaningful // merge. @@ -16933,8 +17451,9 @@ bool DAGCombiner::tryStoreMergeOfConstants( continue; } - MadeChange |= mergeStoresOfConstantsOrVecElts( - StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc); + MadeChange |= mergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, + /*IsConstantSrc*/ true, + UseVector, UseTrunc); // Remove merged stores for next iteration. StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); @@ -17003,7 +17522,8 @@ bool DAGCombiner::tryStoreMergeOfExtracts( } MadeChange |= mergeStoresOfConstantsOrVecElts( - StoreNodes, MemVT, NumStoresToMerge, false, true, false); + StoreNodes, MemVT, NumStoresToMerge, /*IsConstantSrc*/ false, + /*UseVector*/ true, /*UseTrunc*/ false); StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge); NumConsecutiveStores -= NumStoresToMerge; @@ -17022,8 +17542,6 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1; bool MadeChange = false; - int64_t StartAddress = StoreNodes[0].OffsetFromBase; - // Look for load nodes which are used by the stored values. SmallVector<MemOpLink, 8> LoadNodes; @@ -17091,7 +17609,7 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes, unsigned LastLegalIntegerType = 1; bool isDereferenceable = true; bool DoIntegerTruncate = false; - StartAddress = LoadNodes[0].OffsetFromBase; + int64_t StartAddress = LoadNodes[0].OffsetFromBase; SDValue LoadChain = FirstLoad->getChain(); for (unsigned i = 1; i < LoadNodes.size(); ++i) { // All loads must share the same chain. 
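A host-side model of what the store merging above buys for constant sources, assuming a little-endian layout (all the legality and alignment checks live in the code above; this sketch only shows the equivalence being exploited):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  // Four adjacent i8 constant stores...
  uint8_t a[4];
  a[0] = 0x11; a[1] = 0x22; a[2] = 0x33; a[3] = 0x44;
  // ...replaced by a single i32 store of the combined constant
  // (little-endian byte order assumed).
  uint8_t b[4];
  uint32_t merged = 0x44332211;
  std::memcpy(b, &merged, sizeof(merged));
  assert(std::memcmp(a, b, sizeof(a)) == 0);
  return 0;
}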
@@ -17582,6 +18100,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) { if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() && ST->isUnindexed() && ST->isSimple() && + Ld->getAddressSpace() == ST->getAddressSpace() && // There can't be any side effects between the load and store, such as // a call or store. Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) { @@ -17595,7 +18114,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (ST->isUnindexed() && ST->isSimple() && ST1->isUnindexed() && ST1->isSimple()) { if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value && - ST->getMemoryVT() == ST1->getMemoryVT()) { + ST->getMemoryVT() == ST1->getMemoryVT() && + ST->getAddressSpace() == ST1->getAddressSpace()) { // If this is a store followed by a store with the same value to the // same location, then the store is dead/noop. return Chain; @@ -17606,7 +18126,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // BaseIndexOffset and the code below requires knowing the size // of a vector, so bail out if MemoryVT is scalable. !ST->getMemoryVT().isScalableVector() && - !ST1->getMemoryVT().isScalableVector()) { + !ST1->getMemoryVT().isScalableVector() && + ST->getAddressSpace() == ST1->getAddressSpace()) { const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG); const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG); unsigned STBitSize = ST->getMemoryVT().getFixedSizeInBits(); @@ -17625,10 +18146,11 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // If this is an FP_ROUND or TRUNC followed by a store, fold this into a // truncating store. We can do this even if this is already a truncstore. - if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE) - && Value.getNode()->hasOneUse() && ST->isUnindexed() && - TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(), - ST->getMemoryVT())) { + if ((Value.getOpcode() == ISD::FP_ROUND || + Value.getOpcode() == ISD::TRUNCATE) && + Value.getNode()->hasOneUse() && ST->isUnindexed() && + TLI.canCombineTruncStore(Value.getOperand(0).getValueType(), + ST->getMemoryVT(), LegalOperations)) { return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0), Ptr, ST->getMemoryVT(), ST->getMemOperand()); } @@ -18086,26 +18608,19 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT, Alignment = NewAlign; - SDValue NewPtr = OriginalLoad->getBasePtr(); - SDValue Offset; - EVT PtrType = NewPtr.getValueType(); MachinePointerInfo MPI; SDLoc DL(EVE); if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) { int Elt = ConstEltNo->getZExtValue(); unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8; - Offset = DAG.getConstant(PtrOff, DL, PtrType); MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff); } else { - Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType); - Offset = DAG.getNode( - ISD::MUL, DL, PtrType, Offset, - DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType)); // Discard the pointer info except the address space because the memory // operand can't represent this new access since the offset is variable. MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace()); } - NewPtr = DAG.getMemBasePlusOffset(NewPtr, Offset, DL); + SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(), + InVecVT, EltNo); // The replacement we need to do here is a little tricky: we need to // replace an extractelement of a load with a load. 
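scalarizeExtractedVectorLoad now forms the element address via TLI.getVectorElementPointer; for a constant index that is the plain byte offset shown in the MPI computation above. A host model of the rewrite, with made-up values:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  // extract_elt(load <4 x i32>* p, 2) -> load i32* (p + 2*4):
  // load only the requested element at Base + Elt * element size.
  uint32_t vec[4] = {10, 20, 30, 40};
  unsigned Elt = 2;
  unsigned PtrOff = (32 * Elt) / 8; // VecEltVT.getSizeInBits() * Elt / 8
  const char *Base = reinterpret_cast<const char *>(vec);
  uint32_t narrow;
  std::memcpy(&narrow, Base + PtrOff, sizeof(narrow));
  assert(narrow == vec[Elt]);
  return 0;
}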
@@ -18710,6 +19225,9 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N, uint64_t InVT1Size = InVT1.getFixedSizeInBits(); uint64_t InVT2Size = InVT2.getFixedSizeInBits(); + assert(InVT2Size <= InVT1Size && + "Inputs must be sorted to be in non-increasing vector size order."); + // We can't generate a shuffle node with mismatched input and output types. // Try to make the types match the type of the output. if (InVT1 != VT || InVT2 != VT) { @@ -18736,7 +19254,10 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N, // Since we now have shorter input vectors, adjust the offset of the // second vector's start. Vec2Offset = NumElems; - } else if (InVT2Size <= InVT1Size) { + } else { + assert(InVT2Size <= InVT1Size && + "Second input is not going to be larger than the first one."); + // VecIn1 is wider than the output, and we have another, possibly // smaller input. Pad the smaller input with undefs, shuffle at the // input vector width, and extract the output. @@ -18755,11 +19276,6 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N, DAG.getUNDEF(InVT1), VecIn2, ZeroIdx); } ShuffleNumElems = NumElems * 2; - } else { - // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider - // than VecIn1. We can't handle this for now - this case will disappear - // when we start sorting the vectors by type. - return SDValue(); } } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) { SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2)); @@ -18884,6 +19400,15 @@ static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) { return DAG.getBitcast(VT, Shuf); } +// FIXME: promote to STLExtras. +template <typename R, typename T> +static auto getFirstIndexOf(R &&Range, const T &Val) { + auto I = find(Range, Val); + if (I == Range.end()) + return static_cast<decltype(std::distance(Range.begin(), I))>(-1); + return std::distance(Range.begin(), I); +} + // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT // operations. If the types of the vectors we're extracting from allow it, // turn this into a vector_shuffle node. @@ -18952,9 +19477,11 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) { // Have we seen this input vector before? // The vectors are expected to be tiny (usually 1 or 2 elements), so using // a map back from SDValues to numbers isn't worth it. - unsigned Idx = std::distance(VecIn.begin(), find(VecIn, ExtractedFromVec)); - if (Idx == VecIn.size()) + int Idx = getFirstIndexOf(VecIn, ExtractedFromVec); + if (Idx == -1) { // A new source vector? + Idx = VecIn.size(); VecIn.push_back(ExtractedFromVec); + } VectorMask[i] = Idx; } @@ -18989,7 +19516,9 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) { unsigned SplitSize = NearestPow2 / 2; EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), SplitSize); - if (TLI.isTypeLegal(SplitVT)) { + if (TLI.isTypeLegal(SplitVT) && + SplitSize + SplitVT.getVectorNumElements() <= + InVT.getVectorNumElements()) { SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec, DAG.getVectorIdxConstant(SplitSize, DL)); SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec, @@ -19008,9 +19537,28 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) { } } - // TODO: We want to sort the vectors by descending length, so that adjacent - // pairs have similar length, and the longer vector is always first in the - // pair. 
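A standalone equivalent of the getFirstIndexOf helper introduced above (the FIXME suggests promoting it to STLExtras): return the index of the first occurrence of Val, or -1 as the "new source" sentinel. Sketch with standard-library calls in place of llvm::find:

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

template <typename R, typename T>
static std::ptrdiff_t getFirstIndexOf(R &&Range, const T &Val) {
  auto I = std::find(Range.begin(), Range.end(), Val);
  if (I == Range.end())
    return -1;
  return std::distance(Range.begin(), I);
}

int main() {
  std::vector<int> VecIn = {7, 9, 7};
  assert(getFirstIndexOf(VecIn, 9) == 1);
  assert(getFirstIndexOf(VecIn, 7) == 0);  // first occurrence wins
  assert(getFirstIndexOf(VecIn, 4) == -1); // not found
  return 0;
}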
+ // Sort input vectors by decreasing vector element count, + // while preserving the relative order of equally-sized vectors. + // Note that we keep the first "implicit zero vector as-is. + SmallVector<SDValue, 8> SortedVecIn(VecIn); + llvm::stable_sort(MutableArrayRef<SDValue>(SortedVecIn).drop_front(), + [](const SDValue &a, const SDValue &b) { + return a.getValueType().getVectorNumElements() > + b.getValueType().getVectorNumElements(); + }); + + // We now also need to rebuild the VectorMask, because it referenced element + // order in VecIn, and we just sorted them. + for (int &SourceVectorIndex : VectorMask) { + if (SourceVectorIndex <= 0) + continue; + unsigned Idx = getFirstIndexOf(SortedVecIn, VecIn[SourceVectorIndex]); + assert(Idx > 0 && Idx < SortedVecIn.size() && + VecIn[SourceVectorIndex] == SortedVecIn[Idx] && "Remapping failure"); + SourceVectorIndex = Idx; + } + + VecIn = std::move(SortedVecIn); // TODO: Should this fire if some of the input vectors has illegal type (like // it does now), or should we let legalization run its course first? @@ -19183,13 +19731,6 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { } } - // A splat of a single element is a SPLAT_VECTOR if supported on the target. - if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand) - if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) { - assert(!V.isUndef() && "Splat of undef should have been handled earlier"); - return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V); - } - // Check if we can express BUILD VECTOR via subvector extract. if (!LegalTypes && (N->getNumOperands() > 1)) { SDValue Op0 = N->getOperand(0); @@ -19231,6 +19772,14 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (SDValue V = reduceBuildVecToShuffle(N)) return V; + // A splat of a single element is a SPLAT_VECTOR if supported on the target. + // Do this late as some of the above may replace the splat. + if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand) + if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) { + assert(!V.isUndef() && "Splat of undef should have been handled earlier"); + return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V); + } + return SDValue(); } @@ -19879,7 +20428,8 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { // Try to move vector bitcast after extract_subv by scaling extraction index: // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index') if (V.getOpcode() == ISD::BITCAST && - V.getOperand(0).getValueType().isVector()) { + V.getOperand(0).getValueType().isVector() && + (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) { SDValue SrcOp = V.getOperand(0); EVT SrcVT = SrcOp.getValueType(); unsigned SrcNumElts = SrcVT.getVectorMinNumElements(); @@ -20052,6 +20602,9 @@ static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf, for (unsigned i = 0; i != NumElts; ++i) { if (Mask[i] == -1) continue; + // If we reference the upper (undef) subvector then the element is undef. + if ((Mask[i] % NumElts) >= HalfNumElts) + continue; int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts; if (i < HalfNumElts) Mask0[i] = M; @@ -20213,7 +20766,7 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, // generating a splat; semantically, this is fine, but it's likely to // generate low-quality code if the target can't reconstruct an appropriate // shuffle. 
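The new sort above must be stable so equally-sized sources keep their relative order, and it skips the pinned first (implicit zero) slot. A toy model, with the int standing in for a source's vector element count:

#include <algorithm>
#include <cassert>
#include <utility>
#include <vector>

int main() {
  // Slot 0 is pinned; the rest are ordered by decreasing count.
  // 'b' and 'd' tie at 2 elements, and stable_sort keeps 'b' first.
  std::vector<std::pair<char, int>> VecIn = {
      {'a', 1}, {'b', 2}, {'c', 4}, {'d', 2}};
  std::stable_sort(VecIn.begin() + 1, VecIn.end(),
                   [](const std::pair<char, int> &x,
                      const std::pair<char, int> &y) {
                     return x.second > y.second;
                   });
  assert(VecIn[0].first == 'a' && VecIn[1].first == 'c');
  assert(VecIn[2].first == 'b' && VecIn[3].first == 'd');
  return 0;
}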
- if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op)) + if (!Op.isUndef() && !isIntOrFPConstant(Op)) if (!IsSplat && !DuplicateOps.insert(Op).second) return SDValue(); @@ -20798,44 +21351,15 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { } } - if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) { - // Canonicalize shuffles according to rules: - // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A) - // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B) - // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B) - if (N1.getOpcode() == ISD::VECTOR_SHUFFLE && - N0.getOpcode() != ISD::VECTOR_SHUFFLE) { - // The incoming shuffle must be of the same type as the result of the - // current shuffle. - assert(N1->getOperand(0).getValueType() == VT && - "Shuffle types don't match"); - - SDValue SV0 = N1->getOperand(0); - SDValue SV1 = N1->getOperand(1); - bool HasSameOp0 = N0 == SV0; - bool IsSV1Undef = SV1.isUndef(); - if (HasSameOp0 || IsSV1Undef || N0 == SV1) - // Commute the operands of this shuffle so merging below will trigger. - return DAG.getCommutedVectorShuffle(*SVN); - } - - // Canonicalize splat shuffles to the RHS to improve merging below. - // shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u)) - if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && - N1.getOpcode() == ISD::VECTOR_SHUFFLE && - cast<ShuffleVectorSDNode>(N0)->isSplat() && - !cast<ShuffleVectorSDNode>(N1)->isSplat()) { - return DAG.getCommutedVectorShuffle(*SVN); - } - } - // Compute the combined shuffle mask for a shuffle with SV0 as the first // operand, and SV1 as the second operand. - // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask). - auto MergeInnerShuffle = [NumElts](ShuffleVectorSDNode *SVN, - ShuffleVectorSDNode *OtherSVN, SDValue N1, - SDValue &SV0, SDValue &SV1, - SmallVectorImpl<int> &Mask) -> bool { + // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false + // Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true + auto MergeInnerShuffle = + [NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN, + ShuffleVectorSDNode *OtherSVN, SDValue N1, + const TargetLowering &TLI, SDValue &SV0, SDValue &SV1, + SmallVectorImpl<int> &Mask) -> bool { // Don't try to fold splats; they're likely to simplify somehow, or they // might be free. if (OtherSVN->isSplat()) @@ -20852,6 +21376,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { continue; } + if (Commute) + Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts); + SDValue CurrentVec; if (Idx < (int)NumElts) { // This shuffle index refers to the inner shuffle N0. Lookup the inner @@ -20922,44 +21449,161 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // Bail out if we cannot convert the shuffle pair into a single shuffle. return false; } - return true; + + if (llvm::all_of(Mask, [](int M) { return M < 0; })) + return true; + + // Avoid introducing shuffles with illegal mask. 
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2) + if (TLI.isShuffleMaskLegal(Mask, VT)) + return true; + + std::swap(SV0, SV1); + ShuffleVectorSDNode::commuteMask(Mask); + return TLI.isShuffleMaskLegal(Mask, VT); }; - // Try to fold according to rules: - // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2) - // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2) - // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2) - // Don't try to fold shuffles with illegal type. - // Only fold if this shuffle is the only user of the other shuffle. - if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) && - Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) { - ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0); - - // The incoming shuffle must be of the same type as the result of the - // current shuffle. - assert(OtherSV->getOperand(0).getValueType() == VT && - "Shuffle types don't match"); - - SDValue SV0, SV1; - SmallVector<int, 4> Mask; - if (MergeInnerShuffle(SVN, OtherSV, N1, SV0, SV1, Mask)) { - // Check if all indices in Mask are Undef. In case, propagate Undef. - if (llvm::all_of(Mask, [](int M) { return M < 0; })) - return DAG.getUNDEF(VT); - - if (!SV0.getNode()) - SV0 = DAG.getUNDEF(VT); - if (!SV1.getNode()) - SV1 = DAG.getUNDEF(VT); - - // Avoid introducing shuffles with illegal mask. - // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2) - // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2) - // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2) - // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2) - // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2) - // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2) - return TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask, DAG); + if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) { + // Canonicalize shuffles according to rules: + // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A) + // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B) + // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B) + if (N1.getOpcode() == ISD::VECTOR_SHUFFLE && + N0.getOpcode() != ISD::VECTOR_SHUFFLE) { + // The incoming shuffle must be of the same type as the result of the + // current shuffle. + assert(N1->getOperand(0).getValueType() == VT && + "Shuffle types don't match"); + + SDValue SV0 = N1->getOperand(0); + SDValue SV1 = N1->getOperand(1); + bool HasSameOp0 = N0 == SV0; + bool IsSV1Undef = SV1.isUndef(); + if (HasSameOp0 || IsSV1Undef || N0 == SV1) + // Commute the operands of this shuffle so merging below will trigger. + return DAG.getCommutedVectorShuffle(*SVN); + } + + // Canonicalize splat shuffles to the RHS to improve merging below. 
+ // shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u)) + if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && + N1.getOpcode() == ISD::VECTOR_SHUFFLE && + cast<ShuffleVectorSDNode>(N0)->isSplat() && + !cast<ShuffleVectorSDNode>(N1)->isSplat()) { + return DAG.getCommutedVectorShuffle(*SVN); + } + + // Try to fold according to rules: + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2) + // Don't try to fold shuffles with illegal type. + // Only fold if this shuffle is the only user of the other shuffle. + // Try matching shuffle(C,shuffle(A,B)) commutted patterns as well. + for (int i = 0; i != 2; ++i) { + if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE && + N->isOnlyUserOf(N->getOperand(i).getNode())) { + // The incoming shuffle must be of the same type as the result of the + // current shuffle. + auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i)); + assert(OtherSV->getOperand(0).getValueType() == VT && + "Shuffle types don't match"); + + SDValue SV0, SV1; + SmallVector<int, 4> Mask; + if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI, + SV0, SV1, Mask)) { + // Check if all indices in Mask are Undef. In case, propagate Undef. + if (llvm::all_of(Mask, [](int M) { return M < 0; })) + return DAG.getUNDEF(VT); + + return DAG.getVectorShuffle(VT, SDLoc(N), + SV0 ? SV0 : DAG.getUNDEF(VT), + SV1 ? SV1 : DAG.getUNDEF(VT), Mask); + } + } + } + + // Merge shuffles through binops if we are able to merge it with at least + // one other shuffles. + // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef) + // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d))) + unsigned SrcOpcode = N0.getOpcode(); + if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) && + (N1.isUndef() || + (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) { + // Get binop source ops, or just pass on the undef. + SDValue Op00 = N0.getOperand(0); + SDValue Op01 = N0.getOperand(1); + SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0); + SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1); + // TODO: We might be able to relax the VT check but we don't currently + // have any isBinOp() that has different result/ops VTs so play safe until + // we have test coverage. + if (Op00.getValueType() == VT && Op10.getValueType() == VT && + Op01.getValueType() == VT && Op11.getValueType() == VT && + (Op00.getOpcode() == ISD::VECTOR_SHUFFLE || + Op10.getOpcode() == ISD::VECTOR_SHUFFLE || + Op01.getOpcode() == ISD::VECTOR_SHUFFLE || + Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) { + auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1, + SmallVectorImpl<int> &Mask, bool LeftOp, + bool Commute) { + SDValue InnerN = Commute ? N1 : N0; + SDValue Op0 = LeftOp ? Op00 : Op01; + SDValue Op1 = LeftOp ? Op10 : Op11; + if (Commute) + std::swap(Op0, Op1); + // Only accept the merged shuffle if we don't introduce undef elements, + // or the inner shuffle already contained undef elements. + auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(Op0); + return SVN0 && InnerN->isOnlyUserOf(SVN0) && + MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1, + Mask) && + (llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) || + llvm::none_of(Mask, [](int M) { return M < 0; })); + }; + + // Ensure we don't increase the number of shuffles - we must merge a + // shuffle from at least one of the LHS and RHS ops. 
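A concrete instance of the mask merging those rules describe, modelling VECTOR_SHUFFLE over two 4-lane inputs; the composed mask substitutes inner-mask entries for lanes that point into the inner shuffle. All masks here are made up for illustration:

#include <array>
#include <cassert>

// VECTOR_SHUFFLE model: indices 0-3 select from X, 4-7 from Y.
static std::array<int, 4> shuffle(const std::array<int, 4> &X,
                                  const std::array<int, 4> &Y,
                                  const std::array<int, 4> &M) {
  std::array<int, 4> R{};
  for (int i = 0; i != 4; ++i)
    R[i] = M[i] < 4 ? X[M[i]] : Y[M[i] - 4];
  return R;
}

int main() {
  std::array<int, 4> A{1, 2, 3, 4}, B{5, 6, 7, 8}, C{9, 10, 11, 12};
  std::array<int, 4> M0{4, 1, 6, 3}; // inner: shuffle(A, B, M0)
  std::array<int, 4> M1{0, 2, 5, 7}; // outer: shuffle(inner, C, M1)
  // Compose: lane 0 -> M0[0] = 4 -> B[0]; lane 1 -> M0[2] = 6 -> B[2];
  // lanes 2, 3 read C[1], C[3]. Every lane lands in B or C, so
  // shuffle(shuffle(A, B, M0), C, M1) == shuffle(B, C, M2):
  std::array<int, 4> M2{0, 2, 5, 7};
  assert(shuffle(shuffle(A, B, M0), C, M1) == shuffle(B, C, M2));
  return 0;
}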
+ bool MergedLeft = false; + SDValue LeftSV0, LeftSV1; + SmallVector<int, 4> LeftMask; + if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) || + CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) { + MergedLeft = true; + } else { + LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end()); + LeftSV0 = Op00, LeftSV1 = Op10; + } + + bool MergedRight = false; + SDValue RightSV0, RightSV1; + SmallVector<int, 4> RightMask; + if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) || + CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) { + MergedRight = true; + } else { + RightMask.assign(SVN->getMask().begin(), SVN->getMask().end()); + RightSV0 = Op01, RightSV1 = Op11; + } + + if (MergedLeft || MergedRight) { + SDLoc DL(N); + SDValue LHS = DAG.getVectorShuffle( + VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT), + LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask); + SDValue RHS = DAG.getVectorShuffle( + VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT), + RightSV1 ? RightSV1 : DAG.getUNDEF(VT), RightMask); + return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS); + } + } } } @@ -21174,7 +21818,7 @@ SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) { SDValue N0 = N->getOperand(0); // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op) - if (N0->getOpcode() == ISD::AND) { + if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) { ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1)); if (AndConst && AndConst->getAPIntValue() == 0xffff) { return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0), @@ -21775,6 +22419,50 @@ SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, return DAG.getNode(ISD::AND, DL, AType, Shift, N2); } +// Fold select(cc, binop(), binop()) -> binop(select(), select()) etc. +SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + EVT VT = N->getValueType(0); + SDLoc DL(N); + + unsigned BinOpc = N1.getOpcode(); + if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc)) + return SDValue(); + + if (!N->isOnlyUserOf(N0.getNode()) || !N->isOnlyUserOf(N1.getNode())) + return SDValue(); + + // Fold select(cond, binop(x, y), binop(z, y)) + // --> binop(select(cond, x, z), y) + if (N1.getOperand(1) == N2.getOperand(1)) { + SDValue NewSel = + DAG.getSelect(DL, VT, N0, N1.getOperand(0), N2.getOperand(0)); + SDValue NewBinOp = DAG.getNode(BinOpc, DL, VT, NewSel, N1.getOperand(1)); + NewBinOp->setFlags(N1->getFlags()); + NewBinOp->intersectFlagsWith(N2->getFlags()); + return NewBinOp; + } + + // Fold select(cond, binop(x, y), binop(x, z)) + // --> binop(x, select(cond, y, z)) + // Second op VT might be different (e.g. shift amount type) + if (N1.getOperand(0) == N2.getOperand(0) && + VT == N1.getOperand(1).getValueType() && + VT == N2.getOperand(1).getValueType()) { + SDValue NewSel = + DAG.getSelect(DL, VT, N0, N1.getOperand(1), N2.getOperand(1)); + SDValue NewBinOp = DAG.getNode(BinOpc, DL, VT, N1.getOperand(0), NewSel); + NewBinOp->setFlags(N1->getFlags()); + NewBinOp->intersectFlagsWith(N2->getFlags()); + return NewBinOp; + } + + // TODO: Handle isCommutativeBinOp patterns as well? + return SDValue(); +} + // Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values. 
SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) { SDValue N0 = N->getOperand(0); @@ -22426,12 +23114,11 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const { int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1); int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset; int64_t Overlap1 = *Size1 + SrcValOffset1 - MinOffset; - AliasResult AAResult = AA->alias( - MemoryLocation(MUC0.MMO->getValue(), Overlap0, - UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()), - MemoryLocation(MUC1.MMO->getValue(), Overlap1, - UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())); - if (AAResult == NoAlias) + if (AA->isNoAlias( + MemoryLocation(MUC0.MMO->getValue(), Overlap0, + UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()), + MemoryLocation(MUC1.MMO->getValue(), Overlap1, + UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes()))) return false; } @@ -22614,6 +23301,10 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) { if (BasePtr.getBase().isUndef()) return false; + // Do not handle stores to opaque types + if (St->getMemoryVT().isZeroSized()) + return false; + // BaseIndexOffset assumes that offsets are fixed-size, which // is not valid for scalable vectors where the offsets are // scaled by `vscale`, so bail out early. @@ -22624,6 +23315,9 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) { Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit); while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) { + if (Chain->getMemoryVT().isScalableVector()) + return false; + // If the chain has more than one use, then we can't reorder the mem ops. if (!SDValue(Chain, 0)->hasOneUse()) break; diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index 62f7f3d98ba6..4ca731cfdf62 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -238,38 +238,6 @@ void FastISel::flushLocalValueMap() { SavedInsertPt = FuncInfo.InsertPt; } -bool FastISel::hasTrivialKill(const Value *V) { - // Don't consider constants or arguments to have trivial kills. - const Instruction *I = dyn_cast<Instruction>(V); - if (!I) - return false; - - // No-op casts are trivially coalesced by fast-isel. - if (const auto *Cast = dyn_cast<CastInst>(I)) - if (Cast->isNoopCast(DL) && !hasTrivialKill(Cast->getOperand(0))) - return false; - - // Even the value might have only one use in the LLVM IR, it is possible that - // FastISel might fold the use into another instruction and now there is more - // than one use at the Machine Instruction level. - Register Reg = lookUpRegForValue(V); - if (Reg && !MRI.use_empty(Reg)) - return false; - - // GEPs with all zero indices are trivially coalesced by fast-isel. - if (const auto *GEP = dyn_cast<GetElementPtrInst>(I)) - if (GEP->hasAllZeroIndices() && !hasTrivialKill(GEP->getOperand(0))) - return false; - - // Only instructions with a single use in the same basic block are considered - // to have trivial kills. - return I->hasOneUse() && - !(I->getOpcode() == Instruction::BitCast || - I->getOpcode() == Instruction::PtrToInt || - I->getOpcode() == Instruction::IntToPtr) && - cast<Instruction>(*I->user_begin())->getParent() == I->getParent(); -} - Register FastISel::getRegForValue(const Value *V) { EVT RealVT = TLI.getValueType(DL, V->getType(), /*AllowUnknown=*/true); // Don't handle non-simple values in FastISel. 
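The isNoAlias query in the isAlias hunk above sizes each MemoryLocation from the common minimum offset to the end of its own access, so both windows share one origin. A check of that arithmetic with made-up offsets (the real code reads the sizes out of Optionals):

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  // Two accesses into the same underlying object:
  int64_t SrcValOffset0 = 16, Size0 = 4; // bytes [16, 20)
  int64_t SrcValOffset1 = 8, Size1 = 4;  // bytes [8, 12)
  int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
  // Each location spans from the earliest offset to the end of its
  // access, measured from the shared origin handed to AA.
  int64_t Overlap0 = Size0 + SrcValOffset0 - MinOffset;
  int64_t Overlap1 = Size1 + SrcValOffset1 - MinOffset;
  assert(Overlap0 == 12 && Overlap1 == 4);
  return 0;
}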
@@ -342,8 +310,8 @@ Register FastISel::materializeConstant(const Value *V, MVT VT) { Register IntegerReg = getRegForValue(ConstantInt::get(V->getContext(), SIntVal)); if (IntegerReg) - Reg = fastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, IntegerReg, - /*Op0IsKill=*/false); + Reg = fastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, + IntegerReg); } } } else if (const auto *Op = dyn_cast<Operator>(V)) { @@ -415,27 +383,22 @@ void FastISel::updateValueMap(const Value *I, Register Reg, unsigned NumRegs) { } } -std::pair<Register, bool> FastISel::getRegForGEPIndex(const Value *Idx) { +Register FastISel::getRegForGEPIndex(const Value *Idx) { Register IdxN = getRegForValue(Idx); if (!IdxN) // Unhandled operand. Halt "fast" selection and bail. - return std::pair<Register, bool>(Register(), false); - - bool IdxNIsKill = hasTrivialKill(Idx); + return Register(); // If the index is smaller or larger than intptr_t, truncate or extend it. MVT PtrVT = TLI.getPointerTy(DL); EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false); if (IdxVT.bitsLT(PtrVT)) { - IdxN = fastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND, IdxN, - IdxNIsKill); - IdxNIsKill = true; + IdxN = fastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND, IdxN); } else if (IdxVT.bitsGT(PtrVT)) { IdxN = - fastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE, IdxN, IdxNIsKill); - IdxNIsKill = true; + fastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE, IdxN); } - return std::pair<Register, bool>(IdxN, IdxNIsKill); + return IdxN; } void FastISel::recomputeInsertPt() { @@ -513,11 +476,10 @@ bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) { Register Op1 = getRegForValue(I->getOperand(1)); if (!Op1) return false; - bool Op1IsKill = hasTrivialKill(I->getOperand(1)); Register ResultReg = - fastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op1, Op1IsKill, - CI->getZExtValue(), VT.getSimpleVT()); + fastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op1, CI->getZExtValue(), + VT.getSimpleVT()); if (!ResultReg) return false; @@ -529,7 +491,6 @@ bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) { Register Op0 = getRegForValue(I->getOperand(0)); if (!Op0) // Unhandled operand. Halt "fast" selection and bail. return false; - bool Op0IsKill = hasTrivialKill(I->getOperand(0)); // Check if the second operand is a constant and handle it appropriately. if (const auto *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { @@ -549,8 +510,8 @@ bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) { ISDOpcode = ISD::AND; } - Register ResultReg = fastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0, - Op0IsKill, Imm, VT.getSimpleVT()); + Register ResultReg = fastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0, Imm, + VT.getSimpleVT()); if (!ResultReg) return false; @@ -562,11 +523,10 @@ bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) { Register Op1 = getRegForValue(I->getOperand(1)); if (!Op1) // Unhandled operand. Halt "fast" selection and bail. return false; - bool Op1IsKill = hasTrivialKill(I->getOperand(1)); // Now we have both operands in registers. Emit the instruction. Register ResultReg = fastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(), - ISDOpcode, Op0, Op0IsKill, Op1, Op1IsKill); + ISDOpcode, Op0, Op1); if (!ResultReg) // Target-specific code wasn't able to find a machine opcode for // the given ISD opcode and type. Halt "fast" selection and bail. 
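getRegForGEPIndex above sign-extends (or truncates) the index to pointer width before the address arithmetic; sign extension is what keeps negative GEP indices working. A host model assuming a 32-bit index and 64-bit pointers:

#include <cassert>
#include <cstdint>

int main() {
  int32_t Idx = -2;            // i32 GEP index
  int64_t IdxN = (int64_t)Idx; // ISD::SIGN_EXTEND to pointer width
  int64_t ElementSize = 8;
  assert(IdxN * ElementSize == -16); // N = N + Idx * ElementSize steps back
  // A zero extension would instead yield 0xFFFFFFFE * 8, a huge
  // positive offset -- the wrong address.
  return 0;
}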
@@ -587,8 +547,6 @@ bool FastISel::selectGetElementPtr(const User *I) { if (isa<VectorType>(I->getType())) return false; - bool NIsKill = hasTrivialKill(I->getOperand(0)); - // Keep a running tab of the total offset to coalesce multiple N = N + Offset // into a single N = N + TotalOffset. uint64_t TotalOffs = 0; @@ -604,10 +562,9 @@ bool FastISel::selectGetElementPtr(const User *I) { // N = N + Offset TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field); if (TotalOffs >= MaxOffs) { - N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); + N = fastEmit_ri_(VT, ISD::ADD, N, TotalOffs, VT); if (!N) // Unhandled operand. Halt "fast" selection and bail. return false; - NIsKill = true; TotalOffs = 0; } } @@ -622,43 +579,38 @@ bool FastISel::selectGetElementPtr(const User *I) { uint64_t IdxN = CI->getValue().sextOrTrunc(64).getSExtValue(); TotalOffs += DL.getTypeAllocSize(Ty) * IdxN; if (TotalOffs >= MaxOffs) { - N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); + N = fastEmit_ri_(VT, ISD::ADD, N, TotalOffs, VT); if (!N) // Unhandled operand. Halt "fast" selection and bail. return false; - NIsKill = true; TotalOffs = 0; } continue; } if (TotalOffs) { - N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); + N = fastEmit_ri_(VT, ISD::ADD, N, TotalOffs, VT); if (!N) // Unhandled operand. Halt "fast" selection and bail. return false; - NIsKill = true; TotalOffs = 0; } // N = N + Idx * ElementSize; uint64_t ElementSize = DL.getTypeAllocSize(Ty); - std::pair<Register, bool> Pair = getRegForGEPIndex(Idx); - Register IdxN = Pair.first; - bool IdxNIsKill = Pair.second; + Register IdxN = getRegForGEPIndex(Idx); if (!IdxN) // Unhandled operand. Halt "fast" selection and bail. return false; if (ElementSize != 1) { - IdxN = fastEmit_ri_(VT, ISD::MUL, IdxN, IdxNIsKill, ElementSize, VT); + IdxN = fastEmit_ri_(VT, ISD::MUL, IdxN, ElementSize, VT); if (!IdxN) // Unhandled operand. Halt "fast" selection and bail. return false; - IdxNIsKill = true; } - N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill); + N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN); if (!N) // Unhandled operand. Halt "fast" selection and bail. return false; } } if (TotalOffs) { - N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); + N = fastEmit_ri_(VT, ISD::ADD, N, TotalOffs, VT); if (!N) // Unhandled operand. Halt "fast" selection and bail. return false; } @@ -1081,9 +1033,9 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) { for (auto &Arg : CLI.getArgs()) { Type *FinalType = Arg.Ty; if (Arg.IsByVal) - FinalType = cast<PointerType>(Arg.Ty)->getElementType(); + FinalType = Arg.IndirectType; bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters( - FinalType, CLI.CallConv, CLI.IsVarArg); + FinalType, CLI.CallConv, CLI.IsVarArg, DL); ISD::ArgFlagsTy Flags; if (Arg.IsZExt) @@ -1096,6 +1048,8 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) { Flags.setSRet(); if (Arg.IsSwiftSelf) Flags.setSwiftSelf(); + if (Arg.IsSwiftAsync) + Flags.setSwiftAsync(); if (Arg.IsSwiftError) Flags.setSwiftError(); if (Arg.IsCFGuardTarget) @@ -1120,26 +1074,24 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) { // preallocated handling in the various CC lowering callbacks. Flags.setByVal(); } + MaybeAlign MemAlign = Arg.Alignment; if (Arg.IsByVal || Arg.IsInAlloca || Arg.IsPreallocated) { - PointerType *Ty = cast<PointerType>(Arg.Ty); - Type *ElementTy = Ty->getElementType(); - unsigned FrameSize = - DL.getTypeAllocSize(Arg.ByValType ? 
Arg.ByValType : ElementTy); + unsigned FrameSize = DL.getTypeAllocSize(Arg.IndirectType); // For ByVal, alignment should come from FE. BE will guess if this info // is not there, but there are cases it cannot get right. - MaybeAlign FrameAlign = Arg.Alignment; - if (!FrameAlign) - FrameAlign = Align(TLI.getByValTypeAlignment(ElementTy, DL)); + if (!MemAlign) + MemAlign = Align(TLI.getByValTypeAlignment(Arg.IndirectType, DL)); Flags.setByValSize(FrameSize); - Flags.setByValAlign(*FrameAlign); + } else if (!MemAlign) { + MemAlign = DL.getABITypeAlign(Arg.Ty); } + Flags.setMemAlign(*MemAlign); if (Arg.IsNest) Flags.setNest(); if (NeedsRegBlock) Flags.setInConsecutiveRegs(); Flags.setOrigAlign(DL.getABITypeAlign(Arg.Ty)); - CLI.OutVals.push_back(Arg.Val); CLI.OutFlags.push_back(Flags); } @@ -1192,7 +1144,7 @@ bool FastISel::lowerCall(const CallInst *CI) { IsTailCall = false; if (IsTailCall && MF->getFunction() .getFnAttribute("disable-tail-calls") - .getValueAsString() == "true") + .getValueAsBool()) IsTailCall = false; CallLoweringInfo CLI; @@ -1304,9 +1256,21 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { "Expected inlined-at fields to agree"); // A dbg.declare describes the address of a source variable, so lower it // into an indirect DBG_VALUE. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true, - *Op, DI->getVariable(), DI->getExpression()); + auto Builder = + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true, *Op, + DI->getVariable(), DI->getExpression()); + + // If using instruction referencing, mutate this into a DBG_INSTR_REF, + // to be later patched up by finalizeDebugInstrRefs. Tack a deref onto + // the expression, we don't have an "indirect" flag in DBG_INSTR_REF. + if (TM.Options.ValueTrackingVariableLocations && Op->isReg()) { + Builder->setDesc(TII.get(TargetOpcode::DBG_INSTR_REF)); + Builder->getOperand(1).ChangeToImmediate(0); + auto *NewExpr = + DIExpression::prepend(DI->getExpression(), DIExpression::DerefBefore); + Builder->getOperand(3).setMetadata(NewExpr); + } } else { // We can't yet handle anything else here because it would require // generating code, thus altering codegen because of debug info. @@ -1322,9 +1286,9 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { const Value *V = DI->getValue(); assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) && "Expected inlined-at fields to agree"); - if (!V || isa<UndefValue>(V)) { - // Currently the optimizer can produce this; insert an undef to - // help debugging. + if (!V || isa<UndefValue>(V) || DI->hasArgList()) { + // DI is either undef or cannot produce a valid DBG_VALUE, so produce an + // undef DBG_VALUE to terminate any prior location. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, false, 0U, DI->getVariable(), DI->getExpression()); } else if (const auto *CI = dyn_cast<ConstantInt>(V)) { @@ -1349,8 +1313,16 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { } else if (Register Reg = lookUpRegForValue(V)) { // FIXME: This does not handle register-indirect values at offset 0. 
bool IsIndirect = false; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, IsIndirect, Reg, - DI->getVariable(), DI->getExpression()); + auto Builder = + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, IsIndirect, Reg, + DI->getVariable(), DI->getExpression()); + + // If using instruction referencing, mutate this into a DBG_INSTR_REF, + // to be later patched up by finalizeDebugInstrRefs. + if (TM.Options.ValueTrackingVariableLocations) { + Builder->setDesc(TII.get(TargetOpcode::DBG_INSTR_REF)); + Builder->getOperand(1).ChangeToImmediate(0); + } } else { // We don't know how to handle other cases, so we drop. LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); @@ -1421,10 +1393,8 @@ bool FastISel::selectCast(const User *I, unsigned Opcode) { // Unhandled operand. Halt "fast" selection and bail. return false; - bool InputRegIsKill = hasTrivialKill(I->getOperand(0)); - Register ResultReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), - Opcode, InputReg, InputRegIsKill); + Opcode, InputReg); if (!ResultReg) return false; @@ -1455,7 +1425,6 @@ bool FastISel::selectBitCast(const User *I) { Register Op0 = getRegForValue(I->getOperand(0)); if (!Op0) // Unhandled operand. Halt "fast" selection and bail. return false; - bool Op0IsKill = hasTrivialKill(I->getOperand(0)); // First, try to perform the bitcast by inserting a reg-reg copy. Register ResultReg; @@ -1472,7 +1441,7 @@ bool FastISel::selectBitCast(const User *I) { // If the reg-reg copy failed, select a BITCAST opcode. if (!ResultReg) - ResultReg = fastEmit_r(SrcVT, DstVT, ISD::BITCAST, Op0, Op0IsKill); + ResultReg = fastEmit_r(SrcVT, DstVT, ISD::BITCAST, Op0); if (!ResultReg) return false; @@ -1648,12 +1617,11 @@ bool FastISel::selectFNeg(const User *I, const Value *In) { Register OpReg = getRegForValue(In); if (!OpReg) return false; - bool OpRegIsKill = hasTrivialKill(In); // If the target has ISD::FNEG, use it. 
EVT VT = TLI.getValueType(DL, I->getType()); Register ResultReg = fastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(), ISD::FNEG, - OpReg, OpRegIsKill); + OpReg); if (ResultReg) { updateValueMap(I, ResultReg); return true; @@ -1668,18 +1636,18 @@ bool FastISel::selectFNeg(const User *I, const Value *In) { return false; Register IntReg = fastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(), - ISD::BITCAST, OpReg, OpRegIsKill); + ISD::BITCAST, OpReg); if (!IntReg) return false; Register IntResultReg = fastEmit_ri_( - IntVT.getSimpleVT(), ISD::XOR, IntReg, /*Op0IsKill=*/true, + IntVT.getSimpleVT(), ISD::XOR, IntReg, UINT64_C(1) << (VT.getSizeInBits() - 1), IntVT.getSimpleVT()); if (!IntResultReg) return false; ResultReg = fastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(), ISD::BITCAST, - IntResultReg, /*Op0IsKill=*/true); + IntResultReg); if (!ResultReg) return false; @@ -1879,14 +1847,12 @@ bool FastISel::fastLowerIntrinsicCall(const IntrinsicInst * /*II*/) { unsigned FastISel::fastEmit_(MVT, MVT, unsigned) { return 0; } -unsigned FastISel::fastEmit_r(MVT, MVT, unsigned, unsigned /*Op0*/, - bool /*Op0IsKill*/) { +unsigned FastISel::fastEmit_r(MVT, MVT, unsigned, unsigned /*Op0*/) { return 0; } unsigned FastISel::fastEmit_rr(MVT, MVT, unsigned, unsigned /*Op0*/, - bool /*Op0IsKill*/, unsigned /*Op1*/, - bool /*Op1IsKill*/) { + unsigned /*Op1*/) { return 0; } @@ -1900,7 +1866,7 @@ unsigned FastISel::fastEmit_f(MVT, MVT, unsigned, } unsigned FastISel::fastEmit_ri(MVT, MVT, unsigned, unsigned /*Op0*/, - bool /*Op0IsKill*/, uint64_t /*Imm*/) { + uint64_t /*Imm*/) { return 0; } @@ -1909,7 +1875,7 @@ unsigned FastISel::fastEmit_ri(MVT, MVT, unsigned, unsigned /*Op0*/, /// If that fails, it materializes the immediate into a register and try /// fastEmit_rr instead. Register FastISel::fastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0, - bool Op0IsKill, uint64_t Imm, MVT ImmType) { + uint64_t Imm, MVT ImmType) { // If this is a multiply by a power of two, emit this as a shift left. if (Opcode == ISD::MUL && isPowerOf2_64(Imm)) { Opcode = ISD::SHL; @@ -1927,11 +1893,10 @@ Register FastISel::fastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0, return 0; // First check if immediate type is legal. If not, we can't use the ri form. - Register ResultReg = fastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm); + Register ResultReg = fastEmit_ri(VT, VT, Opcode, Op0, Imm); if (ResultReg) return ResultReg; Register MaterialReg = fastEmit_i(ImmType, ImmType, ISD::Constant, Imm); - bool IsImmKill = true; if (!MaterialReg) { // This is a bit ugly/slow, but failing here means falling out of // fast-isel, which would be very slow. @@ -1940,15 +1905,8 @@ Register FastISel::fastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0, MaterialReg = getRegForValue(ConstantInt::get(ITy, Imm)); if (!MaterialReg) return 0; - // FIXME: If the materialized register here has no uses yet then this - // will be the first use and we should be able to mark it as killed. - // However, the local value area for materialising constant expressions - // grows down, not up, which means that any constant expressions we generate - // later which also use 'Imm' could be after this instruction and therefore - // after this kill. 
- IsImmKill = false; } - return fastEmit_rr(VT, VT, Opcode, Op0, Op0IsKill, MaterialReg, IsImmKill); + return fastEmit_rr(VT, VT, Opcode, Op0, MaterialReg); } Register FastISel::createResultReg(const TargetRegisterClass *RC) { @@ -1982,8 +1940,7 @@ Register FastISel::fastEmitInst_(unsigned MachineInstOpcode, } Register FastISel::fastEmitInst_r(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, unsigned Op0, - bool Op0IsKill) { + const TargetRegisterClass *RC, unsigned Op0) { const MCInstrDesc &II = TII.get(MachineInstOpcode); Register ResultReg = createResultReg(RC); @@ -1991,10 +1948,10 @@ Register FastISel::fastEmitInst_r(unsigned MachineInstOpcode, if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) - .addReg(Op0, getKillRegState(Op0IsKill)); + .addReg(Op0); else { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addReg(Op0, getKillRegState(Op0IsKill)); + .addReg(Op0); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } @@ -2004,8 +1961,7 @@ Register FastISel::fastEmitInst_r(unsigned MachineInstOpcode, Register FastISel::fastEmitInst_rr(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, - bool Op0IsKill, unsigned Op1, - bool Op1IsKill) { + unsigned Op1) { const MCInstrDesc &II = TII.get(MachineInstOpcode); Register ResultReg = createResultReg(RC); @@ -2014,12 +1970,12 @@ Register FastISel::fastEmitInst_rr(unsigned MachineInstOpcode, if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) - .addReg(Op0, getKillRegState(Op0IsKill)) - .addReg(Op1, getKillRegState(Op1IsKill)); + .addReg(Op0) + .addReg(Op1); else { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addReg(Op0, getKillRegState(Op0IsKill)) - .addReg(Op1, getKillRegState(Op1IsKill)); + .addReg(Op0) + .addReg(Op1); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } @@ -2028,9 +1984,7 @@ Register FastISel::fastEmitInst_rr(unsigned MachineInstOpcode, Register FastISel::fastEmitInst_rrr(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, - bool Op0IsKill, unsigned Op1, - bool Op1IsKill, unsigned Op2, - bool Op2IsKill) { + unsigned Op1, unsigned Op2) { const MCInstrDesc &II = TII.get(MachineInstOpcode); Register ResultReg = createResultReg(RC); @@ -2040,14 +1994,14 @@ Register FastISel::fastEmitInst_rrr(unsigned MachineInstOpcode, if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) - .addReg(Op0, getKillRegState(Op0IsKill)) - .addReg(Op1, getKillRegState(Op1IsKill)) - .addReg(Op2, getKillRegState(Op2IsKill)); + .addReg(Op0) + .addReg(Op1) + .addReg(Op2); else { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addReg(Op0, getKillRegState(Op0IsKill)) - .addReg(Op1, getKillRegState(Op1IsKill)) - .addReg(Op2, getKillRegState(Op2IsKill)); + .addReg(Op0) + .addReg(Op1) + .addReg(Op2); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } @@ -2056,7 +2010,7 @@ Register FastISel::fastEmitInst_rrr(unsigned MachineInstOpcode, Register FastISel::fastEmitInst_ri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, - bool Op0IsKill, uint64_t Imm) { + uint64_t Imm) { const MCInstrDesc &II = TII.get(MachineInstOpcode); Register ResultReg = createResultReg(RC); @@ -2064,11 +2018,11 @@ Register FastISel::fastEmitInst_ri(unsigned 
MachineInstOpcode, if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) - .addReg(Op0, getKillRegState(Op0IsKill)) + .addReg(Op0) .addImm(Imm); else { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addReg(Op0, getKillRegState(Op0IsKill)) + .addReg(Op0) .addImm(Imm); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); @@ -2078,8 +2032,7 @@ Register FastISel::fastEmitInst_ri(unsigned MachineInstOpcode, Register FastISel::fastEmitInst_rii(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, - bool Op0IsKill, uint64_t Imm1, - uint64_t Imm2) { + uint64_t Imm1, uint64_t Imm2) { const MCInstrDesc &II = TII.get(MachineInstOpcode); Register ResultReg = createResultReg(RC); @@ -2087,12 +2040,12 @@ Register FastISel::fastEmitInst_rii(unsigned MachineInstOpcode, if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) - .addReg(Op0, getKillRegState(Op0IsKill)) + .addReg(Op0) .addImm(Imm1) .addImm(Imm2); else { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addReg(Op0, getKillRegState(Op0IsKill)) + .addReg(Op0) .addImm(Imm1) .addImm(Imm2); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, @@ -2122,8 +2075,7 @@ Register FastISel::fastEmitInst_f(unsigned MachineInstOpcode, Register FastISel::fastEmitInst_rri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, - bool Op0IsKill, unsigned Op1, - bool Op1IsKill, uint64_t Imm) { + unsigned Op1, uint64_t Imm) { const MCInstrDesc &II = TII.get(MachineInstOpcode); Register ResultReg = createResultReg(RC); @@ -2132,13 +2084,13 @@ Register FastISel::fastEmitInst_rri(unsigned MachineInstOpcode, if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) - .addReg(Op0, getKillRegState(Op0IsKill)) - .addReg(Op1, getKillRegState(Op1IsKill)) + .addReg(Op0) + .addReg(Op1) .addImm(Imm); else { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addReg(Op0, getKillRegState(Op0IsKill)) - .addReg(Op1, getKillRegState(Op1IsKill)) + .addReg(Op0) + .addReg(Op1) .addImm(Imm); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); @@ -2163,21 +2115,21 @@ Register FastISel::fastEmitInst_i(unsigned MachineInstOpcode, } Register FastISel::fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, - bool Op0IsKill, uint32_t Idx) { + uint32_t Idx) { Register ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); assert(Register::isVirtualRegister(Op0) && "Cannot yet extract from physregs"); const TargetRegisterClass *RC = MRI.getRegClass(Op0); MRI.constrainRegClass(Op0, TRI.getSubClassWithSubReg(RC, Idx)); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), - ResultReg).addReg(Op0, getKillRegState(Op0IsKill), Idx); + ResultReg).addReg(Op0, 0, Idx); return ResultReg; } /// Emit MachineInstrs to compute the value of Op with all but the least /// significant bit set to zero. -Register FastISel::fastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) { - return fastEmit_ri(VT, VT, ISD::AND, Op0, Op0IsKill, 1); +Register FastISel::fastEmitZExtFromI1(MVT VT, unsigned Op0) { + return fastEmit_ri(VT, VT, ISD::AND, Op0, 1); } /// HandlePHINodesInSuccessorBlocks - Handle PHI nodes in successor blocks. 
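The selectFNeg hunk at the top of this file's diff keeps the integer
fallback for fneg: bitcast to an integer type, XOR with the sign-bit mask,
bitcast back. A minimal standalone C++ sketch of the same trick
(fnegViaXor is an illustrative name, not from the patch):

    #include <cstdint>
    #include <cstring>

    // Negate an IEEE-754 float by flipping its sign bit, mirroring the
    // BITCAST -> XOR (1 << (bits - 1)) -> BITCAST sequence above.
    float fnegViaXor(float F) {
      uint32_t Bits;
      std::memcpy(&Bits, &F, sizeof(Bits)); // bitcast f32 -> i32
      Bits ^= UINT32_C(1) << 31;            // flip only the sign bit
      std::memcpy(&F, &Bits, sizeof(F));    // bitcast i32 -> f32
      return F;
    }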
diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 32a4f60df097..85c6eca5775e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -192,10 +192,8 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, MF->getFrameInfo().CreateVariableSizedObject( Alignment <= StackAlign ? Align(1) : Alignment, AI); } - } - - // Look for inline asm that clobbers the SP register. - if (auto *Call = dyn_cast<CallBase>(&I)) { + } else if (auto *Call = dyn_cast<CallBase>(&I)) { + // Look for inline asm that clobbers the SP register. if (Call->isInlineAsm()) { Register SP = TLI->getStackPointerRegisterToSaveRestore(); const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); @@ -214,21 +212,20 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, } } } - } - - // Look for calls to the @llvm.va_start intrinsic. We can omit some - // prologue boilerplate for variadic functions that don't examine their - // arguments. - if (const auto *II = dyn_cast<IntrinsicInst>(&I)) { - if (II->getIntrinsicID() == Intrinsic::vastart) - MF->getFrameInfo().setHasVAStart(true); - } + // Look for calls to the @llvm.va_start intrinsic. We can omit some + // prologue boilerplate for variadic functions that don't examine their + // arguments. + if (const auto *II = dyn_cast<IntrinsicInst>(&I)) { + if (II->getIntrinsicID() == Intrinsic::vastart) + MF->getFrameInfo().setHasVAStart(true); + } - // If we have a musttail call in a variadic function, we need to ensure we - // forward implicit register parameters. - if (const auto *CI = dyn_cast<CallInst>(&I)) { - if (CI->isMustTailCall() && Fn->isVarArg()) - MF->getFrameInfo().setHasMustTailInVarArgFunc(true); + // If we have a musttail call in a variadic function, we need to ensure + // we forward implicit register parameters. + if (const auto *CI = dyn_cast<CallInst>(&I)) { + if (CI->isMustTailCall() && Fn->isVarArg()) + MF->getFrameInfo().setHasMustTailInVarArgFunc(true); + } } // Mark values used outside their block as exported, by allocating @@ -333,14 +330,23 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, else if (Personality == EHPersonality::Wasm_CXX) { WasmEHFuncInfo &EHInfo = *MF->getWasmEHFuncInfo(); - // Map all BB references in the WinEH data to MBBs. - DenseMap<BBOrMBB, BBOrMBB> NewMap; - for (auto &KV : EHInfo.EHPadUnwindMap) { + // Map all BB references in the Wasm EH data to MBBs. 
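// The rebuild below is a plain key/value remap through MBBMap, from IR
// blocks to their machine blocks; schematically (entries illustrative):
//   before: SrcToUnwindDest = { srcBB -> destBB }
//   after:  SrcToUnwindDest = { MBBMap[srcBB] -> MBBMap[destBB] }
// with the same element-wise treatment applied to UnwindDestToSrcs.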
+ DenseMap<BBOrMBB, BBOrMBB> SrcToUnwindDest; + for (auto &KV : EHInfo.SrcToUnwindDest) { const auto *Src = KV.first.get<const BasicBlock *>(); - const auto *Dst = KV.second.get<const BasicBlock *>(); - NewMap[MBBMap[Src]] = MBBMap[Dst]; + const auto *Dest = KV.second.get<const BasicBlock *>(); + SrcToUnwindDest[MBBMap[Src]] = MBBMap[Dest]; + } + EHInfo.SrcToUnwindDest = std::move(SrcToUnwindDest); + DenseMap<BBOrMBB, SmallPtrSet<BBOrMBB, 4>> UnwindDestToSrcs; + for (auto &KV : EHInfo.UnwindDestToSrcs) { + const auto *Dest = KV.first.get<const BasicBlock *>(); + UnwindDestToSrcs[MBBMap[Dest]] = SmallPtrSet<BBOrMBB, 4>(); + for (const auto P : KV.second) + UnwindDestToSrcs[MBBMap[Dest]].insert( + MBBMap[P.get<const BasicBlock *>()]); } - EHInfo.EHPadUnwindMap = std::move(NewMap); + EHInfo.UnwindDestToSrcs = std::move(UnwindDestToSrcs); } } diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index a5978711b871..348fad6daf8f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -166,9 +166,8 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, assert(TRI->isTypeLegalForClass(*UseRC, VT) && "Incompatible phys register def and uses!"); DstRC = UseRC; - } else { - DstRC = TLI->getRegClassFor(VT, Node->isDivergent()); - } + } else + DstRC = SrcRC; // If all uses are reading from the src physical register and copying the // register is either impossible or very expensive, then don't create a copy. @@ -684,144 +683,213 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, SD->setIsEmitted(); - if (SD->isInvalidated()) { - // An invalidated SDNode must generate an undef DBG_VALUE: although the - // original value is no longer computed, earlier DBG_VALUEs live ranges - // must not leak into later code. - auto MIB = BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE)); - MIB.addReg(0U); - MIB.addReg(0U, RegState::Debug); + ArrayRef<SDDbgOperand> LocationOps = SD->getLocationOps(); + assert(!LocationOps.empty() && "dbg_value with no location operands?"); + + if (SD->isInvalidated()) + return EmitDbgNoLocation(SD); + + // Emit variadic dbg_value nodes as DBG_VALUE_LIST. + if (SD->isVariadic()) { + // DBG_VALUE_LIST := "DBG_VALUE_LIST" var, expression, loc (, loc)* + const MCInstrDesc &DbgValDesc = TII->get(TargetOpcode::DBG_VALUE_LIST); + // Build the DBG_VALUE_LIST instruction base. + auto MIB = BuildMI(*MF, DL, DbgValDesc); MIB.addMetadata(Var); MIB.addMetadata(Expr); + AddDbgValueLocationOps(MIB, DbgValDesc, LocationOps, VRBaseMap); return &*MIB; } // Attempt to produce a DBG_INSTR_REF if we've been asked to. + // We currently exclude the possibility of instruction references for + // variadic nodes; if at some point we enable them, this should be moved + // above the variadic block. if (EmitDebugInstrRefs) if (auto *InstrRef = EmitDbgInstrRef(SD, VRBaseMap)) return InstrRef; - if (SD->getKind() == SDDbgValue::FRAMEIX) { - // Stack address; this needs to be lowered in target-dependent fashion. - // EmitTargetCodeForFrameDebugValue is responsible for allocation. - auto FrameMI = BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE)) - .addFrameIndex(SD->getFrameIx()); - if (SD->isIndirect()) - // Push [fi + 0] onto the DIExpression stack. - FrameMI.addImm(0); - else - // Push fi onto the DIExpression stack. - FrameMI.addReg(0); - return FrameMI.addMetadata(Var).addMetadata(Expr); - } - // Otherwise, we're going to create an instruction here. 
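// Following the DBG_VALUE_LIST grammar comment in this hunk, the variadic
// form comes out roughly as (illustrative MIR; registers assumed):
//   DBG_VALUE_LIST !var, !DIExpression(DW_OP_LLVM_arg, 0, ...), %0, %1
// i.e. variable and expression first, then one location operand per entry
// in LocationOps, appended by AddDbgValueLocationOps.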
- const MCInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE); - MachineInstrBuilder MIB = BuildMI(*MF, DL, II); - if (SD->getKind() == SDDbgValue::SDNODE) { - SDNode *Node = SD->getSDNode(); - SDValue Op = SDValue(Node, SD->getResNo()); - // It's possible we replaced this SDNode with other(s) and therefore - // didn't generate code for it. It's better to catch these cases where - // they happen and transfer the debug info, but trying to guarantee that - // in all cases would be very fragile; this is a safeguard for any - // that were missed. - DenseMap<SDValue, Register>::iterator I = VRBaseMap.find(Op); - if (I==VRBaseMap.end()) - MIB.addReg(0U); // undef - else - AddOperand(MIB, Op, (*MIB).getNumOperands(), &II, VRBaseMap, - /*IsDebug=*/true, /*IsClone=*/false, /*IsCloned=*/false); - } else if (SD->getKind() == SDDbgValue::VREG) { - MIB.addReg(SD->getVReg(), RegState::Debug); - } else if (SD->getKind() == SDDbgValue::CONST) { - const Value *V = SD->getConst(); - if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { - if (CI->getBitWidth() > 64) - MIB.addCImm(CI); + return EmitDbgValueFromSingleOp(SD, VRBaseMap); +} + +void InstrEmitter::AddDbgValueLocationOps( + MachineInstrBuilder &MIB, const MCInstrDesc &DbgValDesc, + ArrayRef<SDDbgOperand> LocationOps, + DenseMap<SDValue, Register> &VRBaseMap) { + for (const SDDbgOperand &Op : LocationOps) { + switch (Op.getKind()) { + case SDDbgOperand::FRAMEIX: + MIB.addFrameIndex(Op.getFrameIx()); + break; + case SDDbgOperand::VREG: + MIB.addReg(Op.getVReg(), RegState::Debug); + break; + case SDDbgOperand::SDNODE: { + SDValue V = SDValue(Op.getSDNode(), Op.getResNo()); + // It's possible we replaced this SDNode with other(s) and therefore + // didn't generate code for it. It's better to catch these cases where + // they happen and transfer the debug info, but trying to guarantee that + // in all cases would be very fragile; this is a safeguard for any + // that were missed. + if (VRBaseMap.count(V) == 0) + MIB.addReg(0U); // undef else - MIB.addImm(CI->getSExtValue()); - } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { - MIB.addFPImm(CF); - } else if (isa<ConstantPointerNull>(V)) { - // Note: This assumes that all nullptr constants are zero-valued. - MIB.addImm(0); - } else { - // Could be an Undef. In any case insert an Undef so we can see what we - // dropped. - MIB.addReg(0U); + AddOperand(MIB, V, (*MIB).getNumOperands(), &DbgValDesc, VRBaseMap, + /*IsDebug=*/true, /*IsClone=*/false, /*IsCloned=*/false); + } break; + case SDDbgOperand::CONST: { + const Value *V = Op.getConst(); + if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + if (CI->getBitWidth() > 64) + MIB.addCImm(CI); + else + MIB.addImm(CI->getSExtValue()); + } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { + MIB.addFPImm(CF); + } else if (isa<ConstantPointerNull>(V)) { + // Note: This assumes that all nullptr constants are zero-valued. + MIB.addImm(0); + } else { + // Could be an Undef. In any case insert an Undef so we can see what we + // dropped. + MIB.addReg(0U); + } + } break; } - } else { - // Insert an Undef so we can see what we dropped. - MIB.addReg(0U); } - - // Indirect addressing is indicated by an Imm as the second parameter. 
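// Concretely (illustrative MIR), the second operand distinguishes the two
// addressing modes, a convention EmitDbgValueFromSingleOp below preserves:
//   DBG_VALUE %0, $noreg, !var, !expr   ; direct: %0 holds the value
//   DBG_VALUE %0, 0, !var, !expr        ; indirect: %0 points at it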
- if (SD->isIndirect()) - MIB.addImm(0U); - else - MIB.addReg(0U, RegState::Debug); - - MIB.addMetadata(Var); - MIB.addMetadata(Expr); - - return &*MIB; } MachineInstr * InstrEmitter::EmitDbgInstrRef(SDDbgValue *SD, DenseMap<SDValue, Register> &VRBaseMap) { - // Instruction referencing is still in a prototype state: for now we're only - // going to support SDNodes within a block. Copies are not supported, they - // don't actually define a value. - if (SD->getKind() != SDDbgValue::SDNODE) - return nullptr; - - SDNode *Node = SD->getSDNode(); - SDValue Op = SDValue(Node, SD->getResNo()); - DenseMap<SDValue, Register>::iterator I = VRBaseMap.find(Op); - if (I==VRBaseMap.end()) - return nullptr; // undef value: let EmitDbgValue produce a DBG_VALUE $noreg. - + assert(!SD->isVariadic()); + SDDbgOperand DbgOperand = SD->getLocationOps()[0]; MDNode *Var = SD->getVariable(); MDNode *Expr = SD->getExpression(); DebugLoc DL = SD->getDebugLoc(); + const MCInstrDesc &RefII = TII->get(TargetOpcode::DBG_INSTR_REF); + + // Handle variable locations that don't actually depend on the instructions + // in the program: constants and stack locations. + if (DbgOperand.getKind() == SDDbgOperand::FRAMEIX || + DbgOperand.getKind() == SDDbgOperand::CONST) + return EmitDbgValueFromSingleOp(SD, VRBaseMap); + + // It may not be immediately possible to identify the MachineInstr that + // defines a VReg, it can depend for example on the order blocks are + // emitted in. When this happens, or when further analysis is needed later, + // produce an instruction like this: + // + // DBG_INSTR_REF %0:gr64, 0, !123, !456 + // + // i.e., point the instruction at the vreg, and patch it up later in + // MachineFunction::finalizeDebugInstrRefs. + auto EmitHalfDoneInstrRef = [&](unsigned VReg) -> MachineInstr * { + auto MIB = BuildMI(*MF, DL, RefII); + MIB.addReg(VReg); + MIB.addImm(0); + MIB.addMetadata(Var); + MIB.addMetadata(Expr); + return MIB; + }; - // Try to pick out a defining instruction at this point. - unsigned VReg = getVR(Op, VRBaseMap); - MachineInstr *ResultInstr = nullptr; + // Try to find both the defined register and the instruction defining it. + MachineInstr *DefMI = nullptr; + unsigned VReg; - // No definition corresponds to scenarios where a vreg is live-in to a block, - // and doesn't have a defining instruction (yet). This can be patched up - // later; at this early stage of implementation, fall back to using DBG_VALUE. - if (!MRI->hasOneDef(VReg)) - return nullptr; + if (DbgOperand.getKind() == SDDbgOperand::VREG) { + VReg = DbgOperand.getVReg(); - MachineInstr &DefMI = *MRI->def_instr_begin(VReg); - // Some target specific opcodes can become copies. As stated above, we're - // ignoring those for now. - if (DefMI.isCopy() || DefMI.getOpcode() == TargetOpcode::SUBREG_TO_REG) - return nullptr; + // No definition means that block hasn't been emitted yet. Leave a vreg + // reference to be fixed later. + if (!MRI->hasOneDef(VReg)) + return EmitHalfDoneInstrRef(VReg); + + DefMI = &*MRI->def_instr_begin(VReg); + } else { + assert(DbgOperand.getKind() == SDDbgOperand::SDNODE); + // Look up the corresponding VReg for the given SDNode, if any. + SDNode *Node = DbgOperand.getSDNode(); + SDValue Op = SDValue(Node, DbgOperand.getResNo()); + DenseMap<SDValue, Register>::iterator I = VRBaseMap.find(Op); + // No VReg -> produce a DBG_VALUE $noreg instead. + if (I==VRBaseMap.end()) + return EmitDbgNoLocation(SD); + + // Try to pick out a defining instruction at this point. 
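// Once a defining instruction is found, the fully-formed reference built
// further down pairs an instruction number with an operand index, e.g.
// (illustrative MIR; numbers assumed):
//   DBG_INSTR_REF 7, 0, !123, !456   ; operand 0 of instruction number 7
// in contrast to the half-done vreg form sketched above.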
+ VReg = getVR(Op, VRBaseMap); + + // Again, if there's no instruction defining the VReg right now, fix it up + // later. + if (!MRI->hasOneDef(VReg)) + return EmitHalfDoneInstrRef(VReg); + + DefMI = &*MRI->def_instr_begin(VReg); + } + + // Avoid copy like instructions: they don't define values, only move them. + // Leave a virtual-register reference until it can be fixed up later, to find + // the underlying value definition. + if (DefMI->isCopyLike() || TII->isCopyInstr(*DefMI)) + return EmitHalfDoneInstrRef(VReg); - const MCInstrDesc &RefII = TII->get(TargetOpcode::DBG_INSTR_REF); auto MIB = BuildMI(*MF, DL, RefII); - // Find the operand which defines the specified VReg. + // Find the operand number which defines the specified VReg. unsigned OperandIdx = 0; - for (const auto &MO : DefMI.operands()) { + for (const auto &MO : DefMI->operands()) { if (MO.isReg() && MO.isDef() && MO.getReg() == VReg) break; ++OperandIdx; } - assert(OperandIdx < DefMI.getNumOperands()); + assert(OperandIdx < DefMI->getNumOperands()); // Make the DBG_INSTR_REF refer to that instruction, and that operand. - unsigned InstrNum = DefMI.getDebugInstrNum(); + unsigned InstrNum = DefMI->getDebugInstrNum(); MIB.addImm(InstrNum); MIB.addImm(OperandIdx); MIB.addMetadata(Var); MIB.addMetadata(Expr); - ResultInstr = &*MIB; - return ResultInstr; + return &*MIB; +} + +MachineInstr *InstrEmitter::EmitDbgNoLocation(SDDbgValue *SD) { + // An invalidated SDNode must generate an undef DBG_VALUE: although the + // original value is no longer computed, earlier DBG_VALUEs live ranges + // must not leak into later code. + MDNode *Var = SD->getVariable(); + MDNode *Expr = SD->getExpression(); + DebugLoc DL = SD->getDebugLoc(); + auto MIB = BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE)); + MIB.addReg(0U); + MIB.addReg(0U, RegState::Debug); + MIB.addMetadata(Var); + MIB.addMetadata(Expr); + return &*MIB; +} + +MachineInstr * +InstrEmitter::EmitDbgValueFromSingleOp(SDDbgValue *SD, + DenseMap<SDValue, Register> &VRBaseMap) { + MDNode *Var = SD->getVariable(); + MDNode *Expr = SD->getExpression(); + DebugLoc DL = SD->getDebugLoc(); + const MCInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE); + + assert(SD->getLocationOps().size() == 1 && + "Non variadic dbg_value should have only one location op"); + + // Emit non-variadic dbg_value nodes as DBG_VALUE. + // DBG_VALUE := "DBG_VALUE" loc, isIndirect, var, expr + auto MIB = BuildMI(*MF, DL, II); + AddDbgValueLocationOps(MIB, II, SD->getLocationOps(), VRBaseMap); + + if (SD->isIndirect()) + MIB.addImm(0U); + else + MIB.addReg(0U, RegState::Debug); + + return MIB.addMetadata(Var).addMetadata(Expr); } MachineInstr * @@ -1116,10 +1184,10 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, case ISD::LIFETIME_START: case ISD::LIFETIME_END: { - unsigned TarOp = (Node->getOpcode() == ISD::LIFETIME_START) ? - TargetOpcode::LIFETIME_START : TargetOpcode::LIFETIME_END; - - FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Node->getOperand(1)); + unsigned TarOp = (Node->getOpcode() == ISD::LIFETIME_START) + ? 
TargetOpcode::LIFETIME_START + : TargetOpcode::LIFETIME_END; + auto *FI = cast<FrameIndexSDNode>(Node->getOperand(1)); BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TarOp)) .addFrameIndex(FI->getIndex()); break; diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h index 09658b8143fe..ac8a70156522 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -25,6 +25,7 @@ class MachineInstrBuilder; class MCInstrDesc; class SDDbgLabel; class SDDbgValue; +class SDDbgOperand; class TargetLowering; class TargetMachine; @@ -108,16 +109,29 @@ public: /// (which do not go into the machine instrs.) static unsigned CountResults(SDNode *Node); + void AddDbgValueLocationOps(MachineInstrBuilder &MIB, + const MCInstrDesc &DbgValDesc, + ArrayRef<SDDbgOperand> Locations, + DenseMap<SDValue, Register> &VRBaseMap); + /// EmitDbgValue - Generate machine instruction for a dbg_value node. /// MachineInstr *EmitDbgValue(SDDbgValue *SD, DenseMap<SDValue, Register> &VRBaseMap); - /// Attempt to emit a dbg_value as a DBG_INSTR_REF. May fail and return - /// nullptr, in which case we fall back to plain EmitDbgValue. + /// Emit a dbg_value as a DBG_INSTR_REF. May produce DBG_VALUE $noreg instead + /// if there is no variable location; alternately a half-formed DBG_INSTR_REF + /// that refers to a virtual register and is corrected later in isel. MachineInstr *EmitDbgInstrRef(SDDbgValue *SD, DenseMap<SDValue, Register> &VRBaseMap); + /// Emit a DBG_VALUE $noreg, indicating a variable has no location. + MachineInstr *EmitDbgNoLocation(SDDbgValue *SD); + + /// Emit a DBG_VALUE from the operands to SDDbgValue. + MachineInstr *EmitDbgValueFromSingleOp(SDDbgValue *SD, + DenseMap<SDValue, Register> &VRBaseMap); + /// Generate machine instruction for a dbg_label node. MachineInstr *EmitDbgLabel(SDDbgLabel *SD); @@ -148,7 +162,6 @@ private: void EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, DenseMap<SDValue, Register> &VRBaseMap); }; - -} +} // namespace llvm #endif diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 62d7191036ca..d92b23f56e4d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -133,12 +133,10 @@ private: SDValue N1, SDValue N2, ArrayRef<int> Mask) const; - bool LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, - bool &NeedInvert, const SDLoc &dl, SDValue &Chain, - bool IsSignaling = false); - SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); + void ExpandFPLibCall(SDNode *Node, RTLIB::Libcall LC, + SmallVectorImpl<SDValue> &Results); void ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128, @@ -181,8 +179,6 @@ private: SmallVectorImpl<SDValue> &Results); SDValue PromoteLegalFP_TO_INT_SAT(SDNode *Node, const SDLoc &dl); - SDValue ExpandBITREVERSE(SDValue Op, const SDLoc &dl); - SDValue ExpandBSWAP(SDValue Op, const SDLoc &dl); SDValue ExpandPARITY(SDValue Op, const SDLoc &dl); SDValue ExpandExtractFromVectorThroughStack(SDValue Op); @@ -1261,6 +1257,11 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { return; if (Node->getNumValues() == 1) { + // Verify the new types match the original. Glue is waived because + // ISD::ADDC can be legalized by replacing Glue with an integer type. 
+ assert((Res.getValueType() == Node->getValueType(0) || + Node->getValueType(0) == MVT::Glue) && + "Type mismatch for custom legalized operation"); LLVM_DEBUG(dbgs() << "Successfully custom legalized node\n"); // We can just directly replace this node with the lowered value. ReplaceNode(SDValue(Node, 0), Res); @@ -1268,8 +1269,14 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { } SmallVector<SDValue, 8> ResultVals; - for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) { + // Verify the new types match the original. Glue is waived because + // ISD::ADDC can be legalized by replacing Glue with an integer type. + assert((Res->getValueType(i) == Node->getValueType(i) || + Node->getValueType(i) == MVT::Glue) && + "Type mismatch for custom legalized operation"); ResultVals.push_back(Res.getValue(i)); + } LLVM_DEBUG(dbgs() << "Successfully custom legalized node\n"); ReplaceNode(Node, ResultVals.data()); return; @@ -1363,17 +1370,19 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { MachinePointerInfo()); } - StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); - SDValue NewLoad; - if (Op.getValueType().isVector()) + if (Op.getValueType().isVector()) { + StackPtr = TLI.getVectorSubVecPointer(DAG, StackPtr, VecVT, + Op.getValueType(), Idx); NewLoad = DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, MachinePointerInfo()); - else + } else { + StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); NewLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, MachinePointerInfo(), VecVT.getVectorElementType()); + } // Replace the chain going out of the store, by the one out of the load. DAG.ReplaceAllUsesOfValueWith(Ch, SDValue(NewLoad.getNode(), 1)); @@ -1398,6 +1407,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { // Store the value to a temporary stack slot, then LOAD the returned part. EVT VecVT = Vec.getValueType(); + EVT SubVecVT = Part.getValueType(); SDValue StackPtr = DAG.CreateStackTemporary(VecVT); int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); MachinePointerInfo PtrInfo = @@ -1407,7 +1417,8 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo); // Then store the inserted part. - SDValue SubStackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); + SDValue SubStackPtr = + TLI.getVectorSubVecPointer(DAG, StackPtr, VecVT, SubVecVT, Idx); // Store the subvector. Ch = DAG.getStore( @@ -1676,152 +1687,6 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, Results.push_back(Tmp2); } -/// Legalize a SETCC with given LHS and RHS and condition code CC on the current -/// target. -/// -/// If the SETCC has been legalized using AND / OR, then the legalized node -/// will be stored in LHS. RHS and CC will be set to SDValue(). NeedInvert -/// will be set to false. -/// -/// If the SETCC has been legalized by using getSetCCSwappedOperands(), -/// then the values of LHS and RHS will be swapped, CC will be set to the -/// new condition, and NeedInvert will be set to false. -/// -/// If the SETCC has been legalized using the inverse condcode, then LHS and -/// RHS will be unchanged, CC will set to the inverted condcode, and NeedInvert -/// will be set to true. 
The caller must invert the result of the SETCC with -/// SelectionDAG::getLogicalNOT() or take equivalent action to swap the effect -/// of a true/false result. -/// -/// \returns true if the SetCC has been legalized, false if it hasn't. -bool SelectionDAGLegalize::LegalizeSetCCCondCode( - EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, bool &NeedInvert, - const SDLoc &dl, SDValue &Chain, bool IsSignaling) { - MVT OpVT = LHS.getSimpleValueType(); - ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get(); - NeedInvert = false; - switch (TLI.getCondCodeAction(CCCode, OpVT)) { - default: llvm_unreachable("Unknown condition code action!"); - case TargetLowering::Legal: - // Nothing to do. - break; - case TargetLowering::Expand: { - ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode); - if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) { - std::swap(LHS, RHS); - CC = DAG.getCondCode(InvCC); - return true; - } - // Swapping operands didn't work. Try inverting the condition. - bool NeedSwap = false; - InvCC = getSetCCInverse(CCCode, OpVT); - if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) { - // If inverting the condition is not enough, try swapping operands - // on top of it. - InvCC = ISD::getSetCCSwappedOperands(InvCC); - NeedSwap = true; - } - if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) { - CC = DAG.getCondCode(InvCC); - NeedInvert = true; - if (NeedSwap) - std::swap(LHS, RHS); - return true; - } - - ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID; - unsigned Opc = 0; - switch (CCCode) { - default: llvm_unreachable("Don't know how to expand this condition!"); - case ISD::SETUO: - if (TLI.isCondCodeLegal(ISD::SETUNE, OpVT)) { - CC1 = ISD::SETUNE; CC2 = ISD::SETUNE; Opc = ISD::OR; - break; - } - assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) && - "If SETUE is expanded, SETOEQ or SETUNE must be legal!"); - NeedInvert = true; - LLVM_FALLTHROUGH; - case ISD::SETO: - assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) - && "If SETO is expanded, SETOEQ must be legal!"); - CC1 = ISD::SETOEQ; CC2 = ISD::SETOEQ; Opc = ISD::AND; break; - case ISD::SETONE: - case ISD::SETUEQ: - // If the SETUO or SETO CC isn't legal, we might be able to use - // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one - // of SETOGT/SETOLT to be legal, the other can be emulated by swapping - // the operands. - CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO; - if (!TLI.isCondCodeLegal(CC2, OpVT) && - (TLI.isCondCodeLegal(ISD::SETOGT, OpVT) || - TLI.isCondCodeLegal(ISD::SETOLT, OpVT))) { - CC1 = ISD::SETOGT; - CC2 = ISD::SETOLT; - Opc = ISD::OR; - NeedInvert = ((unsigned)CCCode & 0x8U); - break; - } - LLVM_FALLTHROUGH; - case ISD::SETOEQ: - case ISD::SETOGT: - case ISD::SETOGE: - case ISD::SETOLT: - case ISD::SETOLE: - case ISD::SETUNE: - case ISD::SETUGT: - case ISD::SETUGE: - case ISD::SETULT: - case ISD::SETULE: - // If we are floating point, assign and break, otherwise fall through. - if (!OpVT.isInteger()) { - // We can use the 4th bit to tell if we are the unordered - // or ordered version of the opcode. - CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO; - Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND; - CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10); - break; - } - // Fallthrough if we are unsigned integer. 
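// A worked instance of the bit trick in the cases above, using the
// ISD::CondCode encoding: SETUGT ("unordered or greater than") has bit
// 0x8 set, so
//   CC2 = SETUO, Opc = ISD::OR
//   CC1 = (SETUGT & 0x7) | 0x10 == SETGT
// and the comparison expands to
//   setcc(LHS, RHS, SETGT) OR setcc(LHS, RHS, SETUO)
// where the SETUO half is true iff either operand is NaN.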
- LLVM_FALLTHROUGH; - case ISD::SETLE: - case ISD::SETGT: - case ISD::SETGE: - case ISD::SETLT: - case ISD::SETNE: - case ISD::SETEQ: - // If all combinations of inverting the condition and swapping operands - // didn't work then we have no means to expand the condition. - llvm_unreachable("Don't know how to expand this condition!"); - } - - SDValue SetCC1, SetCC2; - if (CCCode != ISD::SETO && CCCode != ISD::SETUO) { - // If we aren't the ordered or unorder operation, - // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS). - SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, - IsSignaling); - SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, - IsSignaling); - } else { - // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS) - SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, - IsSignaling); - SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, - IsSignaling); - } - if (Chain) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1), - SetCC2.getValue(1)); - LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2); - RHS = SDValue(); - CC = SDValue(); - return true; - } - } - return false; -} - /// Emit a store/load combination to the stack. This stores /// SrcOp to a stack slot of type SlotVT, truncating it if needed. It then does /// a load from the stack slot to DestVT, extending it if needed. @@ -2176,21 +2041,10 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, } void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, - RTLIB::Libcall Call_F32, - RTLIB::Libcall Call_F64, - RTLIB::Libcall Call_F80, - RTLIB::Libcall Call_F128, - RTLIB::Libcall Call_PPCF128, + RTLIB::Libcall LC, SmallVectorImpl<SDValue> &Results) { - RTLIB::Libcall LC; - switch (Node->getSimpleValueType(0).SimpleTy) { - default: llvm_unreachable("Unexpected request for libcall!"); - case MVT::f32: LC = Call_F32; break; - case MVT::f64: LC = Call_F64; break; - case MVT::f80: LC = Call_F80; break; - case MVT::f128: LC = Call_F128; break; - case MVT::ppcf128: LC = Call_PPCF128; break; - } + if (LC == RTLIB::UNKNOWN_LIBCALL) + llvm_unreachable("Can't create an unknown libcall!"); if (Node->isStrictFPOpcode()) { EVT RetVT = Node->getValueType(0); @@ -2209,6 +2063,20 @@ void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, } } +/// Expand the node to a libcall based on the result type. +void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, + RTLIB::Libcall Call_F32, + RTLIB::Libcall Call_F64, + RTLIB::Libcall Call_F80, + RTLIB::Libcall Call_F128, + RTLIB::Libcall Call_PPCF128, + SmallVectorImpl<SDValue> &Results) { + RTLIB::Libcall LC = RTLIB::getFPLibCall(Node->getSimpleValueType(0), + Call_F32, Call_F64, Call_F80, + Call_F128, Call_PPCF128); + ExpandFPLibCall(Node, LC, Results); +} + SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, RTLIB::Libcall Call_I8, RTLIB::Libcall Call_I16, @@ -2237,32 +2105,10 @@ void SelectionDAGLegalize::ExpandArgFPLibCall(SDNode* Node, RTLIB::Libcall Call_PPCF128, SmallVectorImpl<SDValue> &Results) { EVT InVT = Node->getOperand(Node->isStrictFPOpcode() ? 
1 : 0).getValueType(); - - RTLIB::Libcall LC; - switch (InVT.getSimpleVT().SimpleTy) { - default: llvm_unreachable("Unexpected request for libcall!"); - case MVT::f32: LC = Call_F32; break; - case MVT::f64: LC = Call_F64; break; - case MVT::f80: LC = Call_F80; break; - case MVT::f128: LC = Call_F128; break; - case MVT::ppcf128: LC = Call_PPCF128; break; - } - - if (Node->isStrictFPOpcode()) { - EVT RetVT = Node->getValueType(0); - SmallVector<SDValue, 4> Ops(Node->op_begin() + 1, Node->op_end()); - TargetLowering::MakeLibCallOptions CallOptions; - // FIXME: This doesn't support tail calls. - std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RetVT, - Ops, CallOptions, - SDLoc(Node), - Node->getOperand(0)); - Results.push_back(Tmp.first); - Results.push_back(Tmp.second); - } else { - SDValue Tmp = ExpandLibCall(LC, Node, false); - Results.push_back(Tmp); - } + RTLIB::Libcall LC = RTLIB::getFPLibCall(InVT.getSimpleVT(), + Call_F32, Call_F64, Call_F80, + Call_F128, Call_PPCF128); + ExpandFPLibCall(Node, LC, Results); } /// Issue libcalls to __{u}divmod to compute div / rem pairs. @@ -2782,122 +2628,6 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT_SAT(SDNode *Node, return DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), Result); } -/// Legalize a BITREVERSE scalar/vector operation as a series of mask + shifts. -SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, const SDLoc &dl) { - EVT VT = Op.getValueType(); - EVT SHVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); - unsigned Sz = VT.getScalarSizeInBits(); - - SDValue Tmp, Tmp2, Tmp3; - - // If we can, perform BSWAP first and then the mask+swap the i4, then i2 - // and finally the i1 pairs. - // TODO: We can easily support i4/i2 legal types if any target ever does. - if (Sz >= 8 && isPowerOf2_32(Sz)) { - // Create the masks - repeating the pattern every byte. - APInt MaskHi4 = APInt::getSplat(Sz, APInt(8, 0xF0)); - APInt MaskHi2 = APInt::getSplat(Sz, APInt(8, 0xCC)); - APInt MaskHi1 = APInt::getSplat(Sz, APInt(8, 0xAA)); - APInt MaskLo4 = APInt::getSplat(Sz, APInt(8, 0x0F)); - APInt MaskLo2 = APInt::getSplat(Sz, APInt(8, 0x33)); - APInt MaskLo1 = APInt::getSplat(Sz, APInt(8, 0x55)); - - // BSWAP if the type is wider than a single byte. - Tmp = (Sz > 8 ? 
DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op); - - // swap i4: ((V & 0xF0) >> 4) | ((V & 0x0F) << 4) - Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi4, dl, VT)); - Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo4, dl, VT)); - Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(4, dl, SHVT)); - Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT)); - Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); - - // swap i2: ((V & 0xCC) >> 2) | ((V & 0x33) << 2) - Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi2, dl, VT)); - Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo2, dl, VT)); - Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(2, dl, SHVT)); - Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT)); - Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); - - // swap i1: ((V & 0xAA) >> 1) | ((V & 0x55) << 1) - Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi1, dl, VT)); - Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo1, dl, VT)); - Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(1, dl, SHVT)); - Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT)); - Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); - return Tmp; - } - - Tmp = DAG.getConstant(0, dl, VT); - for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) { - if (I < J) - Tmp2 = - DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT)); - else - Tmp2 = - DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT)); - - APInt Shift(Sz, 1); - Shift <<= J; - Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT)); - Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2); - } - - return Tmp; -} - -/// Open code the operations for BSWAP of the specified operation. -SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, const SDLoc &dl) { - EVT VT = Op.getValueType(); - EVT SHVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); - SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8; - switch (VT.getSimpleVT().getScalarType().SimpleTy) { - default: llvm_unreachable("Unhandled Expand type in BSWAP!"); - case MVT::i16: - // Use a rotate by 8. This can be further expanded if necessary. 
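// In plain C++ the i16 case really is just an 8-bit rotate (sketch):
//   uint16_t bswap16(uint16_t V) {
//     return (uint16_t)((V << 8) | (V >> 8)); // ROTL by 8 swaps both bytes
//   }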
- return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); - case MVT::i32: - Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); - Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); - Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); - Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); - Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, - DAG.getConstant(0xFF0000, dl, VT)); - Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT)); - Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3); - Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1); - return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2); - case MVT::i64: - Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT)); - Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(40, dl, SHVT)); - Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); - Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); - Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); - Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); - Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT)); - Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT)); - Tmp7 = DAG.getNode(ISD::AND, dl, VT, Tmp7, - DAG.getConstant(255ULL<<48, dl, VT)); - Tmp6 = DAG.getNode(ISD::AND, dl, VT, Tmp6, - DAG.getConstant(255ULL<<40, dl, VT)); - Tmp5 = DAG.getNode(ISD::AND, dl, VT, Tmp5, - DAG.getConstant(255ULL<<32, dl, VT)); - Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4, - DAG.getConstant(255ULL<<24, dl, VT)); - Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, - DAG.getConstant(255ULL<<16, dl, VT)); - Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, - DAG.getConstant(255ULL<<8 , dl, VT)); - Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7); - Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5); - Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3); - Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1); - Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6); - Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2); - return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4); - } -} - /// Open code the operations for PARITY of the specified operation. 
SDValue SelectionDAGLegalize::ExpandPARITY(SDValue Op, const SDLoc &dl) { EVT VT = Op.getValueType(); @@ -2946,10 +2676,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp1); break; case ISD::BITREVERSE: - Results.push_back(ExpandBITREVERSE(Node->getOperand(0), dl)); + if ((Tmp1 = TLI.expandBITREVERSE(Node, DAG))) + Results.push_back(Tmp1); break; case ISD::BSWAP: - Results.push_back(ExpandBSWAP(Node->getOperand(0), dl)); + if ((Tmp1 = TLI.expandBSWAP(Node, DAG))) + Results.push_back(Tmp1); break; case ISD::PARITY: Results.push_back(ExpandPARITY(Node->getOperand(0), dl)); @@ -3324,6 +3056,10 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp1); break; } + case ISD::VECTOR_SPLICE: { + Results.push_back(TLI.expandVectorSplice(Node, DAG)); + break; + } case ISD::EXTRACT_ELEMENT: { EVT OpTy = Node->getOperand(0).getValueType(); if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) { @@ -3830,8 +3566,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp2 = Node->getOperand(1 + Offset); Tmp3 = Node->getOperand(2 + Offset); bool Legalized = - LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, Tmp3, - NeedInvert, dl, Chain, IsSignaling); + TLI.LegalizeSetCCCondCode(DAG, Node->getValueType(0), Tmp1, Tmp2, Tmp3, + NeedInvert, dl, Chain, IsSignaling); if (Legalized) { // If we expanded the SETCC by swapping LHS and RHS, or by inverting the @@ -3926,8 +3662,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { } if (!Legalized) { - Legalized = LegalizeSetCCCondCode(getSetCCResultType(Tmp1.getValueType()), - Tmp1, Tmp2, CC, NeedInvert, dl, Chain); + Legalized = TLI.LegalizeSetCCCondCode( + DAG, getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, CC, + NeedInvert, dl, Chain); assert(Legalized && "Can't legalize SELECT_CC with legal condition!"); @@ -3961,8 +3698,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp4 = Node->getOperand(1); // CC bool Legalized = - LegalizeSetCCCondCode(getSetCCResultType(Tmp2.getValueType()), Tmp2, - Tmp3, Tmp4, NeedInvert, dl, Chain); + TLI.LegalizeSetCCCondCode(DAG, getSetCCResultType(Tmp2.getValueType()), + Tmp2, Tmp3, Tmp4, NeedInvert, dl, Chain); (void)Legalized; assert(Legalized && "Can't legalize BR_CC with legal condition!"); @@ -4145,7 +3882,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { case ISD::ATOMIC_LOAD_UMAX: case ISD::ATOMIC_CMP_SWAP: { MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT(); - AtomicOrdering Order = cast<AtomicSDNode>(Node)->getOrdering(); + AtomicOrdering Order = cast<AtomicSDNode>(Node)->getMergedOrdering(); RTLIB::Libcall LC = RTLIB::getOUTLINE_ATOMIC(Opc, Order, VT); EVT RetVT = Node->getValueType(0); TargetLowering::MakeLibCallOptions CallOptions; @@ -4299,15 +4036,8 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { break; case ISD::FPOWI: case ISD::STRICT_FPOWI: { - RTLIB::Libcall LC; - switch (Node->getSimpleValueType(0).SimpleTy) { - default: llvm_unreachable("Unexpected request for libcall!"); - case MVT::f32: LC = RTLIB::POWI_F32; break; - case MVT::f64: LC = RTLIB::POWI_F64; break; - case MVT::f80: LC = RTLIB::POWI_F80; break; - case MVT::f128: LC = RTLIB::POWI_F128; break; - case MVT::ppcf128: LC = RTLIB::POWI_PPCF128; break; - } + RTLIB::Libcall LC = RTLIB::getPOWI(Node->getSimpleValueType(0)); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fpowi."); if (!TLI.getLibcallName(LC)) { // Some targets don't have a powi libcall; use pow instead. 
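// That fallback converts the integer exponent and retargets the call; in
// effect (sketch): powi(x, n) ==> pow(x, (double)n), which is what the
// SINT_TO_FP + FPOW nodes below build.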
SDValue Exponent = DAG.getNode(ISD::SINT_TO_FP, SDLoc(Node), @@ -4318,9 +4048,18 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { Exponent)); break; } - ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64, - RTLIB::POWI_F80, RTLIB::POWI_F128, - RTLIB::POWI_PPCF128, Results); + unsigned Offset = Node->isStrictFPOpcode() ? 1 : 0; + bool ExponentHasSizeOfInt = + DAG.getLibInfo().getIntSize() == + Node->getOperand(1 + Offset).getValueType().getSizeInBits(); + if (!ExponentHasSizeOfInt) { + // If the exponent does not match with sizeof(int) a libcall to + // RTLIB::POWI would use the wrong type for the argument. + DAG.getContext()->emitError("POWI exponent does not match sizeof(int)"); + Results.push_back(DAG.getUNDEF(Node->getValueType(0))); + break; + } + ExpandFPLibCall(Node, LC, Results); break; } case ISD::FPOW: @@ -4634,11 +4373,12 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Node->getOpcode() == ISD::STRICT_FSETCC || Node->getOpcode() == ISD::STRICT_FSETCCS) OVT = Node->getOperand(1).getSimpleValueType(); - if (Node->getOpcode() == ISD::BR_CC) + if (Node->getOpcode() == ISD::BR_CC || + Node->getOpcode() == ISD::SELECT_CC) OVT = Node->getOperand(2).getSimpleValueType(); MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT); SDLoc dl(Node); - SDValue Tmp1, Tmp2, Tmp3; + SDValue Tmp1, Tmp2, Tmp3, Tmp4; switch (Node->getOpcode()) { case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: @@ -4830,6 +4570,51 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Results.push_back(Tmp1); break; } + case ISD::VECTOR_SPLICE: { + Tmp1 = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Node->getOperand(1)); + Tmp3 = DAG.getNode(ISD::VECTOR_SPLICE, dl, NVT, Tmp1, Tmp2, + Node->getOperand(2)); + Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp3)); + break; + } + case ISD::SELECT_CC: { + SDValue Cond = Node->getOperand(4); + ISD::CondCode CCCode = cast<CondCodeSDNode>(Cond)->get(); + // Type of the comparison operands. + MVT CVT = Node->getSimpleValueType(0); + assert(CVT == OVT && "not handled"); + + unsigned ExtOp = ISD::FP_EXTEND; + if (NVT.isInteger()) { + ExtOp = isSignedIntSetCC(CCCode) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + } + + // Promote the comparison operands, if needed. + if (TLI.isCondCodeLegal(CCCode, CVT)) { + Tmp1 = Node->getOperand(0); + Tmp2 = Node->getOperand(1); + } else { + Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1)); + } + // Cast the true/false operands. + Tmp3 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2)); + Tmp4 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(3)); + + Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, NVT, {Tmp1, Tmp2, Tmp3, Tmp4, Cond}, + Node->getFlags()); + + // Cast the result back to the original type. 
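// Overall shape of this SELECT_CC promotion, e.g. i8 under an unsigned
// condition (illustrative; the compare operands are only widened when the
// condition code is illegal at the narrow type):
//   %l = zext i8 %lhs to i32
//   %r = zext i8 %rhs to i32
//   %t = zext i8 %tv to i32
//   %f = zext i8 %fv to i32
//   %s = select_cc %l, %r, %t, %f, cc   ; done in the promoted type
//   %res = trunc i32 %s to i8           ; the cast-back just below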
+ if (ExtOp != ISD::FP_EXTEND) + Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1); + else + Tmp1 = DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp1, + DAG.getIntPtrConstant(0, dl)); + + Results.push_back(Tmp1); + break; + } case ISD::SETCC: case ISD::STRICT_FSETCC: case ISD::STRICT_FSETCCS: { diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 966645e3256d..3553f9ec16c2 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -19,6 +19,7 @@ //===----------------------------------------------------------------------===// #include "LegalizeTypes.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -26,6 +27,8 @@ using namespace llvm; #define DEBUG_TYPE "legalize-types" /// GetFPLibCall - Return the right libcall for the given floating point type. +/// FIXME: This is a local version of RTLIB::getFPLibCall that should be +/// refactored away (see RTLIB::getPOWI for an example). static RTLIB::Libcall GetFPLibCall(EVT VT, RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, @@ -570,14 +573,11 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { bool IsStrict = N->isStrictFPOpcode(); unsigned Offset = IsStrict ? 1 : 0; - assert(N->getOperand(1 + Offset).getValueType() == MVT::i32 && + assert((N->getOperand(1 + Offset).getValueType() == MVT::i16 || + N->getOperand(1 + Offset).getValueType() == MVT::i32) && "Unsupported power type!"); - RTLIB::Libcall LC = GetFPLibCall(N->getValueType(0), - RTLIB::POWI_F32, - RTLIB::POWI_F64, - RTLIB::POWI_F80, - RTLIB::POWI_F128, - RTLIB::POWI_PPCF128); + RTLIB::Libcall LC = RTLIB::getPOWI(N->getValueType(0)); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fpowi."); if (!TLI.getLibcallName(LC)) { // Some targets don't have a powi libcall; use pow instead. // FIXME: Implement this if some target needs it. @@ -585,6 +585,14 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { return DAG.getUNDEF(N->getValueType(0)); } + if (DAG.getLibInfo().getIntSize() != + N->getOperand(1 + Offset).getValueType().getSizeInBits()) { + // If the exponent does not match with sizeof(int) a libcall to RTLIB::POWI + // would use the wrong type for the argument. 
+ DAG.getContext()->emitError("POWI exponent does not match sizeof(int)"); + return DAG.getUNDEF(N->getValueType(0)); + } + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0 + Offset)), N->getOperand(1 + Offset) }; @@ -1515,10 +1523,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FPOW(SDNode *N, void DAGTypeLegalizer::ExpandFloatRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi) { - ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), - RTLIB::POWI_F32, RTLIB::POWI_F64, - RTLIB::POWI_F80, RTLIB::POWI_F128, - RTLIB::POWI_PPCF128), Lo, Hi); + ExpandFloatRes_Binary(N, RTLIB::getPOWI(N->getValueType(0)), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FREEZE(SDNode *N, diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 4a686bc227de..b8a3dd014901 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -18,6 +18,7 @@ //===----------------------------------------------------------------------===// #include "LegalizeTypes.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" @@ -96,8 +97,14 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::EXTRACT_SUBVECTOR: Res = PromoteIntRes_EXTRACT_SUBVECTOR(N); break; + case ISD::INSERT_SUBVECTOR: + Res = PromoteIntRes_INSERT_SUBVECTOR(N); break; + case ISD::VECTOR_REVERSE: + Res = PromoteIntRes_VECTOR_REVERSE(N); break; case ISD::VECTOR_SHUFFLE: Res = PromoteIntRes_VECTOR_SHUFFLE(N); break; + case ISD::VECTOR_SPLICE: + Res = PromoteIntRes_VECTOR_SPLICE(N); break; case ISD::INSERT_VECTOR_ELT: Res = PromoteIntRes_INSERT_VECTOR_ELT(N); break; case ISD::BUILD_VECTOR: @@ -106,6 +113,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { Res = PromoteIntRes_SCALAR_TO_VECTOR(N); break; case ISD::SPLAT_VECTOR: Res = PromoteIntRes_SPLAT_VECTOR(N); break; + case ISD::STEP_VECTOR: Res = PromoteIntRes_STEP_VECTOR(N); break; case ISD::CONCAT_VECTORS: Res = PromoteIntRes_CONCAT_VECTORS(N); break; @@ -455,6 +463,16 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { EVT NVT = Op.getValueType(); SDLoc dl(N); + // If the larger BSWAP isn't supported by the target, try to expand now. + // If we expand later we'll end up with more operations since we lost the + // original type. We only do this for scalars since we have a shuffle + // based lowering for vectors in LegalizeVectorOps. + if (!OVT.isVector() && + !TLI.isOperationLegalOrCustomOrPromote(ISD::BSWAP, NVT)) { + if (SDValue Res = TLI.expandBSWAP(N, DAG)) + return DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Res); + } + unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(); EVT ShiftVT = getShiftAmountTyForConstant(NVT, TLI, DAG); return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op), @@ -467,6 +485,16 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) { EVT NVT = Op.getValueType(); SDLoc dl(N); + // If the larger BITREVERSE isn't supported by the target, try to expand now. + // If we expand later we'll end up with more operations since we lost the + // original type. We only do this for scalars since we have a shuffle + // based lowering for vectors in LegalizeVectorOps. 
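// The scalar expansion used here is the classic mask-and-shift ladder
// (the same masks as the ExpandBITREVERSE code removed earlier); for one
// byte, as a runnable sketch:
//   uint8_t bitreverse8(uint8_t V) {
//     V = (uint8_t)(((V & 0xF0) >> 4) | ((V & 0x0F) << 4)); // swap nibbles
//     V = (uint8_t)(((V & 0xCC) >> 2) | ((V & 0x33) << 2)); // swap pairs
//     V = (uint8_t)(((V & 0xAA) >> 1) | ((V & 0x55) << 1)); // swap bits
//     return V;
//   }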
+ if (!OVT.isVector() && OVT.isSimple() && + !TLI.isOperationLegalOrCustomOrPromote(ISD::BITREVERSE, NVT)) { + if (SDValue Res = TLI.expandBITREVERSE(N, DAG)) + return DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Res); + } + unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(); EVT ShiftVT = getShiftAmountTyForConstant(NVT, TLI, DAG); return DAG.getNode(ISD::SRL, dl, NVT, @@ -763,6 +791,19 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) { EVT PromotedType = Op1Promoted.getValueType(); unsigned NewBits = PromotedType.getScalarSizeInBits(); + if (Opcode == ISD::UADDSAT) { + APInt MaxVal = APInt::getAllOnesValue(OldBits).zext(NewBits); + SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType); + SDValue Add = + DAG.getNode(ISD::ADD, dl, PromotedType, Op1Promoted, Op2Promoted); + return DAG.getNode(ISD::UMIN, dl, PromotedType, Add, SatMax); + } + + // USUBSAT can always be promoted as long as we have zero-extended the args. + if (Opcode == ISD::USUBSAT) + return DAG.getNode(ISD::USUBSAT, dl, PromotedType, Op1Promoted, + Op2Promoted); + // Shift cannot use a min/max expansion, we can't detect overflow if all of // the bits have been shifted out. if (IsShift || TLI.isOperationLegalOrCustom(Opcode, PromotedType)) { @@ -773,8 +814,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) { case ISD::SSHLSAT: ShiftOp = ISD::SRA; break; - case ISD::UADDSAT: - case ISD::USUBSAT: case ISD::USHLSAT: ShiftOp = ISD::SRL; break; @@ -795,32 +834,18 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) { SDValue Result = DAG.getNode(Opcode, dl, PromotedType, Op1Promoted, Op2Promoted); return DAG.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount); - } else { - if (Opcode == ISD::USUBSAT) { - SDValue Max = - DAG.getNode(ISD::UMAX, dl, PromotedType, Op1Promoted, Op2Promoted); - return DAG.getNode(ISD::SUB, dl, PromotedType, Max, Op2Promoted); - } - - if (Opcode == ISD::UADDSAT) { - APInt MaxVal = APInt::getAllOnesValue(OldBits).zext(NewBits); - SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType); - SDValue Add = - DAG.getNode(ISD::ADD, dl, PromotedType, Op1Promoted, Op2Promoted); - return DAG.getNode(ISD::UMIN, dl, PromotedType, Add, SatMax); - } - - unsigned AddOp = Opcode == ISD::SADDSAT ? ISD::ADD : ISD::SUB; - APInt MinVal = APInt::getSignedMinValue(OldBits).sext(NewBits); - APInt MaxVal = APInt::getSignedMaxValue(OldBits).sext(NewBits); - SDValue SatMin = DAG.getConstant(MinVal, dl, PromotedType); - SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType); - SDValue Result = - DAG.getNode(AddOp, dl, PromotedType, Op1Promoted, Op2Promoted); - Result = DAG.getNode(ISD::SMIN, dl, PromotedType, Result, SatMax); - Result = DAG.getNode(ISD::SMAX, dl, PromotedType, Result, SatMin); - return Result; } + + unsigned AddOp = Opcode == ISD::SADDSAT ? 
ISD::ADD : ISD::SUB; + APInt MinVal = APInt::getSignedMinValue(OldBits).sext(NewBits); + APInt MaxVal = APInt::getSignedMaxValue(OldBits).sext(NewBits); + SDValue SatMin = DAG.getConstant(MinVal, dl, PromotedType); + SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType); + SDValue Result = + DAG.getNode(AddOp, dl, PromotedType, Op1Promoted, Op2Promoted); + Result = DAG.getNode(ISD::SMIN, dl, PromotedType, Result, SatMax); + Result = DAG.getNode(ISD::SMAX, dl, PromotedType, Result, SatMin); + return Result; } SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) { @@ -1217,17 +1242,17 @@ SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) { case TargetLowering::TypeSplitVector: { EVT InVT = InOp.getValueType(); assert(InVT.isVector() && "Cannot split scalar types"); - unsigned NumElts = InVT.getVectorNumElements(); - assert(NumElts == NVT.getVectorNumElements() && + ElementCount NumElts = InVT.getVectorElementCount(); + assert(NumElts == NVT.getVectorElementCount() && "Dst and Src must have the same number of elements"); - assert(isPowerOf2_32(NumElts) && + assert(isPowerOf2_32(NumElts.getKnownMinValue()) && "Promoted vector type must be a power of two"); SDValue EOp1, EOp2; GetSplitVector(InOp, EOp1, EOp2); EVT HalfNVT = EVT::getVectorVT(*DAG.getContext(), NVT.getScalarType(), - NumElts/2); + NumElts.divideCoefficientBy(2)); EOp1 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp1); EOp2 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp2); @@ -1535,6 +1560,8 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::VECREDUCE_SMIN: case ISD::VECREDUCE_UMAX: case ISD::VECREDUCE_UMIN: Res = PromoteIntOp_VECREDUCE(N); break; + + case ISD::SET_ROUNDING: Res = PromoteIntOp_SET_ROUNDING(N); break; } // If the result is null, the sub-method took care of registering results etc. @@ -1963,8 +1990,37 @@ SDValue DAGTypeLegalizer::PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo) { } SDValue DAGTypeLegalizer::PromoteIntOp_FPOWI(SDNode *N) { - SDValue Op = SExtPromotedInteger(N->getOperand(1)); - return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op), 0); + // FIXME: Support for promotion of STRICT_FPOWI is not implemented yet. + assert(N->getOpcode() == ISD::FPOWI && "No STRICT_FPOWI support here yet."); + + // The integer operand is the last operand in FPOWI (so the result and + // floating point operand is already type legalized). + + // We can't just promote the exponent type in FPOWI, since we want to lower + // the node to a libcall and we if we promote to a type larger than + // sizeof(int) the libcall might not be according to the targets ABI. Instead + // we rewrite to a libcall here directly, letting makeLibCall handle promotion + // if the target accepts it according to shouldSignExtendTypeInLibCall. + RTLIB::Libcall LC = RTLIB::getPOWI(N->getValueType(0)); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fpowi."); + if (!TLI.getLibcallName(LC)) { + // Some targets don't have a powi libcall; use pow instead. + // FIXME: Implement this if some target needs it. + DAG.getContext()->emitError("Don't know how to promote fpowi to fpow"); + return DAG.getUNDEF(N->getValueType(0)); + } + // The exponent should fit in a sizeof(int) type for the libcall to be valid. 
+ assert(DAG.getLibInfo().getIntSize() == + N->getOperand(1).getValueType().getSizeInBits() && + "POWI exponent should match with sizeof(int) when doing the libcall."); + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + std::pair<SDValue, SDValue> Tmp = + TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, + CallOptions, SDLoc(N), SDValue()); + ReplaceValueWith(SDValue(N, 0), Tmp.first); + return SDValue(); } SDValue DAGTypeLegalizer::PromoteIntOp_VECREDUCE(SDNode *N) { @@ -2000,6 +2056,11 @@ SDValue DAGTypeLegalizer::PromoteIntOp_VECREDUCE(SDNode *N) { return DAG.getNode(ISD::TRUNCATE, dl, VT, Reduce); } +SDValue DAGTypeLegalizer::PromoteIntOp_SET_ROUNDING(SDNode *N) { + SDValue Op = ZExtPromotedInteger(N->getOperand(1)); + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op), 0); +} + //===----------------------------------------------------------------------===// // Integer Result Expansion //===----------------------------------------------------------------------===// @@ -2186,6 +2247,10 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::FSHR: ExpandIntRes_FunnelShift(N, Lo, Hi); break; + + case ISD::VSCALE: + ExpandIntRes_VSCALE(N, Lo, Hi); + break; } // If Lo/Hi is null, the sub-method took care of registering results etc. @@ -2197,7 +2262,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { unsigned Opc = Node->getOpcode(); MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT(); - AtomicOrdering order = cast<AtomicSDNode>(Node)->getOrdering(); + AtomicOrdering order = cast<AtomicSDNode>(Node)->getMergedOrdering(); // Lower to outline atomic libcall if outline atomics enabled, // or to sync libcall otherwise RTLIB::Libcall LC = RTLIB::getOUTLINE_ATOMIC(Opc, order, VT); @@ -3598,18 +3663,16 @@ void DAGTypeLegalizer::ExpandIntRes_DIVFIX(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node, SDValue &Lo, SDValue &Hi) { + assert((Node->getOpcode() == ISD::SADDO || Node->getOpcode() == ISD::SSUBO) && + "Node has unexpected Opcode"); SDValue LHS = Node->getOperand(0); SDValue RHS = Node->getOperand(1); SDLoc dl(Node); SDValue Ovf; - unsigned CarryOp; - switch(Node->getOpcode()) { - default: llvm_unreachable("Node has unexpected Opcode"); - case ISD::SADDO: CarryOp = ISD::SADDO_CARRY; break; - case ISD::SSUBO: CarryOp = ISD::SSUBO_CARRY; break; - } + bool IsAdd = Node->getOpcode() == ISD::SADDO; + unsigned CarryOp = IsAdd ? ISD::SADDO_CARRY : ISD::SSUBO_CARRY; bool HasCarryOp = TLI.isOperationLegalOrCustom( CarryOp, TLI.getTypeToExpandTo(*DAG.getContext(), LHS.getValueType())); @@ -3621,8 +3684,7 @@ void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node, GetExpandedInteger(RHS, RHSL, RHSH); SDVTList VTList = DAG.getVTList(LHSL.getValueType(), Node->getValueType(1)); - Lo = DAG.getNode(Node->getOpcode() == ISD::SADDO ? - ISD::UADDO : ISD::USUBO, dl, VTList, { LHSL, RHSL }); + Lo = DAG.getNode(IsAdd ? ISD::UADDO : ISD::USUBO, dl, VTList, {LHSL, RHSL}); Hi = DAG.getNode(CarryOp, dl, VTList, { LHSH, RHSH, Lo.getValue(1) }); Ovf = Hi.getValue(1); @@ -3636,28 +3698,36 @@ void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node, // Compute the overflow. 
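+ // A worked i8 example of the rewritten check below: 100 + 100 = 0xC8. + // LHS ^ RHS = 0, so ~(LHS ^ RHS) has the sign bit set, and LHS ^ Sum = + // 0x64 ^ 0xC8 = 0xAC does too; their AND is negative, signalling overflow. 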
// - // LHSSign -> LHS >= 0 - // RHSSign -> RHS >= 0 - // SumSign -> Sum >= 0 + // LHSSign -> LHS < 0 + // RHSSign -> RHS < 0 + // SumSign -> Sum < 0 // // Add: // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign) // Sub: // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) // + // To get better codegen we can rewrite this by doing bitwise math on + // the integers and extract the final sign bit at the end. So the + // above becomes: + // + // Add: + // Overflow -> (~(LHS ^ RHS) & (LHS ^ Sum)) < 0 + // Sub: + // Overflow -> ((LHS ^ RHS) & (LHS ^ Sum)) < 0 + // + // NOTE: This is different than the expansion we do in expandSADDSUBO + // because it is more costly to determine the RHS is > 0 for SSUBO with the + // integers split. + EVT VT = LHS.getValueType(); + SDValue SignsMatch = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS); + if (IsAdd) + SignsMatch = DAG.getNOT(dl, SignsMatch, VT); + + SDValue SumSignNE = DAG.getNode(ISD::XOR, dl, VT, LHS, Sum); + Ovf = DAG.getNode(ISD::AND, dl, VT, SignsMatch, SumSignNE); EVT OType = Node->getValueType(1); - SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType()); - - SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE); - SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE); - SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign, - Node->getOpcode() == ISD::SADDO ? - ISD::SETEQ : ISD::SETNE); - - SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE); - SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE); - - Ovf = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE); + Ovf = DAG.getSetCC(dl, OType, Ovf, DAG.getConstant(0, dl, VT), ISD::SETLT); } // Use the calculated overflow everywhere. @@ -3909,33 +3979,32 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, // %1 = { iNh, i1 } @umul.with.overflow.iNh(iNh %LHS.HI, iNh %RHS.LO) // %2 = { iNh, i1 } @umul.with.overflow.iNh(iNh %RHS.HI, iNh %LHS.LO) // %3 = mul nuw iN (%LHS.LOW as iN), (%RHS.LOW as iN) - // %4 = add iN (%1.0 as iN) << Nh, (%2.0 as iN) << Nh - // %5 = { iN, i1 } @uadd.with.overflow.iN( %4, %3 ) + // %4 = add iNh %1.0, %2.0 as iN + // %5 = { iNh, i1 } @uadd.with.overflow.iNh(iNh %4, iNh %3.HIGH) // - // %res = { %5.0, %0 || %1.1 || %2.1 || %5.1 } + // %lo = %3.LO + // %hi = %5.0 + // %ovf = %0 || %1.1 || %2.1 || %5.1 SDValue LHS = N->getOperand(0), RHS = N->getOperand(1); SDValue LHSHigh, LHSLow, RHSHigh, RHSLow; - SplitInteger(LHS, LHSLow, LHSHigh); - SplitInteger(RHS, RHSLow, RHSHigh); - EVT HalfVT = LHSLow.getValueType() - , BitVT = N->getValueType(1); - SDVTList VTHalfMulO = DAG.getVTList(HalfVT, BitVT); - SDVTList VTFullAddO = DAG.getVTList(VT, BitVT); + GetExpandedInteger(LHS, LHSLow, LHSHigh); + GetExpandedInteger(RHS, RHSLow, RHSHigh); + EVT HalfVT = LHSLow.getValueType(); + EVT BitVT = N->getValueType(1); + SDVTList VTHalfWithO = DAG.getVTList(HalfVT, BitVT); SDValue HalfZero = DAG.getConstant(0, dl, HalfVT); SDValue Overflow = DAG.getNode(ISD::AND, dl, BitVT, DAG.getSetCC(dl, BitVT, LHSHigh, HalfZero, ISD::SETNE), DAG.getSetCC(dl, BitVT, RHSHigh, HalfZero, ISD::SETNE)); - SDValue One = DAG.getNode(ISD::UMULO, dl, VTHalfMulO, LHSHigh, RHSLow); + SDValue One = DAG.getNode(ISD::UMULO, dl, VTHalfWithO, LHSHigh, RHSLow); Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, One.getValue(1)); - SDValue OneInHigh = DAG.getNode(ISD::BUILD_PAIR, dl, VT, HalfZero, - One.getValue(0)); - SDValue Two = DAG.getNode(ISD::UMULO, dl, VTHalfMulO, RHSHigh, LHSLow); + SDValue Two = DAG.getNode(ISD::UMULO, dl, 
VTHalfWithO, RHSHigh, LHSLow); Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, Two.getValue(1)); - SDValue TwoInHigh = DAG.getNode(ISD::BUILD_PAIR, dl, VT, HalfZero, - Two.getValue(0)); + + SDValue HighSum = DAG.getNode(ISD::ADD, dl, HalfVT, One, Two); // Cannot use `UMUL_LOHI` directly, because some 32-bit targets (ARM) do not // know how to expand `i64,i64 = umul_lohi a, b` and abort (why isn’t this @@ -3946,10 +4015,10 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, SDValue Three = DAG.getNode(ISD::MUL, dl, VT, DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LHSLow), DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RHSLow)); - SDValue Four = DAG.getNode(ISD::ADD, dl, VT, OneInHigh, TwoInHigh); - SDValue Five = DAG.getNode(ISD::UADDO, dl, VTFullAddO, Three, Four); - Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, Five.getValue(1)); - SplitInteger(Five, Lo, Hi); + SplitInteger(Three, Lo, Hi); + + Hi = DAG.getNode(ISD::UADDO, dl, VTHalfWithO, Hi, HighSum); + Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, Hi.getValue(1)); ReplaceValueWith(SDValue(N, 1), Overflow); return; } @@ -4133,6 +4202,21 @@ void DAGTypeLegalizer::ExpandIntRes_FunnelShift(SDNode *N, SplitInteger(Res, Lo, Hi); } +void DAGTypeLegalizer::ExpandIntRes_VSCALE(SDNode *N, SDValue &Lo, + SDValue &Hi) { + EVT VT = N->getValueType(0); + EVT HalfVT = + EVT::getIntegerVT(*DAG.getContext(), N->getValueSizeInBits(0) / 2); + SDLoc dl(N); + + // We assume VSCALE(1) fits into a legal integer. + APInt One(HalfVT.getSizeInBits(), 1); + SDValue VScaleBase = DAG.getVScale(dl, HalfVT, One); + VScaleBase = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, VScaleBase); + SDValue Res = DAG.getNode(ISD::MUL, dl, VT, VScaleBase, N->getOperand(0)); + SplitInteger(Res, Lo, Hi); +} + //===----------------------------------------------------------------------===// // Integer Operand Expansion //===----------------------------------------------------------------------===// @@ -4163,6 +4247,7 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break; case ISD::INSERT_VECTOR_ELT: Res = ExpandOp_INSERT_VECTOR_ELT(N); break; case ISD::SCALAR_TO_VECTOR: Res = ExpandOp_SCALAR_TO_VECTOR(N); break; + case ISD::SPLAT_VECTOR: Res = ExpandIntOp_SPLAT_VECTOR(N); break; case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break; case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break; case ISD::SETCCCARRY: Res = ExpandIntOp_SETCCCARRY(N); break; @@ -4418,6 +4503,14 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SETCCCARRY(SDNode *N) { LowCmp.getValue(1), Cond); } +SDValue DAGTypeLegalizer::ExpandIntOp_SPLAT_VECTOR(SDNode *N) { + // Split the operand and replace with SPLAT_VECTOR_PARTS. + SDValue Lo, Hi; + GetExpandedInteger(N->getOperand(0), Lo, Hi); + return DAG.getNode(ISD::SPLAT_VECTOR_PARTS, SDLoc(N), N->getValueType(0), Lo, + Hi); +} + SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) { // The value being shifted is legal, but the shift amount is too big. 
// It follows that either the result of the shift is undefined, or the @@ -4587,6 +4680,15 @@ SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) { return Swap.getValue(1); } +SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SPLICE(SDNode *N) { + SDLoc dl(N); + + SDValue V0 = GetPromotedInteger(N->getOperand(0)); + SDValue V1 = GetPromotedInteger(N->getOperand(1)); + EVT OutVT = V0.getValueType(); + + return DAG.getNode(ISD::VECTOR_SPLICE, dl, OutVT, V0, V1, N->getOperand(2)); +} SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) { @@ -4648,6 +4750,35 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) { return DAG.getBuildVector(NOutVT, dl, Ops); } +SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_SUBVECTOR(SDNode *N) { + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); + assert(NOutVT.isVector() && "This type must be promoted to a vector type"); + + SDLoc dl(N); + SDValue Vec = N->getOperand(0); + SDValue SubVec = N->getOperand(1); + SDValue Idx = N->getOperand(2); + + EVT SubVecVT = SubVec.getValueType(); + EVT NSubVT = + EVT::getVectorVT(*DAG.getContext(), NOutVT.getVectorElementType(), + SubVecVT.getVectorElementCount()); + + Vec = GetPromotedInteger(Vec); + SubVec = DAG.getNode(ISD::ANY_EXTEND, dl, NSubVT, SubVec); + + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NOutVT, Vec, SubVec, Idx); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_REVERSE(SDNode *N) { + SDLoc dl(N); + + SDValue V0 = GetPromotedInteger(N->getOperand(0)); + EVT OutVT = V0.getValueType(); + + return DAG.getNode(ISD::VECTOR_REVERSE, dl, OutVT, V0); +} SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SHUFFLE(SDNode *N) { ShuffleVectorSDNode *SV = cast<ShuffleVectorSDNode>(N); @@ -4725,6 +4856,16 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SPLAT_VECTOR(SDNode *N) { return DAG.getNode(ISD::SPLAT_VECTOR, dl, NOutVT, Op); } +SDValue DAGTypeLegalizer::PromoteIntRes_STEP_VECTOR(SDNode *N) { + SDLoc dl(N); + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); + assert(NOutVT.isVector() && "Type must be promoted to a vector type"); + APInt StepVal = cast<ConstantSDNode>(N->getOperand(0))->getAPIntValue(); + return DAG.getStepVector(dl, NOutVT, + StepVal.sext(NOutVT.getScalarSizeInBits())); +} + SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { SDLoc dl(N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index a59f03854775..05a974af3b55 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -182,9 +182,8 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { // Checked that NewNodes are only used by other NewNodes. 
for (unsigned i = 0, e = NewNodes.size(); i != e; ++i) { SDNode *N = NewNodes[i]; - for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); - UI != UE; ++UI) - assert(UI->getNodeId() == NewNode && "NewNode used by non-NewNode!"); + for (SDNode *U : N->uses()) + assert(U->getNodeId() == NewNode && "NewNode used by non-NewNode!"); } #endif } @@ -396,9 +395,7 @@ NodeDone: assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?"); N->setNodeId(Processed); - for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); - UI != E; ++UI) { - SDNode *User = *UI; + for (SDNode *User : N->uses()) { int NodeId = User->getNodeId(); // This node has two options: it can either be a new node or its Node ID diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 630a0a9adaf7..8d17d8fc68b1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -298,10 +298,14 @@ private: SDValue PromoteIntRes_Atomic1(AtomicSDNode *N); SDValue PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N, unsigned ResNo); SDValue PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N); + SDValue PromoteIntRes_INSERT_SUBVECTOR(SDNode *N); + SDValue PromoteIntRes_VECTOR_REVERSE(SDNode *N); SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N); + SDValue PromoteIntRes_VECTOR_SPLICE(SDNode *N); SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N); SDValue PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N); SDValue PromoteIntRes_SPLAT_VECTOR(SDNode *N); + SDValue PromoteIntRes_STEP_VECTOR(SDNode *N); SDValue PromoteIntRes_EXTEND_VECTOR_INREG(SDNode *N); SDValue PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N); SDValue PromoteIntRes_CONCAT_VECTORS(SDNode *N); @@ -390,6 +394,7 @@ private: SDValue PromoteIntOp_FIX(SDNode *N); SDValue PromoteIntOp_FPOWI(SDNode *N); SDValue PromoteIntOp_VECREDUCE(SDNode *N); + SDValue PromoteIntOp_SET_ROUNDING(SDNode *N); void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code); @@ -460,6 +465,8 @@ private: void ExpandIntRes_Rotate (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_FunnelShift (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_VSCALE (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandShiftByConstant(SDNode *N, const APInt &Amt, SDValue &Lo, SDValue &Hi); bool ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -478,6 +485,7 @@ private: SDValue ExpandIntOp_UINT_TO_FP(SDNode *N); SDValue ExpandIntOp_RETURNADDR(SDNode *N); SDValue ExpandIntOp_ATOMIC_STORE(SDNode *N); + SDValue ExpandIntOp_SPLAT_VECTOR(SDNode *N); void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &dl); @@ -832,9 +840,12 @@ private: void SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue &Lo, SDValue &Hi); void SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, SDValue &Lo, SDValue &Hi); void SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_STEP_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_VECTOR_REVERSE(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_VECTOR_SPLICE(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_FP_TO_XINT_SAT(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -920,6 +931,7 @@ private: SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N); SDValue WidenVecOp_EXTEND(SDNode *N); SDValue 
WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue WidenVecOp_INSERT_SUBVECTOR(SDNode *N); SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue WidenVecOp_STORE(SDNode* N); SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 4015a5a0ce70..ebe3bfc4b75a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -138,6 +138,7 @@ class VectorLegalizer { SDValue ExpandStore(SDNode *N); SDValue ExpandFNEG(SDNode *Node); void ExpandFSUB(SDNode *Node, SmallVectorImpl<SDValue> &Results); + void ExpandSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results); void ExpandBITREVERSE(SDNode *Node, SmallVectorImpl<SDValue> &Results); void ExpandUADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results); void ExpandSADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results); @@ -396,7 +397,6 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::SELECT: case ISD::VSELECT: case ISD::SELECT_CC: - case ISD::SETCC: case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: case ISD::TRUNCATE: @@ -457,6 +457,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::USHLSAT: case ISD::FP_TO_SINT_SAT: case ISD::FP_TO_UINT_SAT: + case ISD::MGATHER: Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); break; case ISD::SMULFIX: @@ -495,6 +496,14 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { Action = TLI.getOperationAction(Node->getOpcode(), Node->getOperand(1).getValueType()); break; + case ISD::SETCC: { + MVT OpVT = Node->getOperand(0).getSimpleValueType(); + ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get(); + Action = TLI.getCondCodeAction(CCCode, OpVT); + if (Action == TargetLowering::Legal) + Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); + break; + } } LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG)); @@ -529,7 +538,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { return RecursivelyLegalizeResults(Op, ResultVals); } -// FIME: This is very similar to the X86 override of +// FIXME: This is very similar to the X86 override of // TargetLowering::LowerOperationWrapper. Can we merge them somehow? bool VectorLegalizer::LowerOperationWrapper(SDNode *Node, SmallVectorImpl<SDValue> &Results) { @@ -762,7 +771,7 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { ExpandFSUB(Node, Results); return; case ISD::SETCC: - Results.push_back(UnrollVSETCC(Node)); + ExpandSETCC(Node, Results); return; case ISD::ABS: if (TLI.expandABS(Node, Tmp, DAG)) { @@ -916,11 +925,16 @@ SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) { // AND,OR,XOR, we will have to scalarize the op. // Notice that the operation may be 'promoted' which means that it is // 'bitcasted' to another type which is handled. - // Also, we need to be able to construct a splat vector using BUILD_VECTOR. + // Also, we need to be able to construct a splat vector using either + // BUILD_VECTOR or SPLAT_VECTOR. + // FIXME: Should we also permit fixed-length SPLAT_VECTOR as a fallback to + // BUILD_VECTOR? 
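+ // Roughly, the expansion below computes + // Mask = splat(Cond ? all-ones : 0) + // Result = (Op1 & Mask) | (Op2 & ~Mask) + // which is why AND/OR/XOR and a splat constructor must all be available. 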
if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand || TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand || - TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand || - TLI.getOperationAction(ISD::BUILD_VECTOR, VT) == TargetLowering::Expand) + TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand || + TLI.getOperationAction(VT.isFixedLengthVector() ? ISD::BUILD_VECTOR + : ISD::SPLAT_VECTOR, + VT) == TargetLowering::Expand) return DAG.UnrollVectorOp(Node); // Generate a mask operand. @@ -934,8 +948,11 @@ SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) { BitTy), DAG.getConstant(0, DL, BitTy)); - // Broadcast the mask so that the entire vector is all-one or all zero. - Mask = DAG.getSplatBuildVector(MaskTy, DL, Mask); + // Broadcast the mask so that the entire vector is all one or all zero. + if (VT.isFixedLengthVector()) + Mask = DAG.getSplatBuildVector(MaskTy, DL, Mask); + else + Mask = DAG.getSplatVector(MaskTy, DL, Mask); // Bitcast the operands to be the same type as the mask. // This is needed when we select between FP types because @@ -1163,14 +1180,19 @@ SDValue VectorLegalizer::ExpandVSELECT(SDNode *Node) { // AND,OR,XOR, we will have to scalarize the op. // Notice that the operation may be 'promoted' which means that it is // 'bitcasted' to another type which is handled. - // This operation also isn't safe with AND, OR, XOR when the boolean - // type is 0/1 as we need an all ones vector constant to mask with. - // FIXME: Sign extend 1 to all ones if thats legal on the target. if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand || TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand || - TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand || - TLI.getBooleanContents(Op1.getValueType()) != - TargetLowering::ZeroOrNegativeOneBooleanContent) + TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand) + return DAG.UnrollVectorOp(Node); + + // This operation also isn't safe with AND, OR, XOR when the boolean type is + // 0/1 and the select operands aren't also booleans, as we need an all-ones + // vector constant to mask with. + // FIXME: Sign extend 1 to all ones if that's legal on the target. + auto BoolContents = TLI.getBooleanContents(Op1.getValueType()); + if (BoolContents != TargetLowering::ZeroOrNegativeOneBooleanContent && + !(BoolContents == TargetLowering::ZeroOrOneBooleanContent && + Op1.getValueType().getVectorElementType() == MVT::i1)) return DAG.UnrollVectorOp(Node); // If the mask and the type are different sizes, unroll the vector op. This @@ -1331,6 +1353,50 @@ void VectorLegalizer::ExpandFSUB(SDNode *Node, Results.push_back(Tmp); } +void VectorLegalizer::ExpandSETCC(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { + bool NeedInvert = false; + SDLoc dl(Node); + MVT OpVT = Node->getOperand(0).getSimpleValueType(); + ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get(); + + if (TLI.getCondCodeAction(CCCode, OpVT) != TargetLowering::Expand) { + Results.push_back(UnrollVSETCC(Node)); + return; + } + + SDValue Chain; + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + SDValue CC = Node->getOperand(2); + bool Legalized = TLI.LegalizeSetCCCondCode(DAG, Node->getValueType(0), LHS, + RHS, CC, NeedInvert, dl, Chain); + + if (Legalized) { + // If we expanded the SETCC by swapping LHS and RHS, or by inverting the + // condition code, create a new SETCC node. 
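+ // E.g. a condition code unsupported by the target, such as SETUGT in + // (setugt x, y), may come back as (setult y, x) with the operands + // swapped, or as a setule with NeedInvert set, to be wrapped in a + // logical NOT below. 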
+ if (CC.getNode()) + LHS = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), LHS, RHS, CC, + Node->getFlags()); + + // If we expanded the SETCC by inverting the condition code, then wrap + // the existing SETCC in a NOT to restore the intended condition. + if (NeedInvert) + LHS = DAG.getLogicalNOT(dl, LHS, LHS->getValueType(0)); + } else { + // Otherwise, SETCC for the given comparison type must be completely + // illegal; expand it into a SELECT_CC. + EVT VT = Node->getValueType(0); + LHS = + DAG.getNode(ISD::SELECT_CC, dl, VT, LHS, RHS, + DAG.getBoolConstant(true, dl, VT, LHS.getValueType()), + DAG.getBoolConstant(false, dl, VT, LHS.getValueType()), CC); + LHS->setFlags(Node->getFlags()); + } + + Results.push_back(LHS); +} + void VectorLegalizer::ExpandUADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results) { SDValue Result, Overflow; diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 57cb364f1939..91242bbf866f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -90,6 +90,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::FNEARBYINT: case ISD::FNEG: case ISD::FREEZE: + case ISD::ARITH_FENCE: case ISD::FP_EXTEND: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: @@ -318,10 +319,21 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) { } SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) { - EVT NewVT = N->getValueType(0).getVectorElementType(); - SDValue Op = GetScalarizedVector(N->getOperand(0)); - return DAG.getNode(ISD::FP_ROUND, SDLoc(N), - NewVT, Op, N->getOperand(1)); + SDLoc DL(N); + SDValue Op = N->getOperand(0); + EVT OpVT = Op.getValueType(); + // The result needs scalarizing, but it's not a given that the source does. + // See similar logic in ScalarizeVecRes_UnaryOp. 
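+ // E.g. for v1f32 = fp_round v1f64 the source may itself be scalarized + // to a plain f64, but it may equally be a legal vector type, in which + // case we extract element 0 instead. 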
+ if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) { + Op = GetScalarizedVector(Op); + } else { + EVT VT = OpVT.getVectorElementType(); + Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op, + DAG.getVectorIdxConstant(0, DL)); + } + return DAG.getNode(ISD::FP_ROUND, DL, + N->getValueType(0).getVectorElementType(), Op, + N->getOperand(1)); } SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) { @@ -917,6 +929,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::SCALAR_TO_VECTOR: SplitVecRes_ScalarOp(N, Lo, Hi); break; + case ISD::STEP_VECTOR: + SplitVecRes_STEP_VECTOR(N, Lo, Hi); + break; case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; case ISD::LOAD: SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); @@ -930,9 +945,15 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::SETCC: SplitVecRes_SETCC(N, Lo, Hi); break; + case ISD::VECTOR_REVERSE: + SplitVecRes_VECTOR_REVERSE(N, Lo, Hi); + break; case ISD::VECTOR_SHUFFLE: SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi); break; + case ISD::VECTOR_SPLICE: + SplitVecRes_VECTOR_SPLICE(N, Lo, Hi); + break; case ISD::VAARG: SplitVecRes_VAARG(N, Lo, Hi); break; @@ -963,6 +984,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FNEARBYINT: case ISD::FNEG: case ISD::FREEZE: + case ISD::ARITH_FENCE: case ISD::FP_EXTEND: case ISD::FP_ROUND: case ISD::FP_TO_SINT: @@ -1243,7 +1265,7 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); Hi = DAG.getNode( ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec, - DAG.getVectorIdxConstant(IdxVal + LoVT.getVectorNumElements(), dl)); + DAG.getVectorIdxConstant(IdxVal + LoVT.getVectorMinNumElements(), dl)); } void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, @@ -1255,22 +1277,29 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, GetSplitVector(Vec, Lo, Hi); EVT VecVT = Vec.getValueType(); - unsigned VecElems = VecVT.getVectorNumElements(); - unsigned SubElems = SubVec.getValueType().getVectorNumElements(); - - // If we know the index is 0, and we know the subvector doesn't cross the - // boundary between the halves, we can avoid spilling the vector, and insert - // into the lower half of the split vector directly. - // TODO: The IdxVal == 0 constraint is artificial, we could do this whenever - // there is no boundary crossing. But those cases don't seem to get hit in - // practice. + EVT LoVT = Lo.getValueType(); + EVT SubVecVT = SubVec.getValueType(); + unsigned VecElems = VecVT.getVectorMinNumElements(); + unsigned SubElems = SubVecVT.getVectorMinNumElements(); + unsigned LoElems = LoVT.getVectorMinNumElements(); + + // If we know the index is in the first half, and we know the subvector + // doesn't cross the boundary between the halves, we can avoid spilling the + // vector, and insert into the lower half of the split vector directly. unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); - if ((IdxVal == 0) && (IdxVal + SubElems <= VecElems / 2)) { - EVT LoVT, HiVT; - std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + if (IdxVal + SubElems <= LoElems) { Lo = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, LoVT, Lo, SubVec, Idx); return; } + // Similarly if the subvector is fully in the high half, but mind that we + // can't tell whether a fixed-length subvector is fully within the high half + // of a scalable vector. 
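+ // E.g. inserting a v2i32 subvector at index 2 into a v4i32 vector that + // was split into two v2i32 halves lands entirely in Hi, at adjusted + // index 0. 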
+ if (VecVT.isScalableVector() == SubVecVT.isScalableVector() && + IdxVal >= LoElems && IdxVal + SubElems <= VecElems) { + Hi = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, Hi.getValueType(), Hi, SubVec, + DAG.getVectorIdxConstant(IdxVal - LoElems, dl)); + return; + } // Spill the vector to the stack. // In cases where the vector is illegal it will be broken down into parts @@ -1286,7 +1315,8 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, SmallestAlign); // Store the new subvector into the specified index. - SDValue SubVecPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); + SDValue SubVecPtr = + TLI.getVectorSubVecPointer(DAG, StackPtr, VecVT, SubVecVT, Idx); Store = DAG.getStore(Store, dl, SubVec, SubVecPtr, MachinePointerInfo::getUnknownStack(MF)); @@ -1295,13 +1325,12 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, SmallestAlign); // Increment the pointer to the other part. - unsigned IncrementSize = Lo.getValueSizeInBits() / 8; - StackPtr = - DAG.getMemBasePlusOffset(StackPtr, TypeSize::Fixed(IncrementSize), dl); + auto *Load = cast<LoadSDNode>(Lo); + MachinePointerInfo MPI = Load->getPointerInfo(); + IncrementPointer(Load, LoVT, MPI, StackPtr); // Load the Hi part from the stack slot. - Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, - PtrInfo.getWithOffset(IncrementSize), SmallestAlign); + Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MPI, SmallestAlign); } void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, @@ -1616,6 +1645,29 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi); } +void DAGTypeLegalizer::SplitVecRes_STEP_VECTOR(SDNode *N, SDValue &Lo, + SDValue &Hi) { + EVT LoVT, HiVT; + SDLoc dl(N); + assert(N->getValueType(0).isScalableVector() && + "Only scalable vectors are supported for STEP_VECTOR"); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + SDValue Step = N->getOperand(0); + + Lo = DAG.getNode(ISD::STEP_VECTOR, dl, LoVT, Step); + + // Hi = Lo + (EltCnt * Step) + EVT EltVT = Step.getValueType(); + APInt StepVal = cast<ConstantSDNode>(Step)->getAPIntValue(); + SDValue StartOfHi = + DAG.getVScale(dl, EltVT, StepVal * LoVT.getVectorMinNumElements()); + StartOfHi = DAG.getSExtOrTrunc(StartOfHi, dl, HiVT.getVectorElementType()); + StartOfHi = DAG.getNode(ISD::SPLAT_VECTOR, dl, HiVT, StartOfHi); + + Hi = DAG.getNode(ISD::STEP_VECTOR, dl, HiVT, Step); + Hi = DAG.getNode(ISD::ADD, dl, HiVT, Hi, StartOfHi); +} + void DAGTypeLegalizer::SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT LoVT, HiVT; @@ -2802,6 +2854,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) { HalfLo = DAG.getNode(N->getOpcode(), DL, HalfVT, InLoVec); HalfHi = DAG.getNode(N->getOpcode(), DL, HalfVT, InHiVec); } + // Concatenate them to get the full intermediate truncation result. 
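+ // E.g. a v8i64 -> v8i16 truncate may become two v4i64 -> v4i32 truncates + // whose concatenation yields v8i32; the final v8i32 -> v8i16 truncate is + // then legalized again, halving the element width at each step. 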
EVT InterVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements); SDValue InterVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InterVT, HalfLo, @@ -3097,6 +3150,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::CTTZ_ZERO_UNDEF: case ISD::FNEG: case ISD::FREEZE: + case ISD::ARITH_FENCE: case ISD::FCANONICALIZE: Res = WidenVecRes_Unary(N); break; @@ -3911,7 +3965,6 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { EVT VT = N->getValueType(0); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); - unsigned WidenNumElts = WidenVT.getVectorNumElements(); SDValue InOp = N->getOperand(0); SDValue Idx = N->getOperand(1); SDLoc dl(N); @@ -3926,7 +3979,12 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { if (IdxVal == 0 && InVT == WidenVT) return InOp; + if (VT.isScalableVector()) + report_fatal_error("Don't know how to widen the result of " + "EXTRACT_SUBVECTOR for scalable vectors"); + // Check if we can extract from the vector. + unsigned WidenNumElts = WidenVT.getVectorNumElements(); unsigned InNumElts = InVT.getVectorNumElements(); if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts) return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx); @@ -4201,6 +4259,12 @@ SDValue DAGTypeLegalizer::WidenVSELECTMask(SDNode *N) { return SDValue(); EVT VSelVT = N->getValueType(0); + + // This method can't handle scalable vector types. + // FIXME: This support could be added in the future. + if (VSelVT.isScalableVector()) + return SDValue(); + // Only handle vector types which are a power of 2. if (!isPowerOf2_64(VSelVT.getSizeInBits())) return SDValue(); @@ -4471,6 +4535,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { case ISD::BITCAST: Res = WidenVecOp_BITCAST(N); break; case ISD::CONCAT_VECTORS: Res = WidenVecOp_CONCAT_VECTORS(N); break; + case ISD::INSERT_SUBVECTOR: Res = WidenVecOp_INSERT_SUBVECTOR(N); break; case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break; case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::STORE: Res = WidenVecOp_STORE(N); break; @@ -4569,8 +4634,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) { EVT InVT = InOp.getValueType(); if (InVT.getSizeInBits() != VT.getSizeInBits()) { EVT InEltVT = InVT.getVectorElementType(); - for (int i = MVT::FIRST_VECTOR_VALUETYPE, e = MVT::LAST_VECTOR_VALUETYPE; i < e; ++i) { - EVT FixedVT = (MVT::SimpleValueType)i; + for (EVT FixedVT : MVT::vector_valuetypes()) { EVT FixedEltVT = FixedVT.getVectorElementType(); if (TLI.isTypeLegal(FixedVT) && FixedVT.getSizeInBits() == VT.getSizeInBits() && @@ -4785,6 +4849,24 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) { return DAG.getBuildVector(VT, dl, Ops); } +SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) { + SDValue SubVec = N->getOperand(1); + SDValue InVec = N->getOperand(0); + + if (getTypeAction(InVec.getValueType()) == TargetLowering::TypeWidenVector) + InVec = GetWidenedVector(InVec); + + if (getTypeAction(SubVec.getValueType()) == TargetLowering::TypeWidenVector) + SubVec = GetWidenedVector(SubVec); + + if (SubVec.getValueType() == InVec.getValueType() && InVec.isUndef() && + N->getConstantOperandVal(2) == 0) + return SubVec; + + report_fatal_error("Don't know how to widen the operands for " + "INSERT_SUBVECTOR"); +} + SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N) 
{ SDValue InOp = GetWidenedVector(N->getOperand(0)); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), @@ -5079,14 +5161,11 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, if (!Scalable && Width == WidenEltWidth) return RetVT; - // See if there is larger legal integer than the element type to load/store. - unsigned VT; // Don't bother looking for an integer type if the vector is scalable, skip // to vector types. if (!Scalable) { - for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE; - VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) { - EVT MemVT((MVT::SimpleValueType) VT); + // See if there is larger legal integer than the element type to load/store. + for (EVT MemVT : reverse(MVT::integer_valuetypes())) { unsigned MemVTWidth = MemVT.getSizeInBits(); if (MemVT.getSizeInBits() <= WidenEltWidth) break; @@ -5107,9 +5186,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, // See if there is a larger vector type to load/store that has the same vector // element type and is evenly divisible with the WidenVT. - for (VT = (unsigned)MVT::LAST_VECTOR_VALUETYPE; - VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) { - EVT MemVT = (MVT::SimpleValueType) VT; + for (EVT MemVT : reverse(MVT::vector_valuetypes())) { // Skip vector MVTs which don't match the scalable property of WidenVT. if (Scalable != MemVT.isScalableVector()) continue; @@ -5492,3 +5569,29 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT, Ops[Idx] = FillVal; return DAG.getBuildVector(NVT, dl, Ops); } + +void DAGTypeLegalizer::SplitVecRes_VECTOR_REVERSE(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue InLo, InHi; + GetSplitVector(N->getOperand(0), InLo, InHi); + SDLoc DL(N); + + Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, InHi.getValueType(), InHi); + Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, InLo.getValueType(), InLo); +} + +void DAGTypeLegalizer::SplitVecRes_VECTOR_SPLICE(SDNode *N, SDValue &Lo, + SDValue &Hi) { + EVT VT = N->getValueType(0); + SDLoc DL(N); + + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); + + SDValue Expanded = TLI.expandVectorSplice(N, DAG); + Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, Expanded, + DAG.getVectorIdxConstant(0, DL)); + Hi = + DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, Expanded, + DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL)); +} diff --git a/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h index 65b9d017fc5c..75b4242a415c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h +++ b/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h @@ -25,69 +25,155 @@ class SDNode; class Value; class raw_ostream; -/// Holds the information from a dbg_value node through SDISel. -/// We do not use SDValue here to avoid including its header. -class SDDbgValue { +/// Holds the information for a single machine location through SDISel; either +/// an SDNode, a constant, a stack location, or a virtual register. +class SDDbgOperand { public: - enum DbgValueKind { - SDNODE = 0, ///< Value is the result of an expression. - CONST = 1, ///< Value is a constant. - FRAMEIX = 2, ///< Value is contents of a stack location. - VREG = 3 ///< Value is a virtual register. + enum Kind { + SDNODE = 0, ///< Value is the result of an expression. + CONST = 1, ///< Value is a constant. + FRAMEIX = 2, ///< Value is contents of a stack location. + VREG = 3 ///< Value is a virtual register. 
}; + Kind getKind() const { return kind; } + + /// Returns the SDNode* for a register ref + SDNode *getSDNode() const { + assert(kind == SDNODE); + return u.s.Node; + } + + /// Returns the ResNo for a register ref + unsigned getResNo() const { + assert(kind == SDNODE); + return u.s.ResNo; + } + + /// Returns the Value* for a constant + const Value *getConst() const { + assert(kind == CONST); + return u.Const; + } + + /// Returns the FrameIx for a stack object + unsigned getFrameIx() const { + assert(kind == FRAMEIX); + return u.FrameIx; + } + + /// Returns the Virtual Register for a VReg + unsigned getVReg() const { + assert(kind == VREG); + return u.VReg; + } + + static SDDbgOperand fromNode(SDNode *Node, unsigned ResNo) { + return SDDbgOperand(Node, ResNo); + } + static SDDbgOperand fromFrameIdx(unsigned FrameIdx) { + return SDDbgOperand(FrameIdx, FRAMEIX); + } + static SDDbgOperand fromVReg(unsigned VReg) { + return SDDbgOperand(VReg, VREG); + } + static SDDbgOperand fromConst(const Value *Const) { + return SDDbgOperand(Const); + } + + bool operator!=(const SDDbgOperand &Other) const { return !(*this == Other); } + bool operator==(const SDDbgOperand &Other) const { + if (kind != Other.kind) + return false; + switch (kind) { + case SDNODE: + return getSDNode() == Other.getSDNode() && getResNo() == Other.getResNo(); + case CONST: + return getConst() == Other.getConst(); + case VREG: + return getVReg() == Other.getVReg(); + case FRAMEIX: + return getFrameIx() == Other.getFrameIx(); + } + return false; + } + private: + Kind kind; union { struct { - SDNode *Node; ///< Valid for expressions. - unsigned ResNo; ///< Valid for expressions. + SDNode *Node; ///< Valid for expressions. + unsigned ResNo; ///< Valid for expressions. } s; - const Value *Const; ///< Valid for constants. - unsigned FrameIx; ///< Valid for stack objects. - unsigned VReg; ///< Valid for registers. + const Value *Const; ///< Valid for constants. + unsigned FrameIx; ///< Valid for stack objects. + unsigned VReg; ///< Valid for registers. } u; - DIVariable *Var; - DIExpression *Expr; - DebugLoc DL; - unsigned Order; - enum DbgValueKind kind; - bool IsIndirect; - bool Invalid = false; - bool Emitted = false; -public: /// Constructor for non-constants. - SDDbgValue(DIVariable *Var, DIExpression *Expr, SDNode *N, unsigned R, - bool indir, DebugLoc dl, unsigned O) - : Var(Var), Expr(Expr), DL(std::move(dl)), Order(O), IsIndirect(indir) { - kind = SDNODE; + SDDbgOperand(SDNode *N, unsigned R) : kind(SDNODE) { u.s.Node = N; u.s.ResNo = R; } - /// Constructor for constants. - SDDbgValue(DIVariable *Var, DIExpression *Expr, const Value *C, DebugLoc dl, - unsigned O) - : Var(Var), Expr(Expr), DL(std::move(dl)), Order(O), IsIndirect(false) { - kind = CONST; - u.Const = C; - } - + SDDbgOperand(const Value *C) : kind(CONST) { u.Const = C; } /// Constructor for virtual registers and frame indices. - SDDbgValue(DIVariable *Var, DIExpression *Expr, unsigned VRegOrFrameIdx, - bool IsIndirect, DebugLoc DL, unsigned Order, - enum DbgValueKind Kind) - : Var(Var), Expr(Expr), DL(DL), Order(Order), IsIndirect(IsIndirect) { + SDDbgOperand(unsigned VRegOrFrameIdx, Kind Kind) : kind(Kind) { assert((Kind == VREG || Kind == FRAMEIX) && "Invalid SDDbgValue constructor"); - kind = Kind; if (kind == VREG) u.VReg = VRegOrFrameIdx; else u.FrameIx = VRegOrFrameIdx; } +}; - /// Returns the kind. - DbgValueKind getKind() const { return kind; } +/// Holds the information from a dbg_value node through SDISel. 
+/// We do not use SDValue here to avoid including its header. +class SDDbgValue { +public: + +private: + // SDDbgValues are allocated by a BumpPtrAllocator, which means the destructor + // may not be called; therefore all member arrays must also be allocated by + // that BumpPtrAllocator, to ensure that they are correctly freed. + size_t NumLocationOps; + SDDbgOperand *LocationOps; + // SDNode dependencies will be calculated as SDNodes that appear in + // LocationOps plus these AdditionalDependencies. + size_t NumAdditionalDependencies; + SDNode **AdditionalDependencies; + DIVariable *Var; + DIExpression *Expr; + DebugLoc DL; + unsigned Order; + bool IsIndirect; + bool IsVariadic; + bool Invalid = false; + bool Emitted = false; + +public: + SDDbgValue(BumpPtrAllocator &Alloc, DIVariable *Var, DIExpression *Expr, + ArrayRef<SDDbgOperand> L, ArrayRef<SDNode *> Dependencies, + bool IsIndirect, DebugLoc DL, unsigned O, bool IsVariadic) + : NumLocationOps(L.size()), + LocationOps(Alloc.Allocate<SDDbgOperand>(L.size())), + NumAdditionalDependencies(Dependencies.size()), + AdditionalDependencies(Alloc.Allocate<SDNode *>(Dependencies.size())), + Var(Var), Expr(Expr), DL(DL), Order(O), IsIndirect(IsIndirect), + IsVariadic(IsVariadic) { + assert(IsVariadic || L.size() == 1); + assert(!(IsVariadic && IsIndirect)); + std::copy(L.begin(), L.end(), LocationOps); + std::copy(Dependencies.begin(), Dependencies.end(), AdditionalDependencies); + } + + // We allocate arrays with the BumpPtrAllocator and never free or copy them, + // for LocationOps and AdditionalDependencies, as we never expect to copy or + // destroy an SDDbgValue. If we ever start copying or destroying instances, we + // should manage the allocated memory appropriately. + SDDbgValue(const SDDbgValue &Other) = delete; + SDDbgValue &operator=(const SDDbgValue &Other) = delete; + ~SDDbgValue() = delete; /// Returns the DIVariable pointer for the variable. DIVariable *getVariable() const { return Var; } @@ -95,26 +181,37 @@ public: /// Returns the DIExpression pointer for the expression. DIExpression *getExpression() const { return Expr; } - /// Returns the SDNode* for a register ref - SDNode *getSDNode() const { assert (kind==SDNODE); return u.s.Node; } - - /// Returns the ResNo for a register ref - unsigned getResNo() const { assert (kind==SDNODE); return u.s.ResNo; } + ArrayRef<SDDbgOperand> getLocationOps() const { + return ArrayRef<SDDbgOperand>(LocationOps, NumLocationOps); + } - /// Returns the Value* for a constant - const Value *getConst() const { assert (kind==CONST); return u.Const; } + SmallVector<SDDbgOperand> copyLocationOps() const { + return SmallVector<SDDbgOperand>(LocationOps, LocationOps + NumLocationOps); + } - /// Returns the FrameIx for a stack object - unsigned getFrameIx() const { assert (kind==FRAMEIX); return u.FrameIx; } + // Returns the SDNodes which this SDDbgValue depends on. + SmallVector<SDNode *> getSDNodes() const { + SmallVector<SDNode *> Dependencies; + for (SDDbgOperand DbgOp : getLocationOps()) + if (DbgOp.getKind() == SDDbgOperand::SDNODE) + Dependencies.push_back(DbgOp.getSDNode()); + for (SDNode *Node : getAdditionalDependencies()) + Dependencies.push_back(Node); + return Dependencies; + } - /// Returns the Virtual Register for a VReg - unsigned getVReg() const { assert (kind==VREG); return u.VReg; } + ArrayRef<SDNode *> getAdditionalDependencies() const { + return ArrayRef<SDNode *>(AdditionalDependencies, + NumAdditionalDependencies); + } /// Returns whether this is an indirect value. 
bool isIndirect() const { return IsIndirect; } + bool isVariadic() const { return IsVariadic; } + /// Returns the DebugLoc. - DebugLoc getDebugLoc() const { return DL; } + const DebugLoc &getDebugLoc() const { return DL; } /// Returns the SDNodeOrder. This is the order of the preceding node in the /// input. @@ -154,7 +251,7 @@ public: MDNode *getLabel() const { return Label; } /// Returns the DebugLoc. - DebugLoc getDebugLoc() const { return DL; } + const DebugLoc &getDebugLoc() const { return DL; } /// Returns the SDNodeOrder. This is the order of the preceding node in the /// input. diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index debfdda90e1e..b2a8c8bdd78c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -739,6 +739,17 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, if (!N->getHasDebugValue()) return; + /// Returns true if \p DV has any VReg operand locations which don't exist in + /// VRBaseMap. + auto HasUnknownVReg = [&VRBaseMap](SDDbgValue *DV) { + for (SDDbgOperand L : DV->getLocationOps()) { + if (L.getKind() == SDDbgOperand::SDNODE && + VRBaseMap.count({L.getSDNode(), L.getResNo()}) == 0) + return true; + } + return false; + }; + // Opportunistically insert immediate dbg_value uses, i.e. those with the same // source order number as N. MachineBasicBlock *BB = Emitter.getBlock(); @@ -747,13 +758,20 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, if (DV->isEmitted()) continue; unsigned DVOrder = DV->getOrder(); - if (!Order || DVOrder == Order) { - MachineInstr *DbgMI = Emitter.EmitDbgValue(DV, VRBaseMap); - if (DbgMI) { - Orders.push_back({DVOrder, DbgMI}); - BB->insert(InsertPos, DbgMI); - } - } + if (Order != 0 && DVOrder != Order) + continue; + // If DV has any VReg location operands which haven't been mapped then + // either that node is no longer available or we just haven't visited the + // node yet. In the former case we should emit an undef dbg_value, but we + // can do it later. And for the latter we'll want to wait until all + // dependent nodes have been visited. + if (!DV->isInvalidated() && HasUnknownVReg(DV)) + continue; + MachineInstr *DbgMI = Emitter.EmitDbgValue(DV, VRBaseMap); + if (!DbgMI) + continue; + Orders.push_back({DVOrder, DbgMI}); + BB->insert(InsertPos, DbgMI); } } @@ -790,20 +808,21 @@ ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, void ScheduleDAGSDNodes:: EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, Register> &VRBaseMap, MachineBasicBlock::iterator InsertPos) { - for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - if (I->isCtrl()) continue; // ignore chain preds - if (I->getSUnit()->CopyDstRC) { + for (const SDep &Pred : SU->Preds) { + if (Pred.isCtrl()) + continue; // ignore chain preds + if (Pred.getSUnit()->CopyDstRC) { // Copy to physical register. - DenseMap<SUnit*, Register>::iterator VRI = VRBaseMap.find(I->getSUnit()); + DenseMap<SUnit *, Register>::iterator VRI = + VRBaseMap.find(Pred.getSUnit()); assert(VRI != VRBaseMap.end() && "Node emitted out of order - late"); // Find the destination physical register. 
Register Reg; - for (SUnit::const_succ_iterator II = SU->Succs.begin(), - EE = SU->Succs.end(); II != EE; ++II) { - if (II->isCtrl()) continue; // ignore chain preds - if (II->getReg()) { - Reg = II->getReg(); + for (const SDep &Succ : SU->Succs) { + if (Succ.isCtrl()) + continue; // ignore chain preds + if (Succ.getReg()) { + Reg = Succ.getReg(); break; } } @@ -811,13 +830,13 @@ EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, Register> &VRBaseMap, .addReg(VRI->second); } else { // Copy from physical register. - assert(I->getReg() && "Unknown physical register!"); + assert(Pred.getReg() && "Unknown physical register!"); Register VRBase = MRI.createVirtualRegister(SU->CopyDstRC); bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second; (void)isNew; // Silence compiler warning. assert(isNew && "Node emitted out of order - early"); BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase) - .addReg(I->getReg()); + .addReg(Pred.getReg()); } break; } diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index e7bac73678a7..540a6e3efbe1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -136,12 +136,11 @@ void ScheduleDAGVLIW::releaseSucc(SUnit *SU, const SDep &D) { void ScheduleDAGVLIW::releaseSuccessors(SUnit *SU) { // Top down: release successors. - for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - assert(!I->isAssignedRegDep() && + for (SDep &Succ : SU->Succs) { + assert(!Succ.isAssignedRegDep() && "The list-td scheduler doesn't yet support physreg dependencies!"); - releaseSucc(SU, *I); + releaseSucc(SU, Succ); } } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 2090762e2ff4..2a98464425c4 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -146,6 +146,10 @@ bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal) { SplatVal = Op0->getAPIntValue().truncOrSelf(EltSize); return true; } + if (auto *Op0 = dyn_cast<ConstantFPSDNode>(N->getOperand(0))) { + SplatVal = Op0->getValueAPF().bitcastToAPInt().truncOrSelf(EltSize); + return true; + } } auto *BV = dyn_cast<BuildVectorSDNode>(N); @@ -338,8 +342,9 @@ bool ISD::matchBinaryPredicate( return Match(LHSCst, RHSCst); // TODO: Add support for vector UNDEF cases? 
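+ // This also permits matching two SPLAT_VECTOR constants operand by + // operand, e.g. (splat_vector c0) against (splat_vector c1), in the same + // way two BUILD_VECTOR constants are matched element by element. 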
- if (ISD::BUILD_VECTOR != LHS.getOpcode() || - ISD::BUILD_VECTOR != RHS.getOpcode()) + if (LHS.getOpcode() != RHS.getOpcode() || + (LHS.getOpcode() != ISD::BUILD_VECTOR && + LHS.getOpcode() != ISD::SPLAT_VECTOR)) return false; EVT SVT = LHS.getValueType().getScalarType(); @@ -879,6 +884,17 @@ void SelectionDAG::DeleteNodeNotInCSEMaps(SDNode *N) { DeallocateNode(N); } +void SDDbgInfo::add(SDDbgValue *V, bool isParameter) { + assert(!(V->isVariadic() && isParameter)); + if (isParameter) + ByvalParmDbgValues.push_back(V); + else + DbgValues.push_back(V); + for (const SDNode *Node : V->getSDNodes()) + if (Node) + DbgValMap[Node].push_back(V); +} + void SDDbgInfo::erase(const SDNode *Node) { DbgValMapType::iterator I = DbgValMap.find(Node); if (I == DbgValMap.end()) @@ -932,12 +948,12 @@ static void VerifySDNode(SDNode *N) { assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() && "Wrong number of operands!"); EVT EltVT = N->getValueType(0).getVectorElementType(); - for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) { - assert((I->getValueType() == EltVT || - (EltVT.isInteger() && I->getValueType().isInteger() && - EltVT.bitsLE(I->getValueType()))) && - "Wrong operand type!"); - assert(I->getValueType() == N->getOperand(0).getValueType() && + for (const SDUse &Op : N->ops()) { + assert((Op.getValueType() == EltVT || + (EltVT.isInteger() && Op.getValueType().isInteger() && + EltVT.bitsLE(Op.getValueType()))) && + "Wrong operand type!"); + assert(Op.getValueType() == N->getOperand(0).getValueType() && "Operands must all have the same type"); } break; @@ -1372,6 +1388,22 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL, const APInt &NewVal = Elt->getValue(); EVT ViaEltVT = TLI->getTypeToTransformTo(*getContext(), EltVT); unsigned ViaEltSizeInBits = ViaEltVT.getSizeInBits(); + + // For scalable vectors, try to use a SPLAT_VECTOR_PARTS node. + if (VT.isScalableVector()) { + assert(EltVT.getSizeInBits() % ViaEltSizeInBits == 0 && + "Can only handle an even split!"); + unsigned Parts = EltVT.getSizeInBits() / ViaEltSizeInBits; + + SmallVector<SDValue, 2> ScalarParts; + for (unsigned i = 0; i != Parts; ++i) + ScalarParts.push_back(getConstant( + NewVal.extractBits(ViaEltSizeInBits, i * ViaEltSizeInBits), DL, + ViaEltVT, isT, isO)); + + return getNode(ISD::SPLAT_VECTOR_PARTS, DL, VT, ScalarParts); + } + unsigned ViaVecNumElts = VT.getSizeInBits() / ViaEltSizeInBits; EVT ViaVecVT = EVT::getVectorVT(*getContext(), ViaEltVT, ViaVecNumElts); @@ -1381,11 +1413,10 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL, assert(ViaVecVT.getSizeInBits() == VT.getSizeInBits()); SmallVector<SDValue, 2> EltParts; - for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i) { + for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i) EltParts.push_back(getConstant( - NewVal.lshr(i * ViaEltSizeInBits).zextOrTrunc(ViaEltSizeInBits), DL, + NewVal.extractBits(ViaEltSizeInBits, i * ViaEltSizeInBits), DL, ViaEltVT, isT, isO)); - } // EltParts is currently in little endian order. If we actually want // big-endian order then reverse it now. 
@@ -1498,17 +1529,17 @@ SDValue SelectionDAG::getConstantFP(double Val, const SDLoc &DL, EVT VT, EVT EltVT = VT.getScalarType(); if (EltVT == MVT::f32) return getConstantFP(APFloat((float)Val), DL, VT, isTarget); - else if (EltVT == MVT::f64) + if (EltVT == MVT::f64) return getConstantFP(APFloat(Val), DL, VT, isTarget); - else if (EltVT == MVT::f80 || EltVT == MVT::f128 || EltVT == MVT::ppcf128 || - EltVT == MVT::f16 || EltVT == MVT::bf16) { + if (EltVT == MVT::f80 || EltVT == MVT::f128 || EltVT == MVT::ppcf128 || + EltVT == MVT::f16 || EltVT == MVT::bf16) { bool Ignored; APFloat APF = APFloat(Val); APF.convert(EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven, &Ignored); return getConstantFP(APF, DL, VT, isTarget); - } else - llvm_unreachable("Unsupported type in getConstantFP"); + } + llvm_unreachable("Unsupported type in getConstantFP"); } SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, @@ -1717,6 +1748,25 @@ SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) { return SDValue(CondCodeNodes[Cond], 0); } +SDValue SelectionDAG::getStepVector(const SDLoc &DL, EVT ResVT) { + APInt One(ResVT.getScalarSizeInBits(), 1); + return getStepVector(DL, ResVT, One); +} + +SDValue SelectionDAG::getStepVector(const SDLoc &DL, EVT ResVT, APInt StepVal) { + assert(ResVT.getScalarSizeInBits() == StepVal.getBitWidth()); + if (ResVT.isScalableVector()) + return getNode( + ISD::STEP_VECTOR, DL, ResVT, + getTargetConstant(StepVal, DL, ResVT.getVectorElementType())); + + SmallVector<SDValue, 16> OpsStepConstants; + for (uint64_t i = 0; i < ResVT.getVectorNumElements(); i++) + OpsStepConstants.push_back( + getConstant(StepVal * i, DL, ResVT.getVectorElementType())); + return getBuildVector(ResVT, DL, OpsStepConstants); +} + /// Swaps the values of N1 and N2. Swaps all indices in the shuffle mask M that /// point at N1 to point at N2 and indices that point at N2 to point at N1. static void commuteShuffle(SDValue &N1, SDValue &N2, MutableArrayRef<int> M) { @@ -1727,7 +1777,7 @@ static void commuteShuffle(SDValue &N1, SDValue &N2, MutableArrayRef<int> M) { SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef<int> Mask) { assert(VT.getVectorNumElements() == Mask.size() && - "Must have the same number of vector elements as mask elements!"); + "Must have the same number of vector elements as mask elements!"); assert(VT == N1.getValueType() && VT == N2.getValueType() && "Invalid VECTOR_SHUFFLE"); @@ -2430,7 +2480,9 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, return true; case ISD::ADD: case ISD::SUB: - case ISD::AND: { + case ISD::AND: + case ISD::XOR: + case ISD::OR: { APInt UndefLHS, UndefRHS; SDValue LHS = V.getOperand(0); SDValue RHS = V.getOperand(1); @@ -2439,8 +2491,9 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, UndefElts = UndefLHS | UndefRHS; return true; } - break; + return false; } + case ISD::ABS: case ISD::TRUNCATE: case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: @@ -2495,6 +2548,9 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, case ISD::EXTRACT_SUBVECTOR: { // Offset the demanded elts by the subvector index. SDValue Src = V.getOperand(0); + // We don't support scalable vectors at the moment. 
+ if (Src.getValueType().isScalableVector()) + return false; uint64_t Idx = V.getConstantOperandVal(1); unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); APInt UndefSrcElts; @@ -2578,12 +2634,21 @@ SDValue SelectionDAG::getSplatSourceVector(SDValue V, int &SplatIdx) { return SDValue(); } -SDValue SelectionDAG::getSplatValue(SDValue V) { +SDValue SelectionDAG::getSplatValue(SDValue V, bool LegalTypes) { int SplatIdx; - if (SDValue SrcVector = getSplatSourceVector(V, SplatIdx)) - return getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(V), - SrcVector.getValueType().getScalarType(), SrcVector, + if (SDValue SrcVector = getSplatSourceVector(V, SplatIdx)) { + EVT SVT = SrcVector.getValueType().getScalarType(); + EVT LegalSVT = SVT; + if (LegalTypes && !TLI->isTypeLegal(SVT)) { + if (!SVT.isInteger()) + return SDValue(); + LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT); + if (LegalSVT.bitsLT(SVT)) + return SDValue(); + } + return getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(V), LegalSVT, SrcVector, getVectorIdxConstant(SplatIdx, SDLoc(V))); + } return SDValue(); } @@ -2791,8 +2856,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, unsigned NumSubVectorElts = SubVectorVT.getVectorNumElements(); unsigned NumSubVectors = Op.getNumOperands(); for (unsigned i = 0; i != NumSubVectors; ++i) { - APInt DemandedSub = DemandedElts.lshr(i * NumSubVectorElts); - DemandedSub = DemandedSub.trunc(NumSubVectorElts); + APInt DemandedSub = + DemandedElts.extractBits(NumSubVectorElts, i * NumSubVectorElts); if (!!DemandedSub) { SDValue Sub = Op.getOperand(i); Known2 = computeKnownBits(Sub, DemandedSub, Depth + 1); @@ -2888,8 +2953,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known2 = computeKnownBits(N0, SubDemandedElts.shl(i), Depth + 1); unsigned Shifts = IsLE ? i : SubScale - 1 - i; - Known.One |= Known2.One.zext(BitWidth).shl(SubBitWidth * Shifts); - Known.Zero |= Known2.Zero.zext(BitWidth).shl(SubBitWidth * Shifts); + Known.insertBits(Known2, SubBitWidth * Shifts); } } @@ -2913,8 +2977,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, if (DemandedElts[i]) { unsigned Shifts = IsLE ? i : NumElts - 1 - i; unsigned Offset = (Shifts % SubScale) * BitWidth; - Known.One &= Known2.One.lshr(Offset).trunc(BitWidth); - Known.Zero &= Known2.Zero.lshr(Offset).trunc(BitWidth); + Known = KnownBits::commonBits(Known, + Known2.extractBits(BitWidth, Offset)); // If we don't know any bits, early out. 
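The computeKnownBits changes above replace hand-rolled shift/mask merging with KnownBits helpers: insertBits() writes a narrow fact into a wider one, and commonBits() keeps only the facts both lanes agree on, which is exactly what merging knowledge across demanded sub-elements requires. A sketch of the commonBits semantics with explicit Zero/One masks (a simplified stand-in, not llvm::KnownBits itself):

#include <cassert>
#include <cstdint>

struct Known {
  uint64_t Zero = 0; // bits known to be 0
  uint64_t One = 0;  // bits known to be 1
};

// A bit stays known only if both inputs know it, with the same polarity.
Known commonBits(Known A, Known B) {
  return {A.Zero & B.Zero, A.One & B.One};
}

int main() {
  Known A{/*Zero=*/0b1100, /*One=*/0b0010};
  Known B{/*Zero=*/0b0100, /*One=*/0b0011};
  Known C = commonBits(A, B);
  assert(C.Zero == 0b0100 && C.One == 0b0010);
  return 0;
}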
if (Known.isUnknown()) break; @@ -2943,7 +3007,39 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, case ISD::MUL: { Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); - Known = KnownBits::computeForMul(Known, Known2); + Known = KnownBits::mul(Known, Known2); + break; + } + case ISD::MULHU: { + Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known = KnownBits::mulhu(Known, Known2); + break; + } + case ISD::MULHS: { + Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known = KnownBits::mulhs(Known, Known2); + break; + } + case ISD::UMUL_LOHI: { + assert((Op.getResNo() == 0 || Op.getResNo() == 1) && "Unknown result"); + Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + if (Op.getResNo() == 0) + Known = KnownBits::mul(Known, Known2); + else + Known = KnownBits::mulhu(Known, Known2); + break; + } + case ISD::SMUL_LOHI: { + assert((Op.getResNo() == 0 || Op.getResNo() == 1) && "Unknown result"); + Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + if (Op.getResNo() == 0) + Known = KnownBits::mul(Known, Known2); + else + Known = KnownBits::mulhs(Known, Known2); break; } case ISD::UDIV: { @@ -2975,7 +3071,6 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, break; case ISD::SMULO: case ISD::UMULO: - case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: if (Op.getResNo() != 1) break; // The boolean result conforms to getBooleanContents. @@ -3373,6 +3468,12 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known = Known2.abs(); break; } + case ISD::USUBSAT: { + // The result of usubsat will never be larger than the LHS. + Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known.Zero.setHighBits(Known2.countMinLeadingZeros()); + break; + } case ISD::UMIN: { Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); @@ -3424,6 +3525,42 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known = KnownBits::smin(Known, Known2); break; } + case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: + if (Op.getResNo() == 1) { + // The boolean result conforms to getBooleanContents. + // If we know the result of a setcc has the top bits zero, use this info. + // We know that we have an integer-based boolean since these operations + // are only available for integer. 
+ if (TLI->getBooleanContents(Op.getValueType().isVector(), false) == + TargetLowering::ZeroOrOneBooleanContent && + BitWidth > 1) + Known.Zero.setBitsFrom(1); + break; + } + LLVM_FALLTHROUGH; + case ISD::ATOMIC_CMP_SWAP: + case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_CLR: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + case ISD::ATOMIC_LOAD: { + unsigned MemBits = + cast<AtomicSDNode>(Op)->getMemoryVT().getScalarSizeInBits(); + // If we are looking at the loaded value. + if (Op.getResNo() == 0) { + if (TLI->getExtendForAtomicOps() == ISD::ZERO_EXTEND) + Known.Zero.setBitsFrom(MemBits); + } + break; + } case ISD::FrameIndex: case ISD::TargetFrameIndex: TLI->computeKnownBitsForFrameIndex(cast<FrameIndexSDNode>(Op)->getIndex(), @@ -3867,6 +4004,12 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, (VTBits - SignBitsOp0 + 1) + (VTBits - SignBitsOp1 + 1); return OutValidBits > VTBits ? 1 : VTBits - OutValidBits + 1; } + case ISD::SREM: + // The sign bit is the LHS's sign bit, except when the result of the + // remainder is zero. The magnitude of the result should be less than or + // equal to the magnitude of the LHS. Therefore, the result should have + // at least as many sign bits as the left hand side. + return ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); case ISD::TRUNCATE: { // Check if the sign bits of source go down as far as the truncated value. unsigned NumSrcBits = Op.getOperand(0).getScalarValueSizeInBits(); @@ -3922,6 +4065,9 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, SDValue InVec = Op.getOperand(0); SDValue EltNo = Op.getOperand(1); EVT VecVT = InVec.getValueType(); + // ComputeNumSignBits not yet implemented for scalable vectors. + if (VecVT.isScalableVector()) + break; const unsigned BitWidth = Op.getValueSizeInBits(); const unsigned EltBitWidth = Op.getOperand(0).getScalarValueSizeInBits(); const unsigned NumSrcElts = VecVT.getVectorNumElements(); @@ -3961,8 +4107,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, unsigned NumSubVectorElts = SubVectorVT.getVectorNumElements(); unsigned NumSubVectors = Op.getNumOperands(); for (unsigned i = 0; (i < NumSubVectors) && (Tmp > 1); ++i) { - APInt DemandedSub = DemandedElts.lshr(i * NumSubVectorElts); - DemandedSub = DemandedSub.trunc(NumSubVectorElts); + APInt DemandedSub = + DemandedElts.extractBits(NumSubVectorElts, i * NumSubVectorElts); if (!DemandedSub) continue; Tmp2 = ComputeNumSignBits(Op.getOperand(i), DemandedSub, Depth + 1); @@ -3995,6 +4141,33 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, assert(Tmp <= VTBits && "Failed to determine minimum sign bits"); return Tmp; } + case ISD::ATOMIC_CMP_SWAP: + case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: + case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_CLR: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + case ISD::ATOMIC_LOAD: { + Tmp = cast<AtomicSDNode>(Op)->getMemoryVT().getScalarSizeInBits(); + // If we are looking at the loaded value. 
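The atomic cases added above (and completed just below) let known-bits and sign-bit analysis look through the target's atomic extension policy: a value loaded as MemBits wide into a VTBits-wide register has VTBits - MemBits + 1 guaranteed sign bits under sign extension, and VTBits - MemBits known-zero high bits under zero extension. The arithmetic, checked standalone:

#include <cassert>

int main() {
  unsigned VTBits = 64, MemBits = 32; // e.g. an i32 atomic load in an i64 reg
  // Sign extension: bits 31..63 all copy the sign bit -> 33 sign bits.
  assert(VTBits - MemBits + 1 == 33);
  // Zero extension: bits 32..63 are zero; bit 31 itself is unknown.
  assert(VTBits - MemBits == 32);
  return 0;
}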
+ if (Op.getResNo() == 0) { + if (Tmp == VTBits) + return 1; // early-out + if (TLI->getExtendForAtomicOps() == ISD::SIGN_EXTEND) + return VTBits - Tmp + 1; + if (TLI->getExtendForAtomicOps() == ISD::ZERO_EXTEND) + return VTBits - Tmp; + } + break; + } } // If we are looking at the loaded value of the SDNode. @@ -4075,6 +4248,61 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, return std::max(FirstAnswer, Mask.countLeadingOnes()); } +bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly, + unsigned Depth) const { + // Early out for FREEZE. + if (Op.getOpcode() == ISD::FREEZE) + return true; + + // TODO: Assume we don't know anything for now. + EVT VT = Op.getValueType(); + if (VT.isScalableVector()) + return false; + + APInt DemandedElts = VT.isVector() + ? APInt::getAllOnesValue(VT.getVectorNumElements()) + : APInt(1, 1); + return isGuaranteedNotToBeUndefOrPoison(Op, DemandedElts, PoisonOnly, Depth); +} + +bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, + const APInt &DemandedElts, + bool PoisonOnly, + unsigned Depth) const { + unsigned Opcode = Op.getOpcode(); + + // Early out for FREEZE. + if (Opcode == ISD::FREEZE) + return true; + + if (Depth >= MaxRecursionDepth) + return false; // Limit search depth. + + if (isIntOrFPConstant(Op)) + return true; + + switch (Opcode) { + case ISD::UNDEF: + return PoisonOnly; + + // TODO: ISD::BUILD_VECTOR handling + + // TODO: Search for noundef attributes from library functions. + + // TODO: Pointers dereferenced by ISD::LOAD/STORE ops are noundef. + + default: + // Allow the target to implement this method for its nodes. + if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::INTRINSIC_WO_CHAIN || + Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::INTRINSIC_VOID) + return TLI->isGuaranteedNotToBeUndefOrPoisonForTargetNode( + Op, DemandedElts, *this, PoisonOnly, Depth); + break; + } + + return false; +} + bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const { if ((Op.getOpcode() != ISD::ADD && Op.getOpcode() != ISD::OR) || !isa<ConstantSDNode>(Op.getOperand(1))) @@ -4256,7 +4484,16 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const { bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const { assert(A.getValueType() == B.getValueType() && "Values must have the same type"); - return (computeKnownBits(A).Zero | computeKnownBits(B).Zero).isAllOnesValue(); + return KnownBits::haveNoCommonBitsSet(computeKnownBits(A), + computeKnownBits(B)); +} + +static SDValue FoldSTEP_VECTOR(const SDLoc &DL, EVT VT, SDValue Step, + SelectionDAG &DAG) { + if (cast<ConstantSDNode>(Step)->isNullValue()) + return DAG.getConstant(0, DL, VT); + + return SDValue(); } static SDValue FoldBUILD_VECTOR(const SDLoc &DL, EVT VT, @@ -4408,6 +4645,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue Operand, const SDNodeFlags Flags) { + assert(Operand.getOpcode() != ISD::DELETED_NODE && + "Operand is DELETED_NODE!"); // Constant fold unary operations with an integer constant operand. Even // opaque constant will be folded, because the folding of unary operations // doesn't create new constants with different values. 
Nevertheless, the @@ -4424,10 +4663,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if (C->isOpaque()) break; LLVM_FALLTHROUGH; - case ISD::ANY_EXTEND: case ISD::ZERO_EXTEND: return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT, C->isTargetOpcode(), C->isOpaque()); + case ISD::ANY_EXTEND: + // Some targets like RISCV prefer to sign extend some types. + if (TLI->isSExtCheaperThanZExt(Operand.getValueType(), VT)) + return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT, + C->isTargetOpcode(), C->isOpaque()); + return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT, + C->isTargetOpcode(), C->isOpaque()); case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: { APFloat apf(EVTToAPFloatSemantics(VT), @@ -4478,6 +4723,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, APFloat::rmNearestTiesToEven, &Ignored); return getConstantFP(FPV, DL, VT); } + case ISD::STEP_VECTOR: { + if (SDValue V = FoldSTEP_VECTOR(DL, VT, Operand, *this)) + return V; + break; + } } } @@ -4531,9 +4781,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::BITCAST: if (VT == MVT::i16 && C->getValueType(0) == MVT::f16) return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), DL, VT); - else if (VT == MVT::i32 && C->getValueType(0) == MVT::f32) + if (VT == MVT::i16 && C->getValueType(0) == MVT::bf16) + return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), DL, VT); + if (VT == MVT::i32 && C->getValueType(0) == MVT::f32) return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), DL, VT); - else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64) + if (VT == MVT::i64 && C->getValueType(0) == MVT::f64) return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT); break; case ISD::FP_TO_FP16: { @@ -4548,45 +4800,48 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, } // Constant fold unary operations with a vector integer or float operand. - if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Operand)) { - if (BV->isConstant()) { - switch (Opcode) { - default: - // FIXME: Entirely reasonable to perform folding of other unary - // operations here as the need arises. - break; - case ISD::FNEG: - case ISD::FABS: - case ISD::FCEIL: - case ISD::FTRUNC: - case ISD::FFLOOR: - case ISD::FP_EXTEND: - case ISD::FP_TO_SINT: - case ISD::FP_TO_UINT: - case ISD::TRUNCATE: - case ISD::ANY_EXTEND: - case ISD::ZERO_EXTEND: - case ISD::SIGN_EXTEND: - case ISD::UINT_TO_FP: - case ISD::SINT_TO_FP: - case ISD::ABS: - case ISD::BITREVERSE: - case ISD::BSWAP: - case ISD::CTLZ: - case ISD::CTLZ_ZERO_UNDEF: - case ISD::CTTZ: - case ISD::CTTZ_ZERO_UNDEF: - case ISD::CTPOP: { - SDValue Ops = { Operand }; - if (SDValue Fold = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops)) - return Fold; - } - } - } + switch (Opcode) { + default: + // FIXME: Entirely reasonable to perform folding of other unary + // operations here as the need arises. 
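The ANY_EXTEND constant fold above is the interesting change in this hunk: because an any-extend leaves the high bits unspecified, folding the constant with either zero or sign extension is correct, so targets where sign extension is cheaper (the comment cites RISC-V) may pick sext. A standalone illustration that both results are valid materializations:

#include <cassert>
#include <cstdint>

int main() {
  int32_t C = -1;                        // the i32 constant 0xFFFFFFFF
  uint64_t Zext = (uint64_t)(uint32_t)C; // 0x00000000FFFFFFFF
  uint64_t Sext = (uint64_t)(int64_t)C;  // 0xFFFFFFFFFFFFFFFF
  assert(Zext == 0x00000000FFFFFFFFull);
  assert(Sext == 0xFFFFFFFFFFFFFFFFull);
  // The low 32 bits agree; ANY_EXTEND leaves the rest unconstrained.
  assert((uint32_t)Zext == (uint32_t)Sext);
  return 0;
}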
+ break; + case ISD::FNEG: + case ISD::FABS: + case ISD::FCEIL: + case ISD::FTRUNC: + case ISD::FFLOOR: + case ISD::FP_EXTEND: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::TRUNCATE: + case ISD::ANY_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::SIGN_EXTEND: + case ISD::UINT_TO_FP: + case ISD::SINT_TO_FP: + case ISD::ABS: + case ISD::BITREVERSE: + case ISD::BSWAP: + case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: + case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: + case ISD::CTPOP: { + SDValue Ops = {Operand}; + if (SDValue Fold = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops)) + return Fold; + } } unsigned OpOpcode = Operand.getNode()->getOpcode(); switch (Opcode) { + case ISD::STEP_VECTOR: + assert(VT.isScalableVector() && + "STEP_VECTOR can only be used with scalable types"); + assert(OpOpcode == ISD::TargetConstant && + VT.getVectorElementType() == Operand.getValueType() && + "Unexpected step operand"); + break; case ISD::FREEZE: assert(VT == Operand.getValueType() && "Unexpected VT!"); break; @@ -4641,7 +4896,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, "Invalid sext node, dst < src!"); if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND) return getNode(OpOpcode, DL, VT, Operand.getOperand(0)); - else if (OpOpcode == ISD::UNDEF) + if (OpOpcode == ISD::UNDEF) // sext(undef) = 0, because the top bits will all be the same. return getConstant(0, DL, VT); break; @@ -4660,7 +4915,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, "Invalid zext node, dst < src!"); if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x) return getNode(ISD::ZERO_EXTEND, DL, VT, Operand.getOperand(0)); - else if (OpOpcode == ISD::UNDEF) + if (OpOpcode == ISD::UNDEF) // zext(undef) = 0, because the top bits will be zero. 
return getConstant(0, DL, VT); break; @@ -4682,7 +4937,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, OpOpcode == ISD::ANY_EXTEND) // (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x) return getNode(OpOpcode, DL, VT, Operand.getOperand(0)); - else if (OpOpcode == ISD::UNDEF) + if (OpOpcode == ISD::UNDEF) return getUNDEF(VT); // (ext (trunc x)) -> x @@ -4728,8 +4983,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, assert(VT.isVector() && "This DAG node is restricted to vector types."); assert(Operand.getValueType().bitsLE(VT) && "The input must be the same size or smaller than the result."); - assert(VT.getVectorNumElements() < - Operand.getValueType().getVectorNumElements() && + assert(VT.getVectorMinNumElements() < + Operand.getValueType().getVectorMinNumElements() && "The destination vector type must have fewer lanes than the input."); break; case ISD::ABS: @@ -4879,6 +5134,18 @@ static llvm::Optional<APInt> FoldValue(unsigned Opcode, const APInt &C1, if (!C2.getBoolValue()) break; return C1.srem(C2); + case ISD::MULHS: { + unsigned FullWidth = C1.getBitWidth() * 2; + APInt C1Ext = C1.sext(FullWidth); + APInt C2Ext = C2.sext(FullWidth); + return (C1Ext * C2Ext).extractBits(C1.getBitWidth(), C1.getBitWidth()); + } + case ISD::MULHU: { + unsigned FullWidth = C1.getBitWidth() * 2; + APInt C1Ext = C1.zext(FullWidth); + APInt C2Ext = C2.zext(FullWidth); + return (C1Ext * C2Ext).extractBits(C1.getBitWidth(), C1.getBitWidth()); + } } return llvm::None; } @@ -4933,7 +5200,10 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, // If the opcode is a target-specific ISD node, there's nothing we can // do here and the operand rules may not line up with the below, so // bail early. - if (Opcode >= ISD::BUILTIN_OP_END) + // We can't create a scalar CONCAT_VECTORS so skip it. It will break + // for concats involving SPLAT_VECTOR. Concats of BUILD_VECTORS are handled by + // foldCONCAT_VECTORS in getNode before this is called. + if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::CONCAT_VECTORS) return SDValue(); // For now, the array Ops should only contain two values. @@ -4973,27 +5243,20 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N2)) return FoldSymbolOffset(Opcode, VT, GA, N1); - // TODO: All the folds below are performed lane-by-lane and assume a fixed - // vector width, however we should be able to do constant folds involving - // splat vector nodes too. - if (VT.isScalableVector()) - return SDValue(); - // For fixed width vectors, extract each constant element and fold them // individually. Either input may be an undef value. - auto *BV1 = dyn_cast<BuildVectorSDNode>(N1); - if (!BV1 && !N1->isUndef()) + bool IsBVOrSV1 = N1->getOpcode() == ISD::BUILD_VECTOR || + N1->getOpcode() == ISD::SPLAT_VECTOR; + if (!IsBVOrSV1 && !N1->isUndef()) return SDValue(); - auto *BV2 = dyn_cast<BuildVectorSDNode>(N2); - if (!BV2 && !N2->isUndef()) + bool IsBVOrSV2 = N2->getOpcode() == ISD::BUILD_VECTOR || + N2->getOpcode() == ISD::SPLAT_VECTOR; + if (!IsBVOrSV2 && !N2->isUndef()) return SDValue(); // If both operands are undef, that's handled the same way as scalars. 
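FoldValue above gains MULHS/MULHU: extend both operands to double width, multiply, and extract the upper half. The same recipe works on ordinary integers, which makes a convenient sanity check (sketch only, 32-bit operands):

#include <cassert>
#include <cstdint>

uint32_t mulhu32(uint32_t A, uint32_t B) {
  return (uint32_t)(((uint64_t)A * B) >> 32); // zero-extend, take high half
}

int32_t mulhs32(int32_t A, int32_t B) {
  return (int32_t)(((int64_t)A * B) >> 32); // sign-extend, take high half
}

int main() {
  assert(mulhu32(0x80000000u, 2) == 1); // 2^31 * 2 = 2^32
  assert(mulhs32(-1, -1) == 0);         // product 1 has an all-zero high half
  assert(mulhs32(-1, 1) == -1);         // product -1 sign-fills the high half
  return 0;
}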
- if (!BV1 && !BV2) + if (!IsBVOrSV1 && !IsBVOrSV2) return SDValue(); - assert((!BV1 || !BV2 || BV1->getNumOperands() == BV2->getNumOperands()) && - "Vector binop with different number of elements in operands?"); - EVT SVT = VT.getScalarType(); EVT LegalSVT = SVT; if (NewNodesMustHaveLegalTypes && LegalSVT.isInteger()) { @@ -5001,19 +5264,46 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, if (LegalSVT.bitsLT(SVT)) return SDValue(); } + SmallVector<SDValue, 4> Outputs; - unsigned NumOps = BV1 ? BV1->getNumOperands() : BV2->getNumOperands(); + unsigned NumOps = 0; + if (IsBVOrSV1) + NumOps = std::max(NumOps, N1->getNumOperands()); + if (IsBVOrSV2) + NumOps = std::max(NumOps, N2->getNumOperands()); + assert(NumOps != 0 && "Expected non-zero operands"); + // Scalable vectors should only be SPLAT_VECTOR or UNDEF here. We only need + // one iteration for that. + assert((!VT.isScalableVector() || NumOps == 1) && + "Scalable vector should only have one scalar"); + for (unsigned I = 0; I != NumOps; ++I) { - SDValue V1 = BV1 ? BV1->getOperand(I) : getUNDEF(SVT); - SDValue V2 = BV2 ? BV2->getOperand(I) : getUNDEF(SVT); + // We can have a fixed length SPLAT_VECTOR and a BUILD_VECTOR so we need + // to use operand 0 of the SPLAT_VECTOR for each fixed element. + SDValue V1; + if (N1->getOpcode() == ISD::BUILD_VECTOR) + V1 = N1->getOperand(I); + else if (N1->getOpcode() == ISD::SPLAT_VECTOR) + V1 = N1->getOperand(0); + else + V1 = getUNDEF(SVT); + + SDValue V2; + if (N2->getOpcode() == ISD::BUILD_VECTOR) + V2 = N2->getOperand(I); + else if (N2->getOpcode() == ISD::SPLAT_VECTOR) + V2 = N2->getOperand(0); + else + V2 = getUNDEF(SVT); + if (SVT.isInteger()) { - if (V1->getValueType(0).bitsGT(SVT)) + if (V1.getValueType().bitsGT(SVT)) V1 = getNode(ISD::TRUNCATE, DL, SVT, V1); - if (V2->getValueType(0).bitsGT(SVT)) + if (V2.getValueType().bitsGT(SVT)) V2 = getNode(ISD::TRUNCATE, DL, SVT, V2); } - if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT) + if (V1.getValueType() != SVT || V2.getValueType() != SVT) return SDValue(); // Fold one vector element. @@ -5028,14 +5318,21 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, Outputs.push_back(ScalarResult); } - assert(VT.getVectorNumElements() == Outputs.size() && - "Vector size mismatch!"); + if (N1->getOpcode() == ISD::BUILD_VECTOR || + N2->getOpcode() == ISD::BUILD_VECTOR) { + assert(VT.getVectorNumElements() == Outputs.size() && + "Vector size mismatch!"); + + // Build a big vector out of the scalar elements we generated. + return getBuildVector(VT, SDLoc(), Outputs); + } - // We may have a vector type but a scalar result. Create a splat. - Outputs.resize(VT.getVectorNumElements(), Outputs.back()); + assert((N1->getOpcode() == ISD::SPLAT_VECTOR || + N2->getOpcode() == ISD::SPLAT_VECTOR) && + "One operand should be a splat vector"); - // Build a big vector out of the scalar elements we generated. - return getBuildVector(VT, SDLoc(), Outputs); + assert(Outputs.size() == 1 && "Vector size mismatch!"); + return getSplatVector(VT, SDLoc(), Outputs[0]); } // TODO: Merge with FoldConstantArithmetic @@ -5056,30 +5353,26 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, if (!VT.isVector()) return SDValue(); - // TODO: All the folds below are performed lane-by-lane and assume a fixed - // vector width, however we should be able to do constant folds involving - // splat vector nodes too. 
- if (VT.isScalableVector()) - return SDValue(); - - // From this point onwards all vectors are assumed to be fixed width. - unsigned NumElts = VT.getVectorNumElements(); + ElementCount NumElts = VT.getVectorElementCount(); - auto IsScalarOrSameVectorSize = [&](const SDValue &Op) { + auto IsScalarOrSameVectorSize = [NumElts](const SDValue &Op) { return !Op.getValueType().isVector() || - Op.getValueType().getVectorNumElements() == NumElts; + Op.getValueType().getVectorElementCount() == NumElts; }; - auto IsConstantBuildVectorOrUndef = [&](const SDValue &Op) { + auto IsConstantBuildVectorSplatVectorOrUndef = [](const SDValue &Op) { + APInt SplatVal; BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op); - return (Op.isUndef()) || (Op.getOpcode() == ISD::CONDCODE) || - (BV && BV->isConstant()); + return Op.isUndef() || Op.getOpcode() == ISD::CONDCODE || + (BV && BV->isConstant()) || + (Op.getOpcode() == ISD::SPLAT_VECTOR && + ISD::isConstantSplatVector(Op.getNode(), SplatVal)); }; // All operands must be vector types with the same number of elements as // the result type and must be either UNDEF or a build vector of constant // or UNDEF scalars. - if (!llvm::all_of(Ops, IsConstantBuildVectorOrUndef) || + if (!llvm::all_of(Ops, IsConstantBuildVectorSplatVectorOrUndef) || !llvm::all_of(Ops, IsScalarOrSameVectorSize)) return SDValue(); @@ -5096,14 +5389,19 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, return SDValue(); } + // For scalable vector types we know we're dealing with SPLAT_VECTORs. We + // only have one operand to check. For fixed-length vector types we may have + // a combination of BUILD_VECTOR and SPLAT_VECTOR. + unsigned NumOperands = NumElts.isScalable() ? 1 : NumElts.getFixedValue(); + // Constant fold each scalar lane separately. SmallVector<SDValue, 4> ScalarResults; - for (unsigned i = 0; i != NumElts; i++) { + for (unsigned I = 0; I != NumOperands; I++) { SmallVector<SDValue, 4> ScalarOps; for (SDValue Op : Ops) { EVT InSVT = Op.getValueType().getScalarType(); - BuildVectorSDNode *InBV = dyn_cast<BuildVectorSDNode>(Op); - if (!InBV) { + if (Op.getOpcode() != ISD::BUILD_VECTOR && + Op.getOpcode() != ISD::SPLAT_VECTOR) { // We've checked that this is UNDEF or a constant of some kind. if (Op.isUndef()) ScalarOps.push_back(getUNDEF(InSVT)); @@ -5112,7 +5410,8 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, continue; } - SDValue ScalarOp = InBV->getOperand(i); + SDValue ScalarOp = + Op.getOperand(Op.getOpcode() == ISD::SPLAT_VECTOR ? 0 : I); EVT ScalarVT = ScalarOp.getValueType(); // Build vector (integer) scalar operands may need implicit @@ -5137,7 +5436,8 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, ScalarResults.push_back(ScalarResult); } - SDValue V = getBuildVector(VT, DL, ScalarResults); + SDValue V = NumElts.isScalable() ? 
getSplatVector(VT, DL, ScalarResults[0]) + : getBuildVector(VT, DL, ScalarResults); NewSDValueDbgMsg(V, "New node fold constant vector: ", this); return V; } @@ -5243,6 +5543,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, SDValue N2, const SDNodeFlags Flags) { + assert(N1.getOpcode() != ISD::DELETED_NODE && + N2.getOpcode() != ISD::DELETED_NODE && + "Operand is DELETED_NODE!"); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); @@ -5304,14 +5607,19 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, // it's worth handling here. if (N2C && N2C->isNullValue()) return N1; + if ((Opcode == ISD::ADD || Opcode == ISD::SUB) && VT.isVector() && + VT.getVectorElementType() == MVT::i1) + return getNode(ISD::XOR, DL, VT, N1, N2); break; case ISD::MUL: assert(VT.isInteger() && "This operator does not apply to FP types!"); assert(N1.getValueType() == N2.getValueType() && N1.getValueType() == VT && "Binary operator types must match!"); + if (VT.isVector() && VT.getVectorElementType() == MVT::i1) + return getNode(ISD::AND, DL, VT, N1, N2); if (N2C && (N1.getOpcode() == ISD::VSCALE) && Flags.hasNoSignedWrap()) { - APInt MulImm = cast<ConstantSDNode>(N1->getOperand(0))->getAPIntValue(); - APInt N2CImm = N2C->getAPIntValue(); + const APInt &MulImm = N1->getConstantOperandAPInt(0); + const APInt &N2CImm = N2C->getAPIntValue(); return getVScale(DL, VT, MulImm * N2CImm); } break; @@ -5328,6 +5636,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, assert(VT.isInteger() && "This operator does not apply to FP types!"); assert(N1.getValueType() == N2.getValueType() && N1.getValueType() == VT && "Binary operator types must match!"); + if (VT.isVector() && VT.getVectorElementType() == MVT::i1) { + // fold (add_sat x, y) -> (or x, y) for bool types. + if (Opcode == ISD::SADDSAT || Opcode == ISD::UADDSAT) + return getNode(ISD::OR, DL, VT, N1, N2); + // fold (sub_sat x, y) -> (and x, ~y) for bool types. 
+ if (Opcode == ISD::SSUBSAT || Opcode == ISD::USUBSAT) + return getNode(ISD::AND, DL, VT, N1, getNOT(DL, N2, VT)); + } break; case ISD::SMIN: case ISD::UMAX: @@ -5364,8 +5680,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, break; case ISD::SHL: if (N2C && (N1.getOpcode() == ISD::VSCALE) && Flags.hasNoSignedWrap()) { - APInt MulImm = cast<ConstantSDNode>(N1->getOperand(0))->getAPIntValue(); - APInt ShiftImm = N2C->getAPIntValue(); + const APInt &MulImm = N1->getConstantOperandAPInt(0); + const APInt &ShiftImm = N2C->getAPIntValue(); return getVScale(DL, VT, MulImm << ShiftImm); } LLVM_FALLTHROUGH; @@ -5444,6 +5760,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, const APInt &Val = N1C->getAPIntValue(); return SignExtendInReg(Val, VT); } + if (ISD::isBuildVectorOfConstantSDNodes(N1.getNode())) { SmallVector<SDValue, 8> Ops; llvm::EVT OpVT = N1.getOperand(0).getValueType(); @@ -5461,6 +5778,22 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, } break; } + case ISD::FP_TO_SINT_SAT: + case ISD::FP_TO_UINT_SAT: { + assert(VT.isInteger() && cast<VTSDNode>(N2)->getVT().isInteger() && + N1.getValueType().isFloatingPoint() && "Invalid FP_TO_*INT_SAT"); + assert(N1.getValueType().isVector() == VT.isVector() && + "FP_TO_*INT_SAT type should be vector iff the operand type is " + "vector!"); + assert((!VT.isVector() || VT.getVectorNumElements() == + N1.getValueType().getVectorNumElements()) && + "Vector element counts must match in FP_TO_*INT_SAT"); + assert(!cast<VTSDNode>(N2)->getVT().isVector() && + "Type to saturate to must be a scalar."); + assert(cast<VTSDNode>(N2)->getVT().bitsLE(VT.getScalarType()) && + "Not extending!"); + break; + } case ISD::EXTRACT_VECTOR_ELT: assert(VT.getSizeInBits() >= N1.getValueType().getScalarSizeInBits() && "The result of EXTRACT_VECTOR_ELT must be at least as wide as the \ @@ -5523,10 +5856,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if (N1Op2C->getZExtValue() == N2C->getZExtValue()) { if (VT == N1.getOperand(1).getValueType()) return N1.getOperand(1); - else - return getSExtOrTrunc(N1.getOperand(1), DL, VT); + return getSExtOrTrunc(N1.getOperand(1), DL, VT); } - return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N2); } } @@ -5563,11 +5894,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if (N1C) { unsigned ElementSize = VT.getSizeInBits(); unsigned Shift = ElementSize * N2C->getZExtValue(); - APInt ShiftedVal = N1C->getAPIntValue().lshr(Shift); - return getConstant(ShiftedVal.trunc(ElementSize), DL, VT); + const APInt &Val = N1C->getAPIntValue(); + return getConstant(Val.extractBits(ElementSize, Shift), DL, VT); } break; - case ISD::EXTRACT_SUBVECTOR: + case ISD::EXTRACT_SUBVECTOR: { EVT N1VT = N1.getValueType(); assert(VT.isVector() && N1VT.isVector() && "Extract subvector VTs must be vectors!"); @@ -5584,9 +5915,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, N1VT.getVectorMinNumElements()) && "Extract subvector overflow!"); assert(N2C->getAPIntValue().getBitWidth() == - TLI->getVectorIdxTy(getDataLayout()) - .getSizeInBits() - .getFixedSize() && + TLI->getVectorIdxTy(getDataLayout()).getFixedSizeInBits() && "Constant index for EXTRACT_SUBVECTOR has an invalid size"); // Trivial extraction. @@ -5612,6 +5941,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, return N1.getOperand(1); break; } + } // Perform trivial constant folding. 
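The i1-vector folds introduced above (the ADD/SUB, MUL, and saturating cases) rest on one observation: arithmetic on a 1-bit lane collapses to logic. add and sub become xor (mod-2 arithmetic), mul becomes and, saturating add becomes or, and saturating sub becomes and-not; the signed variants fold the same way since i1's signed range is just {-1, 0}. An exhaustive standalone check of the unsigned identities over both boolean values:

#include <cassert>

int main() {
  for (int X = 0; X <= 1; ++X)
    for (int Y = 0; Y <= 1; ++Y) {
      assert(((X + Y) & 1) == (X ^ Y));                  // add/sub on i1
      assert(((X * Y) & 1) == (X & Y));                  // mul on i1
      assert(((X + Y) > 1 ? 1 : X + Y) == (X | Y));      // uaddsat on i1
      assert(((X - Y) < 0 ? 0 : X - Y) == (X & ~Y & 1)); // usubsat on i1
    }
  return 0;
}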
if (SDValue SV = FoldConstantArithmetic(Opcode, DL, VT, {N1, N2})) @@ -5707,6 +6037,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, SDValue N2, SDValue N3, const SDNodeFlags Flags) { + assert(N1.getOpcode() != ISD::DELETED_NODE && + N2.getOpcode() != ISD::DELETED_NODE && + N3.getOpcode() != ISD::DELETED_NODE && + "Operand is DELETED_NODE!"); // Perform various simplifications. switch (Opcode) { case ISD::FMA: { @@ -5806,6 +6140,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, cast<ConstantSDNode>(N3)->getZExtValue()) <= VT.getVectorMinNumElements()) && "Insert subvector overflow!"); + assert(cast<ConstantSDNode>(N3)->getAPIntValue().getBitWidth() == + TLI->getVectorIdxTy(getDataLayout()).getFixedSizeInBits() && + "Constant index for INSERT_SUBVECTOR has an invalid size"); // Trivial insertion. if (VT == N2VT) @@ -5939,17 +6276,17 @@ static SDValue getMemsetStringVal(EVT VT, const SDLoc &dl, SelectionDAG &DAG, if (Slice.Array == nullptr) { if (VT.isInteger()) return DAG.getConstant(0, dl, VT); - else if (VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128) + if (VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128) return DAG.getConstantFP(0.0, dl, VT); - else if (VT.isVector()) { + if (VT.isVector()) { unsigned NumElts = VT.getVectorNumElements(); MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64; return DAG.getNode(ISD::BITCAST, dl, VT, DAG.getConstant(0, dl, EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts))); - } else - llvm_unreachable("Expected type!"); + } + llvm_unreachable("Expected type!"); } assert(!VT.isVector() && "Can't handle vector type here!"); @@ -6056,7 +6393,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, uint64_t Size, Align Alignment, bool isVol, bool AlwaysInline, MachinePointerInfo DstPtrInfo, - MachinePointerInfo SrcPtrInfo) { + MachinePointerInfo SrcPtrInfo, + const AAMDNodes &AAInfo) { // Turn a memcpy of undef to nop. // FIXME: We need to honor volatile even is Src is undef. if (Src.isUndef()) @@ -6103,7 +6441,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, // Don't promote to an alignment that would require dynamic stack // realignment. const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - if (!TRI->needsStackRealignment(MF)) + if (!TRI->hasStackRealignment(MF)) while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign)) NewAlign = NewAlign / 2; @@ -6115,6 +6453,10 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, } } + // Prepare AAInfo for loads/stores after lowering this memcpy. + AAMDNodes NewAAInfo = AAInfo; + NewAAInfo.TBAA = NewAAInfo.TBAAStruct = nullptr; + MachineMemOperand::Flags MMOFlags = isVol ? 
MachineMemOperand::MOVolatile : MachineMemOperand::MONone; SmallVector<SDValue, 16> OutLoadChains; @@ -6157,7 +6499,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, Store = DAG.getStore( Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl), - DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags); + DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags, NewAAInfo); OutChains.push_back(Store); } } @@ -6181,13 +6523,13 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, ISD::EXTLOAD, dl, NVT, Chain, DAG.getMemBasePlusOffset(Src, TypeSize::Fixed(SrcOff), dl), SrcPtrInfo.getWithOffset(SrcOff), VT, - commonAlignment(*SrcAlign, SrcOff), SrcMMOFlags); + commonAlignment(*SrcAlign, SrcOff), SrcMMOFlags, NewAAInfo); OutLoadChains.push_back(Value.getValue(1)); Store = DAG.getTruncStore( Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl), - DstPtrInfo.getWithOffset(DstOff), VT, Alignment, MMOFlags); + DstPtrInfo.getWithOffset(DstOff), VT, Alignment, MMOFlags, NewAAInfo); OutStoreChains.push_back(Store); } SrcOff += VTSize; @@ -6246,7 +6588,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, uint64_t Size, Align Alignment, bool isVol, bool AlwaysInline, MachinePointerInfo DstPtrInfo, - MachinePointerInfo SrcPtrInfo) { + MachinePointerInfo SrcPtrInfo, + const AAMDNodes &AAInfo) { // Turn a memmove of undef to nop. // FIXME: We need to honor volatile even is Src is undef. if (Src.isUndef()) @@ -6289,6 +6632,10 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, } } + // Prepare AAInfo for loads/stores after lowering this memmove. + AAMDNodes NewAAInfo = AAInfo; + NewAAInfo.TBAA = NewAAInfo.TBAAStruct = nullptr; + MachineMemOperand::Flags MMOFlags = isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone; uint64_t SrcOff = 0, DstOff = 0; @@ -6307,10 +6654,10 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, if (isDereferenceable) SrcMMOFlags |= MachineMemOperand::MODereferenceable; - Value = - DAG.getLoad(VT, dl, Chain, - DAG.getMemBasePlusOffset(Src, TypeSize::Fixed(SrcOff), dl), - SrcPtrInfo.getWithOffset(SrcOff), *SrcAlign, SrcMMOFlags); + Value = DAG.getLoad( + VT, dl, Chain, + DAG.getMemBasePlusOffset(Src, TypeSize::Fixed(SrcOff), dl), + SrcPtrInfo.getWithOffset(SrcOff), *SrcAlign, SrcMMOFlags, NewAAInfo); LoadValues.push_back(Value); LoadChains.push_back(Value.getValue(1)); SrcOff += VTSize; @@ -6322,10 +6669,10 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, unsigned VTSize = VT.getSizeInBits() / 8; SDValue Store; - Store = - DAG.getStore(Chain, dl, LoadValues[i], - DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl), - DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags); + Store = DAG.getStore( + Chain, dl, LoadValues[i], + DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl), + DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags, NewAAInfo); OutChains.push_back(Store); DstOff += VTSize; } @@ -6354,7 +6701,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, Align Alignment, bool isVol, - MachinePointerInfo DstPtrInfo) { + MachinePointerInfo DstPtrInfo, + const AAMDNodes &AAInfo) { // Turn a memset of undef to nop. // FIXME: We need to honor volatile even is Src is undef. 
if (Src.isUndef()) @@ -6401,6 +6749,10 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, LargestVT = MemOps[i]; SDValue MemSetValue = getMemsetValue(Src, LargestVT, DAG, dl); + // Prepare AAInfo for loads/stores after lowering this memset. + AAMDNodes NewAAInfo = AAInfo; + NewAAInfo.TBAA = NewAAInfo.TBAAStruct = nullptr; + for (unsigned i = 0; i < NumMemOps; i++) { EVT VT = MemOps[i]; unsigned VTSize = VT.getSizeInBits() / 8; @@ -6426,7 +6778,8 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl), DstPtrInfo.getWithOffset(DstOff), Alignment, - isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone); + isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone, + NewAAInfo); OutChains.push_back(Store); DstOff += VT.getSizeInBits() / 8; Size -= VTSize; @@ -6449,7 +6802,8 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, - MachinePointerInfo SrcPtrInfo) { + MachinePointerInfo SrcPtrInfo, + const AAMDNodes &AAInfo) { // Check to see if we should lower the memcpy to loads and stores first. // For cases within the target-specified limits, this is the best choice. ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); @@ -6460,7 +6814,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Result = getMemcpyLoadsAndStores( *this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Alignment, - isVol, false, DstPtrInfo, SrcPtrInfo); + isVol, false, DstPtrInfo, SrcPtrInfo, AAInfo); if (Result.getNode()) return Result; } @@ -6481,7 +6835,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, assert(ConstantSize && "AlwaysInline requires a constant size!"); return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Alignment, - isVol, true, DstPtrInfo, SrcPtrInfo); + isVol, true, DstPtrInfo, SrcPtrInfo, AAInfo); } checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace()); @@ -6563,7 +6917,8 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool isTailCall, MachinePointerInfo DstPtrInfo, - MachinePointerInfo SrcPtrInfo) { + MachinePointerInfo SrcPtrInfo, + const AAMDNodes &AAInfo) { // Check to see if we should lower the memmove to loads and stores first. // For cases within the target-specified limits, this is the best choice. ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); @@ -6574,7 +6929,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Result = getMemmoveLoadsAndStores( *this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Alignment, - isVol, false, DstPtrInfo, SrcPtrInfo); + isVol, false, DstPtrInfo, SrcPtrInfo, AAInfo); if (Result.getNode()) return Result; } @@ -6664,7 +7019,8 @@ SDValue SelectionDAG::getAtomicMemmove(SDValue Chain, const SDLoc &dl, SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool isTailCall, - MachinePointerInfo DstPtrInfo) { + MachinePointerInfo DstPtrInfo, + const AAMDNodes &AAInfo) { // Check to see if we should lower the memset to stores first. // For cases within the target-specified limits, this is the best choice. 
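A pattern worth calling out in the memcpy/memmove/memset hunks above: the caller's AAMDNodes now flow onto every load and store generated by the expansion, but each lowering first clears TBAA and TBAAStruct. That is the conservative choice: the expansion invents its own integer access types and offsets, so the source-level type-based metadata may no longer describe the emitted accesses, while scope-based (alias.scope/noalias) metadata still applies. A hedged sketch of the sanitizing step (field types are illustrative stand-ins):

struct AAMDNodesSketch {
  const void *TBAA = nullptr;       // type-based alias info
  const void *TBAAStruct = nullptr; // struct-path TBAA
  const void *Scope = nullptr;      // alias.scope metadata
  const void *NoAlias = nullptr;    // noalias metadata
};

// Mirrors the three "Prepare AAInfo for loads/stores" blocks above.
AAMDNodesSketch sanitizeForLoweredMemOp(AAMDNodesSketch AAInfo) {
  AAMDNodesSketch New = AAInfo;        // keep Scope and NoAlias
  New.TBAA = New.TBAAStruct = nullptr; // drop type-based facts
  return New;
}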
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); @@ -6675,7 +7031,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Result = getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Alignment, - isVol, DstPtrInfo); + isVol, DstPtrInfo, AAInfo); if (Result.getNode()) return Result; @@ -6839,8 +7195,8 @@ SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl) { SmallVector<EVT, 4> VTs; VTs.reserve(Ops.size()); - for (unsigned i = 0; i < Ops.size(); ++i) - VTs.push_back(Ops[i].getValueType()); + for (const SDValue &Op : Ops) + VTs.push_back(Op.getValueType()); return getNode(ISD::MERGE_VALUES, dl, getVTList(VTs), Ops); } @@ -7355,7 +7711,7 @@ SDValue SelectionDAG::getIndexedMaskedStore(SDValue OrigStore, const SDLoc &dl, AM, ST->isTruncatingStore(), ST->isCompressingStore()); } -SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, +SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef<SDValue> Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, @@ -7364,9 +7720,9 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops); - ID.AddInteger(VT.getRawBits()); + ID.AddInteger(MemVT.getRawBits()); ID.AddInteger(getSyntheticNodeSubclassData<MaskedGatherSDNode>( - dl.getIROrder(), VTs, VT, MMO, IndexType, ExtTy)); + dl.getIROrder(), VTs, MemVT, MMO, IndexType, ExtTy)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { @@ -7374,9 +7730,9 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, return SDValue(E, 0); } - IndexType = TLI->getCanonicalIndexType(IndexType, VT, Ops[4]); + IndexType = TLI->getCanonicalIndexType(IndexType, MemVT, Ops[4]); auto *N = newSDNode<MaskedGatherSDNode>(dl.getIROrder(), dl.getDebugLoc(), - VTs, VT, MMO, IndexType, ExtTy); + VTs, MemVT, MMO, IndexType, ExtTy); createOperands(N, Ops); assert(N->getPassThru().getValueType() == N->getValueType(0) && @@ -7402,7 +7758,7 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, return V; } -SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, +SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef<SDValue> Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, @@ -7411,9 +7767,9 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops); - ID.AddInteger(VT.getRawBits()); + ID.AddInteger(MemVT.getRawBits()); ID.AddInteger(getSyntheticNodeSubclassData<MaskedScatterSDNode>( - dl.getIROrder(), VTs, VT, MMO, IndexType, IsTrunc)); + dl.getIROrder(), VTs, MemVT, MMO, IndexType, IsTrunc)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { @@ -7421,9 +7777,9 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, return SDValue(E, 0); } - IndexType = TLI->getCanonicalIndexType(IndexType, VT, Ops[4]); + IndexType = TLI->getCanonicalIndexType(IndexType, MemVT, Ops[4]); auto *N = newSDNode<MaskedScatterSDNode>(dl.getIROrder(), dl.getDebugLoc(), - VTs, VT, MMO, IndexType, IsTrunc); + VTs, MemVT, MMO, IndexType, IsTrunc); createOperands(N, Ops); assert(N->getMask().getValueType().getVectorElementCount() == @@ -7588,6 +7944,12 @@ SDValue 
SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, default: break; } +#ifndef NDEBUG + for (auto &Op : Ops) + assert(Op.getOpcode() != ISD::DELETED_NODE && + "Operand is DELETED_NODE!"); +#endif + switch (Opcode) { default: break; case ISD::BUILD_VECTOR: @@ -7661,6 +8023,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, if (VTList.NumVTs == 1) return getNode(Opcode, DL, VTList.VTs[0], Ops); +#ifndef NDEBUG + for (auto &Op : Ops) + assert(Op.getOpcode() != ISD::DELETED_NODE && + "Operand is DELETED_NODE!"); +#endif + switch (Opcode) { case ISD::STRICT_FP_EXTEND: assert(VTList.NumVTs == 2 && Ops.size() == 2 && @@ -8397,7 +8765,9 @@ SDDbgValue *SelectionDAG::getDbgValue(DIVariable *Var, DIExpression *Expr, assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); return new (DbgInfo->getAlloc()) - SDDbgValue(Var, Expr, N, R, IsIndirect, DL, O); + SDDbgValue(DbgInfo->getAlloc(), Var, Expr, SDDbgOperand::fromNode(N, R), + {}, IsIndirect, DL, O, + /*IsVariadic=*/false); } /// Constant @@ -8407,7 +8777,10 @@ SDDbgValue *SelectionDAG::getConstantDbgValue(DIVariable *Var, const DebugLoc &DL, unsigned O) { assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); - return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, C, DL, O); + return new (DbgInfo->getAlloc()) + SDDbgValue(DbgInfo->getAlloc(), Var, Expr, SDDbgOperand::fromConst(C), {}, + /*IsIndirect=*/false, DL, O, + /*IsVariadic=*/false); } /// FrameIndex @@ -8418,19 +8791,46 @@ SDDbgValue *SelectionDAG::getFrameIndexDbgValue(DIVariable *Var, unsigned O) { assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); + return getFrameIndexDbgValue(Var, Expr, FI, {}, IsIndirect, DL, O); +} + +/// FrameIndex with dependencies +SDDbgValue *SelectionDAG::getFrameIndexDbgValue(DIVariable *Var, + DIExpression *Expr, unsigned FI, + ArrayRef<SDNode *> Dependencies, + bool IsIndirect, + const DebugLoc &DL, + unsigned O) { + assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); return new (DbgInfo->getAlloc()) - SDDbgValue(Var, Expr, FI, IsIndirect, DL, O, SDDbgValue::FRAMEIX); + SDDbgValue(DbgInfo->getAlloc(), Var, Expr, SDDbgOperand::fromFrameIdx(FI), + Dependencies, IsIndirect, DL, O, + /*IsVariadic=*/false); } /// VReg -SDDbgValue *SelectionDAG::getVRegDbgValue(DIVariable *Var, - DIExpression *Expr, +SDDbgValue *SelectionDAG::getVRegDbgValue(DIVariable *Var, DIExpression *Expr, unsigned VReg, bool IsIndirect, const DebugLoc &DL, unsigned O) { assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); return new (DbgInfo->getAlloc()) - SDDbgValue(Var, Expr, VReg, IsIndirect, DL, O, SDDbgValue::VREG); + SDDbgValue(DbgInfo->getAlloc(), Var, Expr, SDDbgOperand::fromVReg(VReg), + {}, IsIndirect, DL, O, + /*IsVariadic=*/false); +} + +SDDbgValue *SelectionDAG::getDbgValueList(DIVariable *Var, DIExpression *Expr, + ArrayRef<SDDbgOperand> Locs, + ArrayRef<SDNode *> Dependencies, + bool IsIndirect, const DebugLoc &DL, + unsigned O, bool IsVariadic) { + assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); + return new (DbgInfo->getAlloc()) + SDDbgValue(DbgInfo->getAlloc(), Var, Expr, Locs, Dependencies, IsIndirect, + DL, O, IsVariadic); } void SelectionDAG::transferDbgValues(SDValue From, SDValue 
To, @@ -8449,15 +8849,31 @@ void SelectionDAG::transferDbgValues(SDValue From, SDValue To, if (!FromNode->getHasDebugValue()) return; + SDDbgOperand FromLocOp = + SDDbgOperand::fromNode(From.getNode(), From.getResNo()); + SDDbgOperand ToLocOp = SDDbgOperand::fromNode(To.getNode(), To.getResNo()); + SmallVector<SDDbgValue *, 2> ClonedDVs; for (SDDbgValue *Dbg : GetDbgValues(FromNode)) { - if (Dbg->getKind() != SDDbgValue::SDNODE || Dbg->isInvalidated()) + if (Dbg->isInvalidated()) continue; // TODO: assert(!Dbg->isInvalidated() && "Transfer of invalid dbg value"); - // Just transfer the dbg value attached to From. - if (Dbg->getResNo() != From.getResNo()) + // Create a new location ops vector that is equal to the old vector, but + // with each instance of FromLocOp replaced with ToLocOp. + bool Changed = false; + auto NewLocOps = Dbg->copyLocationOps(); + std::replace_if( + NewLocOps.begin(), NewLocOps.end(), + [&Changed, FromLocOp](const SDDbgOperand &Op) { + bool Match = Op == FromLocOp; + Changed |= Match; + return Match; + }, + ToLocOp); + // Ignore this SDDbgValue if we didn't find a matching location. + if (!Changed) continue; DIVariable *Var = Dbg->getVariable(); @@ -8476,10 +8892,13 @@ void SelectionDAG::transferDbgValues(SDValue From, SDValue To, continue; Expr = *Fragment; } + + auto AdditionalDependencies = Dbg->getAdditionalDependencies(); // Clone the SDDbgValue and move it to To. - SDDbgValue *Clone = getDbgValue( - Var, Expr, ToNode, To.getResNo(), Dbg->isIndirect(), Dbg->getDebugLoc(), - std::max(ToNode->getIROrder(), Dbg->getOrder())); + SDDbgValue *Clone = getDbgValueList( + Var, Expr, NewLocOps, AdditionalDependencies, Dbg->isIndirect(), + Dbg->getDebugLoc(), std::max(ToNode->getIROrder(), Dbg->getOrder()), + Dbg->isVariadic()); ClonedDVs.push_back(Clone); if (InvalidateDbg) { @@ -8489,8 +8908,11 @@ void SelectionDAG::transferDbgValues(SDValue From, SDValue To, } } - for (SDDbgValue *Dbg : ClonedDVs) - AddDbgValue(Dbg, ToNode, false); + for (SDDbgValue *Dbg : ClonedDVs) { + assert(is_contained(Dbg->getSDNodes(), ToNode) && + "Transferred DbgValues should depend on the new SDNode"); + AddDbgValue(Dbg, false); + } } void SelectionDAG::salvageDebugInfo(SDNode &N) { @@ -8510,16 +8932,35 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) { if (!isConstantIntBuildVectorOrConstantInt(N0) && isConstantIntBuildVectorOrConstantInt(N1)) { uint64_t Offset = N.getConstantOperandVal(1); + // Rewrite an ADD constant node into a DIExpression. Since we are // performing arithmetic to compute the variable's *value* in the // DIExpression, we need to mark the expression with a // DW_OP_stack_value. auto *DIExpr = DV->getExpression(); - DIExpr = - DIExpression::prepend(DIExpr, DIExpression::StackValue, Offset); - SDDbgValue *Clone = - getDbgValue(DV->getVariable(), DIExpr, N0.getNode(), N0.getResNo(), - DV->isIndirect(), DV->getDebugLoc(), DV->getOrder()); + auto NewLocOps = DV->copyLocationOps(); + bool Changed = false; + for (size_t i = 0; i < NewLocOps.size(); ++i) { + // We're not given a ResNo to compare against because the whole + // node is going away. We know that any ISD::ADD only has one + // result, so we can assume any node match is using the result. 
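transferDbgValues above uses a small but handy idiom: std::replace_if with a side-effecting predicate, so a single pass both substitutes FromLocOp with ToLocOp and records whether anything matched. A standalone version of the idiom, with ints standing in for SDDbgOperand:

#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  std::vector<int> LocOps = {1, 2, 3, 2};
  const int From = 2, To = 9;
  bool Changed = false;
  std::replace_if(
      LocOps.begin(), LocOps.end(),
      [&Changed, From](int Op) {
        bool Match = (Op == From);
        Changed |= Match; // remember that a substitution happened
        return Match;
      },
      To);
  assert(Changed);
  assert((LocOps == std::vector<int>{1, 9, 3, 9}));
  return 0;
}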
+ if (NewLocOps[i].getKind() != SDDbgOperand::SDNODE || + NewLocOps[i].getSDNode() != &N) + continue; + NewLocOps[i] = SDDbgOperand::fromNode(N0.getNode(), N0.getResNo()); + SmallVector<uint64_t, 3> ExprOps; + DIExpression::appendOffset(ExprOps, Offset); + DIExpr = DIExpression::appendOpsToArg(DIExpr, ExprOps, i, true); + Changed = true; + } + (void)Changed; + assert(Changed && "Salvage target doesn't use N"); + + auto AdditionalDependencies = DV->getAdditionalDependencies(); + SDDbgValue *Clone = getDbgValueList(DV->getVariable(), DIExpr, + NewLocOps, AdditionalDependencies, + DV->isIndirect(), DV->getDebugLoc(), + DV->getOrder(), DV->isVariadic()); ClonedDVs.push_back(Clone); DV->setIsInvalidated(); DV->setIsEmitted(); @@ -8530,8 +8971,11 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) { } } - for (SDDbgValue *Dbg : ClonedDVs) - AddDbgValue(Dbg, Dbg->getSDNode(), false); + for (SDDbgValue *Dbg : ClonedDVs) { + assert(!Dbg->getSDNodes().empty() && + "Salvaged DbgValue should depend on a new SDNode"); + AddDbgValue(Dbg, false); + } } /// Creates a SDDbgLabel node. @@ -8965,9 +9409,7 @@ unsigned SelectionDAG::AssignTopologicalOrder() { checkForCycles(N, this); // N is in sorted position, so all its uses have one less operand // that needs to be sorted. - for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); - UI != UE; ++UI) { - SDNode *P = *UI; + for (SDNode *P : N->uses()) { unsigned Degree = P->getNodeId(); assert(Degree != 0 && "Invalid node degree"); --Degree; @@ -9014,17 +9456,17 @@ unsigned SelectionDAG::AssignTopologicalOrder() { /// AddDbgValue - Add a dbg_value SDNode. If SD is non-null that means the /// value is produced by SD. -void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) { - if (SD) { +void SelectionDAG::AddDbgValue(SDDbgValue *DB, bool isParameter) { + for (SDNode *SD : DB->getSDNodes()) { + if (!SD) + continue; assert(DbgInfo->getSDDbgValues(SD).empty() || SD->getHasDebugValue()); SD->setHasDebugValue(true); } - DbgInfo->add(DB, SD, isParameter); + DbgInfo->add(DB, isParameter); } -void SelectionDAG::AddDbgLabel(SDDbgLabel *DB) { - DbgInfo->add(DB); -} +void SelectionDAG::AddDbgLabel(SDDbgLabel *DB) { DbgInfo->add(DB); } SDValue SelectionDAG::makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain) { @@ -9226,21 +9668,22 @@ ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N, bool llvm::isNullOrNullSplat(SDValue N, bool AllowUndefs) { // TODO: may want to use peekThroughBitcast() here. - ConstantSDNode *C = isConstOrConstSplat(N, AllowUndefs); + ConstantSDNode *C = + isConstOrConstSplat(N, AllowUndefs, /*AllowTruncation=*/true); return C && C->isNullValue(); } -bool llvm::isOneOrOneSplat(SDValue N) { +bool llvm::isOneOrOneSplat(SDValue N, bool AllowUndefs) { // TODO: may want to use peekThroughBitcast() here. 
unsigned BitWidth = N.getScalarValueSizeInBits(); - ConstantSDNode *C = isConstOrConstSplat(N); + ConstantSDNode *C = isConstOrConstSplat(N, AllowUndefs); return C && C->isOne() && C->getValueSizeInBits(0) == BitWidth; } -bool llvm::isAllOnesOrAllOnesSplat(SDValue N) { +bool llvm::isAllOnesOrAllOnesSplat(SDValue N, bool AllowUndefs) { N = peekThroughBitcasts(N); unsigned BitWidth = N.getScalarValueSizeInBits(); - ConstantSDNode *C = isConstOrConstSplat(N); + ConstantSDNode *C = isConstOrConstSplat(N, AllowUndefs); return C && C->isAllOnesValue() && C->getValueSizeInBits(0) == BitWidth; } @@ -9290,8 +9733,8 @@ namespace { std::vector<EVT> VTs; EVTArray() { - VTs.reserve(MVT::LAST_VALUETYPE); - for (unsigned i = 0; i < MVT::LAST_VALUETYPE; ++i) + VTs.reserve(MVT::VALUETYPE_SIZE); + for (unsigned i = 0; i < MVT::VALUETYPE_SIZE; ++i) VTs.push_back(MVT((MVT::SimpleValueType)i)); } }; @@ -9308,11 +9751,9 @@ const EVT *SDNode::getValueTypeList(EVT VT) { if (VT.isExtended()) { sys::SmartScopedLock<true> Lock(*VTMutex); return &(*EVTs->insert(VT).first); - } else { - assert(VT.getSimpleVT() < MVT::LAST_VALUETYPE && - "Value type out of range!"); - return &SimpleVTArray->VTs[VT.getSimpleVT().SimpleTy]; } + assert(VT.getSimpleVT() < MVT::VALUETYPE_SIZE && "Value type out of range!"); + return &SimpleVTArray->VTs[VT.getSimpleVT().SimpleTy]; } /// hasNUsesOfValue - Return true if there are exactly NUSES uses of the @@ -9890,10 +10331,10 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, APInt &SplatUndef, // FIXME: This does not work for vectors with elements less than 8 bits. while (VecWidth > 8) { unsigned HalfSize = VecWidth / 2; - APInt HighValue = SplatValue.lshr(HalfSize).trunc(HalfSize); - APInt LowValue = SplatValue.trunc(HalfSize); - APInt HighUndef = SplatUndef.lshr(HalfSize).trunc(HalfSize); - APInt LowUndef = SplatUndef.trunc(HalfSize); + APInt HighValue = SplatValue.extractBits(HalfSize, HalfSize); + APInt LowValue = SplatValue.extractBits(HalfSize, 0); + APInt HighUndef = SplatUndef.extractBits(HalfSize, HalfSize); + APInt LowUndef = SplatUndef.extractBits(HalfSize, 0); // If the two halves do not match (ignoring undef bits), stop here. if ((HighValue & ~LowUndef) != (LowValue & ~HighUndef) || diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 6638ff6a6358..d56d4bcc9169 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -436,14 +436,11 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) { return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); } else if (ValueVT.bitsLT(PartEVT)) { - // Bitcast Val back the original type and extract the corresponding - // vector we want. - unsigned Elts = PartEVT.getSizeInBits() / ValueVT.getScalarSizeInBits(); - EVT WiderVecType = EVT::getVectorVT(*DAG.getContext(), - ValueVT.getVectorElementType(), Elts); - Val = DAG.getBitcast(WiderVecType, Val); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, - DAG.getVectorIdxConstant(0, DL)); + const uint64_t ValueSize = ValueVT.getFixedSizeInBits(); + EVT IntermediateType = EVT::getIntegerVT(*DAG.getContext(), ValueSize); + // Drop the extra bits. 
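BuildVectorSDNode::isConstantSplat, updated above to use extractBits, finds the smallest repeating element by comparing the two halves of the value and halving while they agree. The same search, standalone and without the undef-bit handling:

#include <cassert>
#include <cstdint>

unsigned smallestSplatWidth(uint64_t V, unsigned Width) {
  while (Width > 8) {
    unsigned Half = Width / 2;
    uint64_t Mask = (1ull << Half) - 1; // Half <= 32 here, so no UB
    uint64_t Hi = (V >> Half) & Mask;
    uint64_t Lo = V & Mask;
    if (Hi != Lo)
      break; // halves differ: Width is the splat element size
    V = Lo;
    Width = Half;
  }
  return Width;
}

int main() {
  assert(smallestSplatWidth(0xABABABABABABABABull, 64) == 8);
  assert(smallestSplatWidth(0x00FF00FF00FF00FFull, 64) == 16);
  assert(smallestSplatWidth(0x0123456789ABCDEFull, 64) == 64);
  return 0;
}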
+ Val = DAG.getNode(ISD::TRUNCATE, DL, IntermediateType, Val); + return DAG.getBitcast(ValueVT, Val); } diagnosePossiblyInvalidConstraint( @@ -610,30 +607,39 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, std::reverse(Parts, Parts + OrigNumParts); } -static SDValue widenVectorToPartType(SelectionDAG &DAG, - SDValue Val, const SDLoc &DL, EVT PartVT) { - if (!PartVT.isFixedLengthVector()) +static SDValue widenVectorToPartType(SelectionDAG &DAG, SDValue Val, + const SDLoc &DL, EVT PartVT) { + if (!PartVT.isVector()) return SDValue(); EVT ValueVT = Val.getValueType(); - unsigned PartNumElts = PartVT.getVectorNumElements(); - unsigned ValueNumElts = ValueVT.getVectorNumElements(); - if (PartNumElts > ValueNumElts && - PartVT.getVectorElementType() == ValueVT.getVectorElementType()) { - EVT ElementVT = PartVT.getVectorElementType(); - // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in - // undef elements. - SmallVector<SDValue, 16> Ops; - DAG.ExtractVectorElements(Val, Ops); - SDValue EltUndef = DAG.getUNDEF(ElementVT); - for (unsigned i = ValueNumElts, e = PartNumElts; i != e; ++i) - Ops.push_back(EltUndef); + ElementCount PartNumElts = PartVT.getVectorElementCount(); + ElementCount ValueNumElts = ValueVT.getVectorElementCount(); + + // We only support widening vectors with equivalent element types and + // fixed/scalable properties. If a target needs to widen a fixed-length type + // to a scalable one, it should be possible to use INSERT_SUBVECTOR below. + if (ElementCount::isKnownLE(PartNumElts, ValueNumElts) || + PartNumElts.isScalable() != ValueNumElts.isScalable() || + PartVT.getVectorElementType() != ValueVT.getVectorElementType()) + return SDValue(); - // FIXME: Use CONCAT for 2x -> 4x. - return DAG.getBuildVector(PartVT, DL, Ops); - } + // Widening a scalable vector to another scalable vector is done by inserting + // the vector into a larger undef one. + if (PartNumElts.isScalable()) + return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT), + Val, DAG.getVectorIdxConstant(0, DL)); - return SDValue(); + EVT ElementVT = PartVT.getVectorElementType(); + // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in + // undef elements. + SmallVector<SDValue, 16> Ops; + DAG.ExtractVectorElements(Val, Ops); + SDValue EltUndef = DAG.getUNDEF(ElementVT); + Ops.append((PartNumElts - ValueNumElts).getFixedValue(), EltUndef); + + // FIXME: Use CONCAT for 2x -> 4x. + return DAG.getBuildVector(PartVT, DL, Ops); } /// getCopyToPartsVector - Create a series of nodes that contain the specified @@ -714,13 +720,25 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, EVT BuiltVectorTy = EVT::getVectorVT( *DAG.getContext(), IntermediateVT.getScalarType(), DestEltCnt.getValue()); - if (ValueVT != BuiltVectorTy) { - if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, BuiltVectorTy)) - Val = Widened; + if (ValueVT == BuiltVectorTy) { + // Nothing to do. + } else if (ValueVT.getSizeInBits() == BuiltVectorTy.getSizeInBits()) { + // Bitconvert vector->vector case. 
Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val); + } else if (SDValue Widened = + widenVectorToPartType(DAG, Val, DL, BuiltVectorTy)) { + Val = Widened; + } else if (BuiltVectorTy.getVectorElementType().bitsGE( + ValueVT.getVectorElementType()) && + BuiltVectorTy.getVectorElementCount() == + ValueVT.getVectorElementCount()) { + // Promoted vector extract + Val = DAG.getAnyExtOrTrunc(Val, DL, BuiltVectorTy); } + assert(Val.getValueType() == BuiltVectorTy && "Unexpected vector value type"); + // Split the vector into intermediate operands. SmallVector<SDValue, 8> Ops(NumIntermediates); for (unsigned i = 0; i != NumIntermediates; ++i) { @@ -970,8 +988,9 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, } for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) { - unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]); MVT RegisterVT = RegVTs[Value]; + unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value], + RegisterVT); for (unsigned i = 0; i != NumRegs; ++i) { assert(Reg < Regs.size() && "Mismatch in # registers expected"); unsigned TheReg = Regs[Reg++]; @@ -1119,6 +1138,33 @@ void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) { } } +void SelectionDAGBuilder::addDanglingDebugInfo(const DbgValueInst *DI, + DebugLoc DL, unsigned Order) { + // We treat variadic dbg_values differently at this stage. + if (DI->hasArgList()) { + // For variadic dbg_values we will now insert an undef. + // FIXME: We can potentially recover these! + SmallVector<SDDbgOperand, 2> Locs; + for (const Value *V : DI->getValues()) { + auto Undef = UndefValue::get(V->getType()); + Locs.push_back(SDDbgOperand::fromConst(Undef)); + } + SDDbgValue *SDV = DAG.getDbgValueList( + DI->getVariable(), DI->getExpression(), Locs, {}, + /*IsIndirect=*/false, DL, Order, /*IsVariadic=*/true); + DAG.AddDbgValue(SDV, /*isParameter=*/false); + } else { + // TODO: Dangling debug info will eventually either be resolved or produce + // an Undef DBG_VALUE. However in the resolution case, a gap may appear + // between the original dbg.value location and its resolved DBG_VALUE, + // which we should ideally fill with an extra Undef DBG_VALUE. 
+ assert(DI->getNumVariableLocationOps() == 1 && + "DbgValueInst without an ArgList should have a single location " + "operand."); + DanglingDebugInfoMap[DI->getValue(0)].emplace_back(DI, DL, Order); + } +} + void SelectionDAGBuilder::dropDanglingDebugInfo(const DILocalVariable *Variable, const DIExpression *Expr) { auto isMatchingDbgValue = [&](DanglingDebugInfo &DDI) { @@ -1156,6 +1202,7 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, DanglingDebugInfoVector &DDIV = DanglingDbgInfoIt->second; for (auto &DDI : DDIV) { const DbgValueInst *DI = DDI.getDI(); + assert(!DI->hasArgList() && "Not implemented for variadic dbg_values"); assert(DI && "Ill-formed DanglingDebugInfo"); DebugLoc dl = DDI.getdl(); unsigned ValSDNodeOrder = Val.getNode()->getIROrder(); @@ -1185,37 +1232,41 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, << ValSDNodeOrder << "\n"); SDV = getDbgValue(Val, Variable, Expr, dl, std::max(DbgSDNodeOrder, ValSDNodeOrder)); - DAG.AddDbgValue(SDV, Val.getNode(), false); + DAG.AddDbgValue(SDV, false); } else LLVM_DEBUG(dbgs() << "Resolved dangling debug info for " << *DI << "in EmitFuncArgumentDbgValue\n"); } else { LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); - auto Undef = - UndefValue::get(DDI.getDI()->getVariableLocation()->getType()); + auto Undef = UndefValue::get(DDI.getDI()->getValue(0)->getType()); auto SDV = DAG.getConstantDbgValue(Variable, Expr, Undef, dl, DbgSDNodeOrder); - DAG.AddDbgValue(SDV, nullptr, false); + DAG.AddDbgValue(SDV, false); } } DDIV.clear(); } void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) { - Value *V = DDI.getDI()->getValue(); + // TODO: For the variadic implementation, instead of only checking the fail + // state of `handleDebugValue`, we need know specifically which values were + // invalid, so that we attempt to salvage only those values when processing + // a DIArgList. + assert(!DDI.getDI()->hasArgList() && + "Not implemented for variadic dbg_values"); + Value *V = DDI.getDI()->getValue(0); DILocalVariable *Var = DDI.getDI()->getVariable(); DIExpression *Expr = DDI.getDI()->getExpression(); DebugLoc DL = DDI.getdl(); DebugLoc InstDL = DDI.getDI()->getDebugLoc(); unsigned SDOrder = DDI.getSDNodeOrder(); - // Currently we consider only dbg.value intrinsics -- we tell the salvager // that DW_OP_stack_value is desired. assert(isa<DbgValueInst>(DDI.getDI())); bool StackValue = true; // Can this Value can be encoded without any further work? - if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder)) + if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder, /*IsVariadic=*/false)) return; // Attempt to salvage back through as many instructions as possible. Bail if @@ -1223,20 +1274,27 @@ void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) { // variable. FIXME: Further work could recover those too. while (isa<Instruction>(V)) { Instruction &VAsInst = *cast<Instruction>(V); - DIExpression *NewExpr = salvageDebugInfoImpl(VAsInst, Expr, StackValue); + // Temporary "0", awaiting real implementation. + SmallVector<Value *, 4> AdditionalValues; + DIExpression *SalvagedExpr = + salvageDebugInfoImpl(VAsInst, Expr, StackValue, 0, AdditionalValues); // If we cannot salvage any further, and haven't yet found a suitable debug // expression, bail out. - if (!NewExpr) + // TODO: If AdditionalValues isn't empty, then the salvage can only be + // represented with a DBG_VALUE_LIST, so we give up. 
When we have support + // here for variadic dbg_values, remove that condition. + if (!SalvagedExpr || !AdditionalValues.empty()) break; // New value and expr now represent this debuginfo. V = VAsInst.getOperand(0); - Expr = NewExpr; + Expr = SalvagedExpr; // Some kind of simplification occurred: check whether the operand of the // salvaged debug expression can be encoded in this DAG. - if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder)) { + if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder, + /*IsVariadic=*/false)) { LLVM_DEBUG(dbgs() << "Salvaged debug location info for:\n " << DDI.getDI() << "\nBy stripping back to:\n " << V); return; @@ -1246,9 +1304,9 @@ void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) { // This was the final opportunity to salvage this debug information, and it // couldn't be done. Place an undef DBG_VALUE at this location to terminate // any earlier variable location. - auto Undef = UndefValue::get(DDI.getDI()->getVariableLocation()->getType()); + auto Undef = UndefValue::get(DDI.getDI()->getValue(0)->getType()); auto SDV = DAG.getConstantDbgValue(Var, Expr, Undef, DL, SDNodeOrder); - DAG.AddDbgValue(SDV, nullptr, false); + DAG.AddDbgValue(SDV, false); LLVM_DEBUG(dbgs() << "Dropping debug value info for:\n " << DDI.getDI() << "\n"); @@ -1256,53 +1314,72 @@ void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) { << "\n"); } -bool SelectionDAGBuilder::handleDebugValue(const Value *V, DILocalVariable *Var, +bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values, + DILocalVariable *Var, DIExpression *Expr, DebugLoc dl, - DebugLoc InstDL, unsigned Order) { - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - SDDbgValue *SDV; - if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V) || - isa<ConstantPointerNull>(V)) { - SDV = DAG.getConstantDbgValue(Var, Expr, V, dl, SDNodeOrder); - DAG.AddDbgValue(SDV, nullptr, false); + DebugLoc InstDL, unsigned Order, + bool IsVariadic) { + if (Values.empty()) return true; - } + SmallVector<SDDbgOperand> LocationOps; + SmallVector<SDNode *> Dependencies; + for (const Value *V : Values) { + // Constant value. + if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V) || + isa<ConstantPointerNull>(V)) { + LocationOps.emplace_back(SDDbgOperand::fromConst(V)); + continue; + } - // If the Value is a frame index, we can create a FrameIndex debug value - // without relying on the DAG at all. - if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) { - auto SI = FuncInfo.StaticAllocaMap.find(AI); - if (SI != FuncInfo.StaticAllocaMap.end()) { - auto SDV = - DAG.getFrameIndexDbgValue(Var, Expr, SI->second, - /*IsIndirect*/ false, dl, SDNodeOrder); - // Do not attach the SDNodeDbgValue to an SDNode: this variable location - // is still available even if the SDNode gets optimized out. - DAG.AddDbgValue(SDV, nullptr, false); - return true; + // If the Value is a frame index, we can create a FrameIndex debug value + // without relying on the DAG at all. + if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) { + auto SI = FuncInfo.StaticAllocaMap.find(AI); + if (SI != FuncInfo.StaticAllocaMap.end()) { + LocationOps.emplace_back(SDDbgOperand::fromFrameIdx(SI->second)); + continue; + } } - } - // Do not use getValue() in here; we don't want to generate code at - // this point if it hasn't been done yet. - SDValue N = NodeMap[V]; - if (!N.getNode() && isa<Argument>(V)) // Check unused arguments map. 
- N = UnusedArgNodeMap[V]; - if (N.getNode()) { - if (EmitFuncArgumentDbgValue(V, Var, Expr, dl, false, N)) - return true; - SDV = getDbgValue(N, Var, Expr, dl, SDNodeOrder); - DAG.AddDbgValue(SDV, N.getNode(), false); - return true; - } + // Do not use getValue() in here; we don't want to generate code at + // this point if it hasn't been done yet. + SDValue N = NodeMap[V]; + if (!N.getNode() && isa<Argument>(V)) // Check unused arguments map. + N = UnusedArgNodeMap[V]; + if (N.getNode()) { + // Only emit func arg dbg value for non-variadic dbg.values for now. + if (!IsVariadic && EmitFuncArgumentDbgValue(V, Var, Expr, dl, false, N)) + return true; + if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) { + // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can + // describe stack slot locations. + // + // Consider "int x = 0; int *px = &x;". There are two kinds of + // interesting debug values here after optimization: + // + // dbg.value(i32* %px, !"int *px", !DIExpression()), and + // dbg.value(i32* %px, !"int x", !DIExpression(DW_OP_deref)) + // + // Both describe the direct values of their associated variables. + Dependencies.push_back(N.getNode()); + LocationOps.emplace_back(SDDbgOperand::fromFrameIdx(FISDN->getIndex())); + continue; + } + LocationOps.emplace_back( + SDDbgOperand::fromNode(N.getNode(), N.getResNo())); + continue; + } + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + // Special rules apply for the first dbg.values of parameter variables in a + // function. Identify them by the fact they reference Argument Values, that + // they're parameters, and they are parameters of the current function. We + // need to let them dangle until they get an SDNode. + bool IsParamOfFunc = + isa<Argument>(V) && Var->isParameter() && !InstDL.getInlinedAt(); + if (IsParamOfFunc) + return false; - // Special rules apply for the first dbg.values of parameter variables in a - // function. Identify them by the fact they reference Argument Values, that - // they're parameters, and they are parameters of the current function. We - // need to let them dangle until they get an SDNode. - bool IsParamOfFunc = isa<Argument>(V) && Var->isParameter() && - !InstDL.getInlinedAt(); - if (!IsParamOfFunc) { // The value is not used in this block yet (or it would have an SDNode). // We still want the value to appear for the user if possible -- if it has // an associated VReg, we can refer to that instead. @@ -1314,6 +1391,9 @@ bool SelectionDAGBuilder::handleDebugValue(const Value *V, DILocalVariable *Var, RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, V->getType(), None); if (RFV.occupiesMultipleRegs()) { + // FIXME: We could potentially support variadic dbg_values here. + if (IsVariadic) + return false; unsigned Offset = 0; unsigned BitsToDescribe = 0; if (auto VarSize = Var->getSizeInBits()) @@ -1321,31 +1401,41 @@ bool SelectionDAGBuilder::handleDebugValue(const Value *V, DILocalVariable *Var, if (auto Fragment = Expr->getFragmentInfo()) BitsToDescribe = Fragment->SizeInBits; for (auto RegAndSize : RFV.getRegsAndSizes()) { - unsigned RegisterSize = RegAndSize.second; // Bail out if all bits are described already. if (Offset >= BitsToDescribe) break; + // TODO: handle scalable vectors. + unsigned RegisterSize = RegAndSize.second; unsigned FragmentSize = (Offset + RegisterSize > BitsToDescribe) - ? BitsToDescribe - Offset - : RegisterSize; + ? 
BitsToDescribe - Offset + : RegisterSize; auto FragmentExpr = DIExpression::createFragmentExpression( Expr, Offset, FragmentSize); if (!FragmentExpr) - continue; - SDV = DAG.getVRegDbgValue(Var, *FragmentExpr, RegAndSize.first, - false, dl, SDNodeOrder); - DAG.AddDbgValue(SDV, nullptr, false); + continue; + SDDbgValue *SDV = DAG.getVRegDbgValue( + Var, *FragmentExpr, RegAndSize.first, false, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, false); Offset += RegisterSize; } - } else { - SDV = DAG.getVRegDbgValue(Var, Expr, Reg, false, dl, SDNodeOrder); - DAG.AddDbgValue(SDV, nullptr, false); + return true; } - return true; + // We can use simple vreg locations for variadic dbg_values as well. + LocationOps.emplace_back(SDDbgOperand::fromVReg(Reg)); + continue; } + // We failed to create a SDDbgOperand for V. + return false; } - return false; + // We have created a SDDbgOperand for each Value in Values. + // Should use Order instead of SDNodeOrder? + assert(!LocationOps.empty()); + SDDbgValue *SDV = + DAG.getDbgValueList(Var, Expr, LocationOps, Dependencies, + /*IsIndirect=*/false, dl, SDNodeOrder, IsVariadic); + DAG.AddDbgValue(SDV, /*isParameter=*/false); + return true; } void SelectionDAGBuilder::resolveOrClearDbgInfo() { @@ -1458,9 +1548,8 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) { SmallVector<SDValue, 4> Constants; - for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end(); - OI != OE; ++OI) { - SDNode *Val = getValue(*OI).getNode(); + for (const Use &U : C->operands()) { + SDNode *Val = getValue(U).getNode(); // If the operand is an empty aggregate, there are no values. if (!Val) continue; // Add each leaf value from the operand to the Constants list @@ -1592,6 +1681,8 @@ void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) { // Update machine-CFG edge. MachineBasicBlock *TargetMBB = FuncInfo.MBBMap[I.getSuccessor()]; FuncInfo.MBB->addSuccessor(TargetMBB); + TargetMBB->setIsEHCatchretTarget(true); + DAG.getMachineFunction().setHasEHCatchret(true); auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); bool IsSEH = isAsynchronousEHPersonality(Pers); @@ -1851,7 +1942,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters( I.getOperand(0)->getType(), F->getCallingConv(), - /*IsVarArg*/ false); + /*IsVarArg*/ false, DL); ISD::NodeType ExtendKind = ISD::ANY_EXTEND; if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex, @@ -1991,7 +2082,7 @@ bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V, // If this is an argument, we can export it if the BB is the entry block or // if it is already exported. if (isa<Argument>(V)) { - if (FromBB == &FromBB->getParent()->getEntryBlock()) + if (FromBB->isEntryBlock()) return true; // Otherwise, can only export this if it is already exported. @@ -2782,23 +2873,27 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't // have to do anything here to lower funclet bundles. 
- assert(!I.hasOperandBundlesOtherThan({LLVMContext::OB_deopt, - LLVMContext::OB_gc_transition, - LLVMContext::OB_gc_live, - LLVMContext::OB_funclet, - LLVMContext::OB_cfguardtarget}) && + assert(!I.hasOperandBundlesOtherThan( + {LLVMContext::OB_deopt, LLVMContext::OB_gc_transition, + LLVMContext::OB_gc_live, LLVMContext::OB_funclet, + LLVMContext::OB_cfguardtarget, + LLVMContext::OB_clang_arc_attachedcall}) && "Cannot lower invokes with arbitrary operand bundles yet!"); const Value *Callee(I.getCalledOperand()); const Function *Fn = dyn_cast<Function>(Callee); if (isa<InlineAsm>(Callee)) - visitInlineAsm(I); + visitInlineAsm(I, EHPadBB); else if (Fn && Fn->isIntrinsic()) { switch (Fn->getIntrinsicID()) { default: llvm_unreachable("Cannot invoke this intrinsic"); case Intrinsic::donothing: // Ignore invokes to @llvm.donothing: jump directly to the next BB. + case Intrinsic::seh_try_begin: + case Intrinsic::seh_scope_begin: + case Intrinsic::seh_try_end: + case Intrinsic::seh_scope_end: break; case Intrinsic::experimental_patchpoint_void: case Intrinsic::experimental_patchpoint_i64: @@ -2829,7 +2924,7 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { // with deopt state. LowerCallSiteWithDeoptBundle(&I, getValue(Callee), EHPadBB); } else { - LowerCallTo(I, getValue(Callee), false, EHPadBB); + LowerCallTo(I, getValue(Callee), false, false, EHPadBB); } // If the value of the invoke is used outside of its defining block, make it @@ -4273,7 +4368,7 @@ static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index, Base = SDB->getValue(C); - unsigned NumElts = cast<FixedVectorType>(Ptr->getType())->getNumElements(); + ElementCount NumElts = cast<VectorType>(Ptr->getType())->getElementCount(); EVT VT = EVT::getVectorVT(*DAG.getContext(), TLI.getPointerTy(DL), NumElts); Index = DAG.getConstant(0, SDB->getCurSDLoc(), VT); IndexType = ISD::SIGNED_SCALED; @@ -4314,7 +4409,7 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { EVT VT = Src0.getValueType(); Align Alignment = cast<ConstantInt>(I.getArgOperand(2)) ->getMaybeAlignValue() - .getValueOr(DAG.getEVTAlign(VT)); + .getValueOr(DAG.getEVTAlign(VT.getScalarType())); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); AAMDNodes AAInfo; @@ -4339,6 +4434,14 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { IndexType = ISD::SIGNED_UNSCALED; Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout())); } + + EVT IdxVT = Index.getValueType(); + EVT EltTy = IdxVT.getVectorElementType(); + if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) { + EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy); + Index = DAG.getNode(ISD::SIGN_EXTEND, sdl, NewIdxVT, Index); + } + SDValue Ops[] = { getMemoryRoot(), Src0, Mask, Base, Index, Scale }; SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl, Ops, MMO, IndexType, false); @@ -4424,7 +4527,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); Align Alignment = cast<ConstantInt>(I.getArgOperand(1)) ->getMaybeAlignValue() - .getValueOr(DAG.getEVTAlign(VT)); + .getValueOr(DAG.getEVTAlign(VT.getScalarType())); AAMDNodes AAInfo; I.getAAMetadata(AAInfo); @@ -4450,6 +4553,14 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { IndexType = ISD::SIGNED_UNSCALED; Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout())); } + + EVT IdxVT = Index.getValueType(); + EVT EltTy = IdxVT.getVectorElementType(); + if 
(TLI.shouldExtendGSIndex(IdxVT, EltTy)) { + EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy); + Index = DAG.getNode(ISD::SIGN_EXTEND, sdl, NewIdxVT, Index); + } + SDValue Ops[] = { Root, Src0, Mask, Base, Index, Scale }; SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl, Ops, MMO, IndexType, ISD::NON_EXTLOAD); @@ -4702,6 +4813,12 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, SDVTList VTs = DAG.getVTList(ValueVTs); + // Propagate fast-math-flags from IR to node(s). + SDNodeFlags Flags; + if (auto *FPMO = dyn_cast<FPMathOperator>(&I)) + Flags.copyFMF(*FPMO); + SelectionDAG::FlagInserter FlagsInserter(DAG, Flags); + // Create the node. SDValue Result; if (IsTgtIntrinsic) { @@ -5377,6 +5494,8 @@ getUnderlyingArgRegs(SmallVectorImpl<std::pair<unsigned, TypeSize>> &Regs, /// If the DbgValueInst is a dbg_value of a function argument, create the /// corresponding DBG_VALUE machine instruction for it now. At the end of /// instruction selection, they will be inserted to the entry BB. +/// We don't currently support this for variadic dbg_values, as they shouldn't +/// appear for function arguments or in the prologue. bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( const Value *V, DILocalVariable *Variable, DIExpression *Expr, DILocation *DL, bool IsDbgDeclare, const SDValue &N) { @@ -5384,6 +5503,35 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( if (!Arg) return false; + MachineFunction &MF = DAG.getMachineFunction(); + const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); + + // Helper to create DBG_INSTR_REFs or DBG_VALUEs, depending on what kind + // we've been asked to pursue. + auto MakeVRegDbgValue = [&](Register Reg, DIExpression *FragExpr, + bool Indirect) { + if (Reg.isVirtual() && TM.Options.ValueTrackingVariableLocations) { + // For VRegs, in instruction referencing mode, create a DBG_INSTR_REF + // pointing at the VReg, which will be patched up later. + auto &Inst = TII->get(TargetOpcode::DBG_INSTR_REF); + auto MIB = BuildMI(MF, DL, Inst); + MIB.addReg(Reg, RegState::Debug); + MIB.addImm(0); + MIB.addMetadata(Variable); + auto *NewDIExpr = FragExpr; + // We don't have an "Indirect" field in DBG_INSTR_REF, fold that into + // the DIExpression. + if (Indirect) + NewDIExpr = DIExpression::prepend(FragExpr, DIExpression::DerefBefore); + MIB.addMetadata(NewDIExpr); + return MIB; + } else { + // Create a completely standard DBG_VALUE. + auto &Inst = TII->get(TargetOpcode::DBG_VALUE); + return BuildMI(MF, DL, Inst, Indirect, Reg, Variable, FragExpr); + } + }; + if (!IsDbgDeclare) { // ArgDbgValues are hoisted to the beginning of the entry block. So we // should only emit as ArgDbgValue if the dbg.value intrinsic is found in @@ -5449,9 +5597,6 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( } } - MachineFunction &MF = DAG.getMachineFunction(); - const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); - bool IsIndirect = false; Optional<MachineOperand> Op; // Some arguments' frame index is recorded during argument lowering. 
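
[Editor's note: the hunk above replaces direct BuildMI(..., DBG_VALUE, ...) calls with a MakeVRegDbgValue helper that emits a DBG_INSTR_REF instead of a DBG_VALUE when TM.Options.ValueTrackingVariableLocations is set. A minimal stand-alone sketch of that pattern follows; it mirrors the lambda in the hunk but is not part of the commit, the free-function name buildVRegDbgValue is hypothetical, and the UseInstrRef parameter stands in for the TM.Options check.]

    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/CodeGen/MachineInstrBuilder.h"
    #include "llvm/CodeGen/TargetInstrInfo.h"
    #include "llvm/IR/DebugInfoMetadata.h"
    using namespace llvm;

    // Sketch: pick between the two vreg debug-value encodings, following
    // the MakeVRegDbgValue lambda in the hunk above.
    static MachineInstrBuilder
    buildVRegDbgValue(MachineFunction &MF, const DebugLoc &DL,
                      const TargetInstrInfo *TII, Register Reg,
                      const DILocalVariable *Variable, const DIExpression *Expr,
                      bool Indirect, bool UseInstrRef) {
      if (UseInstrRef && Reg.isVirtual()) {
        // DBG_INSTR_REF carries no indirection flag, so an indirect location
        // is expressed by prepending a dereference to the DIExpression.
        const DIExpression *E =
            Indirect ? DIExpression::prepend(Expr, DIExpression::DerefBefore)
                     : Expr;
        auto MIB = BuildMI(MF, DL, TII->get(TargetOpcode::DBG_INSTR_REF));
        MIB.addReg(Reg, RegState::Debug); // the referenced virtual register
        MIB.addImm(0);                    // operand/result index
        MIB.addMetadata(Variable);
        MIB.addMetadata(E);
        return MIB;
      }
      // Otherwise a classic DBG_VALUE describes the location directly.
      return BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), Indirect, Reg,
                     Variable, Expr);
    }
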
@@ -5518,13 +5663,12 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( if (!FragmentExpr) { SDDbgValue *SDV = DAG.getConstantDbgValue( Variable, Expr, UndefValue::get(V->getType()), DL, SDNodeOrder); - DAG.AddDbgValue(SDV, nullptr, false); + DAG.AddDbgValue(SDV, false); continue; } - assert(!IsDbgDeclare && "DbgDeclare operand is not in memory?"); - FuncInfo.ArgDbgValues.push_back( - BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsDbgDeclare, - RegAndSize.first, Variable, *FragmentExpr)); + MachineInstr *NewMI = + MakeVRegDbgValue(RegAndSize.first, *FragmentExpr, IsDbgDeclare); + FuncInfo.ArgDbgValues.push_back(NewMI); } }; @@ -5555,11 +5699,15 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( assert(Variable->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); - IsIndirect = (Op->isReg()) ? IsIndirect : true; - FuncInfo.ArgDbgValues.push_back( - BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect, - *Op, Variable, Expr)); + MachineInstr *NewMI = nullptr; + if (Op->isReg()) + NewMI = MakeVRegDbgValue(Op->getReg(), Expr, IsIndirect); + else + NewMI = BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), true, *Op, + Variable, Expr); + + FuncInfo.ArgDbgValues.push_back(NewMI); return true; } @@ -5616,7 +5764,7 @@ void SelectionDAGBuilder::lowerCallToExternalSymbol(const CallInst &I, SDValue Callee = DAG.getExternalSymbol( FunctionName, DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout())); - LowerCallTo(I, Callee, I.isTailCall()); + LowerCallTo(I, Callee, I.isTailCall(), I.isMustTailCall()); } /// Given a @llvm.call.preallocated.setup, return the corresponding @@ -5718,10 +5866,12 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, // FIXME: Support passing different dest/src alignments to the memcpy DAG // node. SDValue Root = isVol ? getRoot() : getMemoryRoot(); + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); SDValue MC = DAG.getMemcpy(Root, sdl, Op1, Op2, Op3, Alignment, isVol, /* AlwaysInline */ false, isTC, MachinePointerInfo(I.getArgOperand(0)), - MachinePointerInfo(I.getArgOperand(1))); + MachinePointerInfo(I.getArgOperand(1)), AAInfo); updateDAGForMaybeTailCall(MC); return; } @@ -5739,10 +5889,12 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); // FIXME: Support passing different dest/src alignments to the memcpy DAG // node. + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); SDValue MC = DAG.getMemcpy(getRoot(), sdl, Dst, Src, Size, Alignment, isVol, /* AlwaysInline */ true, isTC, MachinePointerInfo(I.getArgOperand(0)), - MachinePointerInfo(I.getArgOperand(1))); + MachinePointerInfo(I.getArgOperand(1)), AAInfo); updateDAGForMaybeTailCall(MC); return; } @@ -5756,8 +5908,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, bool isVol = MSI.isVolatile(); bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); SDValue Root = isVol ? getRoot() : getMemoryRoot(); + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); SDValue MS = DAG.getMemset(Root, sdl, Op1, Op2, Op3, Alignment, isVol, isTC, - MachinePointerInfo(I.getArgOperand(0))); + MachinePointerInfo(I.getArgOperand(0)), AAInfo); updateDAGForMaybeTailCall(MS); return; } @@ -5775,9 +5929,11 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, // FIXME: Support passing different dest/src alignments to the memmove DAG // node. SDValue Root = isVol ? 
getRoot() : getMemoryRoot(); + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); SDValue MM = DAG.getMemmove(Root, sdl, Op1, Op2, Op3, Alignment, isVol, isTC, MachinePointerInfo(I.getArgOperand(0)), - MachinePointerInfo(I.getArgOperand(1))); + MachinePointerInfo(I.getArgOperand(1)), AAInfo); updateDAGForMaybeTailCall(MM); return; } @@ -5859,7 +6015,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, } case Intrinsic::dbg_addr: case Intrinsic::dbg_declare: { + // Assume dbg.addr and dbg.declare can not currently use DIArgList, i.e. + // they are non-variadic. const auto &DI = cast<DbgVariableIntrinsic>(I); + assert(!DI.hasArgList() && "Only dbg.value should currently use DIArgList"); DILocalVariable *Variable = DI.getVariable(); DIExpression *Expression = DI.getExpression(); dropDanglingDebugInfo(Variable, Expression); @@ -5867,7 +6026,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, LLVM_DEBUG(dbgs() << "SelectionDAG visiting debug intrinsic: " << DI << "\n"); // Check if address has undef value. - const Value *Address = DI.getVariableLocation(); + const Value *Address = DI.getVariableLocationOp(0); if (!Address || isa<UndefValue>(Address) || (Address->use_empty() && !isa<Argument>(Address))) { LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI @@ -5898,8 +6057,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, if (FI != std::numeric_limits<int>::max()) { if (Intrinsic == Intrinsic::dbg_addr) { SDDbgValue *SDV = DAG.getFrameIndexDbgValue( - Variable, Expression, FI, /*IsIndirect*/ true, dl, SDNodeOrder); - DAG.AddDbgValue(SDV, getRoot().getNode(), isParameter); + Variable, Expression, FI, getRoot().getNode(), /*IsIndirect*/ true, + dl, SDNodeOrder); + DAG.AddDbgValue(SDV, isParameter); } else { LLVM_DEBUG(dbgs() << "Skipping " << DI << " (variable info stashed in MF side table)\n"); @@ -5931,7 +6091,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), true, dl, SDNodeOrder); } - DAG.AddDbgValue(SDV, N.getNode(), isParameter); + DAG.AddDbgValue(SDV, isParameter); } else { // If Address is an argument then try to emit its dbg value using // virtual register info from the FuncInfo.ValueMap. @@ -5960,20 +6120,17 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, DILocalVariable *Variable = DI.getVariable(); DIExpression *Expression = DI.getExpression(); dropDanglingDebugInfo(Variable, Expression); - const Value *V = DI.getValue(); - if (!V) + SmallVector<Value *, 4> Values(DI.getValues()); + if (Values.empty()) return; - if (handleDebugValue(V, Variable, Expression, dl, DI.getDebugLoc(), - SDNodeOrder)) + if (std::count(Values.begin(), Values.end(), nullptr)) return; - // TODO: Dangling debug info will eventually either be resolved or produce - // an Undef DBG_VALUE. However in the resolution case, a gap may appear - // between the original dbg.value location and its resolved DBG_VALUE, which - // we should ideally fill with an extra Undef DBG_VALUE. 
- - DanglingDebugInfoMap[V].emplace_back(&DI, dl, SDNodeOrder); + bool IsVariadic = DI.hasArgList(); + if (!handleDebugValue(Values, Variable, Expression, dl, DI.getDebugLoc(), + SDNodeOrder, IsVariadic)) + addDanglingDebugInfo(&DI, dl, SDNodeOrder); return; } @@ -6165,6 +6322,12 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), Flags)); return; + case Intrinsic::arithmetic_fence: { + setValue(&I, DAG.getNode(ISD::ARITH_FENCE, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), Flags)); + return; + } case Intrinsic::fma: setValue(&I, DAG.getNode( ISD::FMA, sdl, getValue(I.getArgOperand(0)).getValueType(), @@ -6215,19 +6378,25 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, getValue(I.getArgOperand(0))))); return; case Intrinsic::fptosi_sat: { - EVT Type = TLI.getValueType(DAG.getDataLayout(), I.getType()); - SDValue SatW = DAG.getConstant(Type.getScalarSizeInBits(), sdl, MVT::i32); - setValue(&I, DAG.getNode(ISD::FP_TO_SINT_SAT, sdl, Type, - getValue(I.getArgOperand(0)), SatW)); + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + setValue(&I, DAG.getNode(ISD::FP_TO_SINT_SAT, sdl, VT, + getValue(I.getArgOperand(0)), + DAG.getValueType(VT.getScalarType()))); return; } case Intrinsic::fptoui_sat: { - EVT Type = TLI.getValueType(DAG.getDataLayout(), I.getType()); - SDValue SatW = DAG.getConstant(Type.getScalarSizeInBits(), sdl, MVT::i32); - setValue(&I, DAG.getNode(ISD::FP_TO_UINT_SAT, sdl, Type, - getValue(I.getArgOperand(0)), SatW)); + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + setValue(&I, DAG.getNode(ISD::FP_TO_UINT_SAT, sdl, VT, + getValue(I.getArgOperand(0)), + DAG.getValueType(VT.getScalarType()))); return; } + case Intrinsic::set_rounding: + Res = DAG.getNode(ISD::SET_ROUNDING, sdl, MVT::Other, + {getRoot(), getValue(I.getArgOperand(0))}); + setValue(&I, Res); + DAG.setRoot(Res.getValue(0)); + return; case Intrinsic::pcmarker: { SDValue Tmp = getValue(I.getArgOperand(0)); DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp)); @@ -6642,9 +6811,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, SmallVector<const Value *, 4> Allocas; getUnderlyingObjects(ObjectPtr, Allocas); - for (SmallVectorImpl<const Value*>::iterator Object = Allocas.begin(), - E = Allocas.end(); Object != E; ++Object) { - const AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(*Object); + for (const Value *Alloca : Allocas) { + const AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(Alloca); // Could not find an Alloca. 
if (!LifetimeObject) @@ -6688,6 +6856,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, lowerCallToExternalSymbol(I, FunctionName); return; case Intrinsic::donothing: + case Intrinsic::seh_try_begin: + case Intrinsic::seh_scope_begin: + case Intrinsic::seh_try_end: + case Intrinsic::seh_scope_end: // ignore return; case Intrinsic::experimental_stackmap: @@ -6849,7 +7021,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::experimental_deoptimize: LowerDeoptimizeCall(&I); return; - + case Intrinsic::experimental_stepvector: + visitStepVector(I); + return; case Intrinsic::vector_reduce_fadd: case Intrinsic::vector_reduce_fmul: case Intrinsic::vector_reduce_add: @@ -6986,6 +7160,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, SDValue Vec = getValue(I.getOperand(0)); SDValue SubVec = getValue(I.getOperand(1)); SDValue Index = getValue(I.getOperand(2)); + + // The intrinsic's index type is i64, but the SDNode requires an index type + // suitable for the target. Convert the index as required. + MVT VectorIdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); + if (Index.getValueType() != VectorIdxTy) + Index = DAG.getVectorIdxConstant( + cast<ConstantSDNode>(Index)->getZExtValue(), DL); + EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResultVT, Vec, SubVec, Index)); @@ -6998,9 +7180,22 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, SDValue Index = getValue(I.getOperand(1)); EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + // The intrinsic's index type is i64, but the SDNode requires an index type + // suitable for the target. Convert the index as required. + MVT VectorIdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); + if (Index.getValueType() != VectorIdxTy) + Index = DAG.getVectorIdxConstant( + cast<ConstantSDNode>(Index)->getZExtValue(), DL); + setValue(&I, DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, Index)); return; } + case Intrinsic::experimental_vector_reverse: + visitVectorReverse(I); + return; + case Intrinsic::experimental_vector_splice: + visitVectorSplice(I); + return; } } @@ -7104,7 +7299,10 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( case ISD::STRICT_FSETCC: case ISD::STRICT_FSETCCS: { auto *FPCmp = dyn_cast<ConstrainedFPCmpIntrinsic>(&FPI); - Opers.push_back(DAG.getCondCode(getFCmpCondCode(FPCmp->getPredicate()))); + ISD::CondCode Condition = getFCmpCondCode(FPCmp->getPredicate()); + if (TM.Options.NoNaNsFPMath) + Condition = getFCmpCodeWithoutNaN(Condition); + Opers.push_back(DAG.getCondCode(Condition)); break; } } @@ -7134,6 +7332,7 @@ static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) { void SelectionDAGBuilder::visitVectorPredicationIntrinsic( const VPIntrinsic &VPIntrin) { + SDLoc DL = getCurSDLoc(); unsigned Opcode = getISDForVPIntrinsic(VPIntrin); SmallVector<EVT, 4> ValueVTs; @@ -7141,46 +7340,92 @@ void SelectionDAGBuilder::visitVectorPredicationIntrinsic( ComputeValueVTs(TLI, DAG.getDataLayout(), VPIntrin.getType(), ValueVTs); SDVTList VTs = DAG.getVTList(ValueVTs); + auto EVLParamPos = + VPIntrinsic::getVectorLengthParamPos(VPIntrin.getIntrinsicID()); + + MVT EVLParamVT = TLI.getVPExplicitVectorLengthTy(); + assert(EVLParamVT.isScalarInteger() && EVLParamVT.bitsGE(MVT::i32) && + "Unexpected target EVL type"); + // Request operands. 
SmallVector<SDValue, 7> OpValues; - for (int i = 0; i < (int)VPIntrin.getNumArgOperands(); ++i) - OpValues.push_back(getValue(VPIntrin.getArgOperand(i))); + for (unsigned I = 0; I < VPIntrin.getNumArgOperands(); ++I) { + auto Op = getValue(VPIntrin.getArgOperand(I)); + if (I == EVLParamPos) + Op = DAG.getNode(ISD::ZERO_EXTEND, DL, EVLParamVT, Op); + OpValues.push_back(Op); + } - SDLoc DL = getCurSDLoc(); SDValue Result = DAG.getNode(Opcode, DL, VTs, OpValues); setValue(&VPIntrin, Result); } -std::pair<SDValue, SDValue> -SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, - const BasicBlock *EHPadBB) { +SDValue SelectionDAGBuilder::lowerStartEH(SDValue Chain, + const BasicBlock *EHPadBB, + MCSymbol *&BeginLabel) { MachineFunction &MF = DAG.getMachineFunction(); MachineModuleInfo &MMI = MF.getMMI(); - MCSymbol *BeginLabel = nullptr; - if (EHPadBB) { - // Insert a label before the invoke call to mark the try range. This can be - // used to detect deletion of the invoke via the MachineModuleInfo. - BeginLabel = MMI.getContext().createTempSymbol(); + // Insert a label before the invoke call to mark the try range. This can be + // used to detect deletion of the invoke via the MachineModuleInfo. + BeginLabel = MMI.getContext().createTempSymbol(); - // For SjLj, keep track of which landing pads go with which invokes - // so as to maintain the ordering of pads in the LSDA. - unsigned CallSiteIndex = MMI.getCurrentCallSite(); - if (CallSiteIndex) { - MF.setCallSiteBeginLabel(BeginLabel, CallSiteIndex); - LPadToCallSiteMap[FuncInfo.MBBMap[EHPadBB]].push_back(CallSiteIndex); + // For SjLj, keep track of which landing pads go with which invokes + // so as to maintain the ordering of pads in the LSDA. + unsigned CallSiteIndex = MMI.getCurrentCallSite(); + if (CallSiteIndex) { + MF.setCallSiteBeginLabel(BeginLabel, CallSiteIndex); + LPadToCallSiteMap[FuncInfo.MBBMap[EHPadBB]].push_back(CallSiteIndex); - // Now that the call site is handled, stop tracking it. - MMI.setCurrentCallSite(0); - } + // Now that the call site is handled, stop tracking it. + MMI.setCurrentCallSite(0); + } + + return DAG.getEHLabel(getCurSDLoc(), Chain, BeginLabel); +} + +SDValue SelectionDAGBuilder::lowerEndEH(SDValue Chain, const InvokeInst *II, + const BasicBlock *EHPadBB, + MCSymbol *BeginLabel) { + assert(BeginLabel && "BeginLabel should've been set"); + MachineFunction &MF = DAG.getMachineFunction(); + MachineModuleInfo &MMI = MF.getMMI(); + + // Insert a label at the end of the invoke call to mark the try range. This + // can be used to detect deletion of the invoke via the MachineModuleInfo. + MCSymbol *EndLabel = MMI.getContext().createTempSymbol(); + Chain = DAG.getEHLabel(getCurSDLoc(), Chain, EndLabel); + + // Inform MachineModuleInfo of range. + auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); + // There is a platform (e.g. wasm) that uses funclet style IR but does not + // actually use outlined funclets and their LSDA info style. 
+ if (MF.hasEHFunclets() && isFuncletEHPersonality(Pers)) { + assert(II && "II should've been set"); + WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo(); + EHInfo->addIPToStateRange(II, BeginLabel, EndLabel); + } else if (!isScopedEHPersonality(Pers)) { + assert(EHPadBB); + MF.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel); + } + + return Chain; +} + +std::pair<SDValue, SDValue> +SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, + const BasicBlock *EHPadBB) { + MCSymbol *BeginLabel = nullptr; + + if (EHPadBB) { // Both PendingLoads and PendingExports must be flushed here; // this call might not return. (void)getRoot(); - DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getControlRoot(), BeginLabel)); - + DAG.setRoot(lowerStartEH(getControlRoot(), EHPadBB, BeginLabel)); CLI.setChain(getRoot()); } + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI); @@ -7202,22 +7447,8 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, } if (EHPadBB) { - // Insert a label at the end of the invoke call to mark the try range. This - // can be used to detect deletion of the invoke via the MachineModuleInfo. - MCSymbol *EndLabel = MMI.getContext().createTempSymbol(); - DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel)); - - // Inform MachineModuleInfo of range. - auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); - // There is a platform (e.g. wasm) that uses funclet style IR but does not - // actually use outlined funclets and their LSDA info style. - if (MF.hasEHFunclets() && isFuncletEHPersonality(Pers)) { - assert(CLI.CB); - WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo(); - EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CB), BeginLabel, EndLabel); - } else if (!isScopedEHPersonality(Pers)) { - MF.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel); - } + DAG.setRoot(lowerEndEH(getRoot(), cast_or_null<InvokeInst>(CLI.CB), EHPadBB, + BeginLabel)); } return Result; @@ -7225,6 +7456,7 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee, bool isTailCall, + bool isMustTailCall, const BasicBlock *EHPadBB) { auto &DL = DAG.getDataLayout(); FunctionType *FTy = CB.getFunctionType(); @@ -7241,7 +7473,7 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee, // attribute. auto *Caller = CB.getParent()->getParent(); if (Caller->getFnAttribute("disable-tail-calls").getValueAsString() == - "true") + "true" && !isMustTailCall) isTailCall = false; // We can't tail call inside a function with a swifterror argument. Lowering @@ -7528,10 +7760,12 @@ bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) { // because the return pointer needs to be adjusted by the size of // the copied memory. SDValue Root = isVol ? getRoot() : getMemoryRoot(); + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); SDValue MC = DAG.getMemcpy(Root, sdl, Dst, Src, Size, Alignment, isVol, false, /*isTailCall=*/false, MachinePointerInfo(I.getArgOperand(0)), - MachinePointerInfo(I.getArgOperand(1))); + MachinePointerInfo(I.getArgOperand(1)), AAInfo); assert(MC.getNode() != nullptr && "** memcpy should not be lowered as TailCall in mempcpy context **"); DAG.setRoot(MC); @@ -7851,7 +8085,8 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { // CFGuardTarget bundles are lowered in LowerCallTo. 
assert(!I.hasOperandBundlesOtherThan( {LLVMContext::OB_deopt, LLVMContext::OB_funclet, - LLVMContext::OB_cfguardtarget, LLVMContext::OB_preallocated}) && + LLVMContext::OB_cfguardtarget, LLVMContext::OB_preallocated, + LLVMContext::OB_clang_arc_attachedcall}) && "Cannot lower calls with arbitrary operand bundles!"); SDValue Callee = getValue(I.getCalledOperand()); @@ -7862,7 +8097,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { // Check if we can potentially perform a tail call. More detailed checking // is be done within LowerCallTo, after more information about the call is // known. - LowerCallTo(I, Callee, I.isTailCall()); + LowerCallTo(I, Callee, I.isTailCall(), I.isMustTailCall()); } namespace { @@ -8055,7 +8290,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL, // remember that AX is actually i16 to get the right extension. const MVT RegVT = *TRI.legalclasstypes_begin(*RC); - if (OpInfo.ConstraintVT != MVT::Other) { + if (OpInfo.ConstraintVT != MVT::Other && RegVT != MVT::Untyped) { // If this is an FP operand in an integer register (or visa versa), or more // generally if the operand value disagrees with the register class we plan // to stick it in, fix the operand type. @@ -8102,7 +8337,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL, // Initialize NumRegs. unsigned NumRegs = 1; if (OpInfo.ConstraintVT != MVT::Other) - NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT); + NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT, RegVT); // If this is a constraint for a specific physical register, like {r17}, // assign it now. @@ -8186,7 +8421,8 @@ public: } // end anonymous namespace /// visitInlineAsm - Handle a call to an InlineAsm object. -void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call) { +void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, + const BasicBlock *EHPadBB) { const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand()); /// ConstraintOperands - Information about all of the constraints. @@ -8274,19 +8510,28 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call) { ExtraInfo.update(T); } - // We won't need to flush pending loads if this asm doesn't touch // memory and is nonvolatile. SDValue Flag, Chain = (HasSideEffect) ? getRoot() : DAG.getRoot(); + bool EmitEHLabels = isa<InvokeInst>(Call) && IA->canThrow(); + if (EmitEHLabels) { + assert(EHPadBB && "InvokeInst must have an EHPadBB"); + } bool IsCallBr = isa<CallBrInst>(Call); - if (IsCallBr) { - // If this is a callbr we need to flush pending exports since inlineasm_br - // is a terminator. We need to do this before nodes are glued to - // the inlineasm_br node. + + if (IsCallBr || EmitEHLabels) { + // If this is a callbr or invoke we need to flush pending exports since + // inlineasm_br and invoke are terminators. + // We need to do this before nodes are glued to the inlineasm_br node. Chain = getControlRoot(); } + MCSymbol *BeginLabel = nullptr; + if (EmitEHLabels) { + Chain = lowerStartEH(Chain, EHPadBB, BeginLabel); + } + // Second pass over the constraints: compute which constraint option to use. 
for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) { // If this is an output operand with a matching input operand, look up the @@ -8425,21 +8670,18 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call) { return; } - MVT RegVT = AsmNodeOperands[CurOp+1].getSimpleValueType(); SmallVector<unsigned, 4> Regs; - - if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT)) { - unsigned NumRegs = InlineAsm::getNumOperandRegisters(OpFlag); - MachineRegisterInfo &RegInfo = - DAG.getMachineFunction().getRegInfo(); - for (unsigned i = 0; i != NumRegs; ++i) - Regs.push_back(RegInfo.createVirtualRegister(RC)); - } else { - emitInlineAsmError(Call, - "inline asm error: This value type register " - "class is not natively supported!"); - return; - } + MachineFunction &MF = DAG.getMachineFunction(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + RegisterSDNode *R = dyn_cast<RegisterSDNode>(AsmNodeOperands[CurOp+1]); + Register TiedReg = R->getReg(); + MVT RegVT = R->getSimpleValueType(0); + const TargetRegisterClass *RC = TiedReg.isVirtual() ? + MRI.getRegClass(TiedReg) : TRI.getMinimalPhysRegClass(TiedReg); + unsigned NumRegs = InlineAsm::getNumOperandRegisters(OpFlag); + for (unsigned i = 0; i != NumRegs; ++i) + Regs.push_back(MRI.createVirtualRegister(RC)); RegsForValue MatchedRegs(Regs, RegVT, InOperandVal.getValueType()); @@ -8677,8 +8919,13 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call) { if (!OutChains.empty()) Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, OutChains); + if (EmitEHLabels) { + Chain = lowerEndEH(Chain, cast<InvokeInst>(&Call), EHPadBB, BeginLabel); + } + // Only Update Root if inline assembly has a memory effect. - if (ResultValues.empty() || HasSideEffect || !OutChains.empty() || IsCallBr) + if (ResultValues.empty() || HasSideEffect || !OutChains.empty() || IsCallBr || + EmitEHLabels) DAG.setRoot(Chain); } @@ -9217,6 +9464,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Entry.IsByRef = false; Entry.IsReturned = false; Entry.IsSwiftSelf = false; + Entry.IsSwiftAsync = false; Entry.IsSwiftError = false; Entry.IsCFGuardTarget = false; Entry.Alignment = Alignment; @@ -9229,7 +9477,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { CLI.IsTailCall = false; } else { bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters( - CLI.RetTy, CLI.CallConv, CLI.IsVarArg); + CLI.RetTy, CLI.CallConv, CLI.IsVarArg, DL); for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { ISD::ArgFlagsTy Flags; if (NeedsRegBlock) { @@ -9287,9 +9535,9 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { // FIXME: Split arguments if CLI.IsPostTypeLegalization Type *FinalType = Args[i].Ty; if (Args[i].IsByVal) - FinalType = cast<PointerType>(Args[i].Ty)->getElementType(); + FinalType = Args[i].IndirectType; bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters( - FinalType, CLI.CallConv, CLI.IsVarArg); + FinalType, CLI.CallConv, CLI.IsVarArg, DL); for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues; ++Value) { EVT VT = ValueVTs[Value]; @@ -9302,6 +9550,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { // for a type depending on the context. Give the target a chance to // specify the alignment it wants. 
const Align OriginalAlignment(getABIAlignmentForCallingConv(ArgTy, DL)); + Flags.setOrigAlign(OriginalAlignment); if (Args[i].Ty->isPointerTy()) { Flags.setPointer(); @@ -9329,6 +9578,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Flags.setSRet(); if (Args[i].IsSwiftSelf) Flags.setSwiftSelf(); + if (Args[i].IsSwiftAsync) + Flags.setSwiftAsync(); if (Args[i].IsSwiftError) Flags.setSwiftError(); if (Args[i].IsCFGuardTarget) @@ -9355,27 +9606,26 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { // in the various CC lowering callbacks. Flags.setByVal(); } + Align MemAlign; if (Args[i].IsByVal || Args[i].IsInAlloca || Args[i].IsPreallocated) { - PointerType *Ty = cast<PointerType>(Args[i].Ty); - Type *ElementTy = Ty->getElementType(); - - unsigned FrameSize = DL.getTypeAllocSize( - Args[i].ByValType ? Args[i].ByValType : ElementTy); + unsigned FrameSize = DL.getTypeAllocSize(Args[i].IndirectType); Flags.setByValSize(FrameSize); // info is not there but there are cases it cannot get right. - Align FrameAlign; if (auto MA = Args[i].Alignment) - FrameAlign = *MA; + MemAlign = *MA; else - FrameAlign = Align(getByValTypeAlignment(ElementTy, DL)); - Flags.setByValAlign(FrameAlign); + MemAlign = Align(getByValTypeAlignment(Args[i].IndirectType, DL)); + } else if (auto MA = Args[i].Alignment) { + MemAlign = *MA; + } else { + MemAlign = OriginalAlignment; } + Flags.setMemAlign(MemAlign); if (Args[i].IsNest) Flags.setNest(); if (NeedsRegBlock) Flags.setInConsecutiveRegs(); - Flags.setOrigAlign(OriginalAlignment); MVT PartVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(), CLI.CallConv, VT); @@ -9660,8 +9910,9 @@ findArgumentCopyElisionCandidates(const DataLayout &DL, // We will look through cast uses, so ignore them completely. if (I.isCast()) continue; - // Ignore debug info intrinsics, they don't escape or store to allocas. - if (isa<DbgInfoIntrinsic>(I)) + // Ignore debug info and pseudo op intrinsics, they don't escape or store + // to allocas. + if (I.isDebugOrPseudoInst()) continue; // This is an unknown instruction. Assume it escapes or writes to all // static alloca operands. @@ -9688,13 +9939,17 @@ findArgumentCopyElisionCandidates(const DataLayout &DL, continue; // Check if the stored value is an argument, and that this store fully - // initializes the alloca. Don't elide copies from the same argument twice. + // initializes the alloca. + // If the argument type has padding bits we can't directly forward a pointer + // as the upper bits may contain garbage. + // Don't elide copies from the same argument twice. 
const Value *Val = SI->getValueOperand()->stripPointerCasts(); const auto *Arg = dyn_cast<Argument>(Val); if (!Arg || Arg->hasPassPointeeByValueCopyAttr() || Arg->getType()->isEmptyTy() || DL.getTypeStoreSize(Arg->getType()) != DL.getTypeAllocSize(AI->getAllocatedType()) || + !DL.typeSizeEqualsStoreSize(Arg->getType()) || ArgCopyElisionCandidates.count(Arg)) { *Info = StaticAllocaInfo::Clobbered; continue; @@ -9829,18 +10084,13 @@ void SelectionDAGISel::LowerArguments(const Function &F) { if (Arg.hasAttribute(Attribute::ByVal)) FinalType = Arg.getParamByValType(); bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters( - FinalType, F.getCallingConv(), F.isVarArg()); + FinalType, F.getCallingConv(), F.isVarArg(), DL); for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues; ++Value) { EVT VT = ValueVTs[Value]; Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); ISD::ArgFlagsTy Flags; - // Certain targets (such as MIPS), may have a different ABI alignment - // for a type depending on the context. Give the target a chance to - // specify the alignment it wants. - const Align OriginalAlignment( - TLI->getABIAlignmentForCallingConv(ArgTy, DL)); if (Arg.getType()->isPointerTy()) { Flags.setPointer(); @@ -9868,6 +10118,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { Flags.setSRet(); if (Arg.hasAttribute(Attribute::SwiftSelf)) Flags.setSwiftSelf(); + if (Arg.hasAttribute(Attribute::SwiftAsync)) + Flags.setSwiftAsync(); if (Arg.hasAttribute(Attribute::SwiftError)) Flags.setSwiftError(); if (Arg.hasAttribute(Attribute::ByVal)) @@ -9893,6 +10145,14 @@ void SelectionDAGISel::LowerArguments(const Function &F) { Flags.setByVal(); } + // Certain targets (such as MIPS), may have a different ABI alignment + // for a type depending on the context. Give the target a chance to + // specify the alignment it wants. + const Align OriginalAlignment( + TLI->getABIAlignmentForCallingConv(ArgTy, DL)); + Flags.setOrigAlign(OriginalAlignment); + + Align MemAlign; Type *ArgMemTy = nullptr; if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated() || Flags.isByRef()) { @@ -9904,24 +10164,27 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // For in-memory arguments, size and alignment should be passed from FE. // BE will guess if this info is not there but there are cases it cannot // get right. 
- MaybeAlign MemAlign = Arg.getParamAlign(); - if (!MemAlign) + if (auto ParamAlign = Arg.getParamStackAlign()) + MemAlign = *ParamAlign; + else if ((ParamAlign = Arg.getParamAlign())) + MemAlign = *ParamAlign; + else MemAlign = Align(TLI->getByValTypeAlignment(ArgMemTy, DL)); - - if (Flags.isByRef()) { + if (Flags.isByRef()) Flags.setByRefSize(MemSize); - Flags.setByRefAlign(*MemAlign); - } else { + else Flags.setByValSize(MemSize); - Flags.setByValAlign(*MemAlign); - } + } else if (auto ParamAlign = Arg.getParamStackAlign()) { + MemAlign = *ParamAlign; + } else { + MemAlign = OriginalAlignment; } + Flags.setMemAlign(MemAlign); if (Arg.hasAttribute(Attribute::Nest)) Flags.setNest(); if (NeedsRegBlock) Flags.setInConsecutiveRegs(); - Flags.setOrigAlign(OriginalAlignment); if (ArgCopyElisionCandidates.count(&Arg)) Flags.setCopyElisionCandidate(); if (Arg.hasAttribute(Attribute::Returned)) @@ -10807,6 +11070,36 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { } } +void SelectionDAGBuilder::visitStepVector(const CallInst &I) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + auto DL = getCurSDLoc(); + EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + setValue(&I, DAG.getStepVector(DL, ResultVT)); +} + +void SelectionDAGBuilder::visitVectorReverse(const CallInst &I) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + + SDLoc DL = getCurSDLoc(); + SDValue V = getValue(I.getOperand(0)); + assert(VT == V.getValueType() && "Malformed vector.reverse!"); + + if (VT.isScalableVector()) { + setValue(&I, DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V)); + return; + } + + // Use VECTOR_SHUFFLE for the fixed-length vector + // to maintain existing behavior. + SmallVector<int, 8> Mask; + unsigned NumElts = VT.getVectorMinNumElements(); + for (unsigned i = 0; i != NumElts; ++i) + Mask.push_back(NumElts - 1 - i); + + setValue(&I, DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), Mask)); +} + void SelectionDAGBuilder::visitFreeze(const FreezeInst &I) { SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(), @@ -10824,3 +11117,37 @@ void SelectionDAGBuilder::visitFreeze(const FreezeInst &I) { setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), DAG.getVTList(ValueVTs), Values)); } + +void SelectionDAGBuilder::visitVectorSplice(const CallInst &I) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + + SDLoc DL = getCurSDLoc(); + SDValue V1 = getValue(I.getOperand(0)); + SDValue V2 = getValue(I.getOperand(1)); + int64_t Imm = cast<ConstantInt>(I.getOperand(2))->getSExtValue(); + + // VECTOR_SHUFFLE doesn't support a scalable mask so use a dedicated node. + if (VT.isScalableVector()) { + MVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout()); + setValue(&I, DAG.getNode(ISD::VECTOR_SPLICE, DL, VT, V1, V2, + DAG.getConstant(Imm, DL, IdxVT))); + return; + } + + unsigned NumElts = VT.getVectorNumElements(); + + if ((-Imm > NumElts) || (Imm >= NumElts)) { + // Result is undefined if immediate is out-of-bounds. + setValue(&I, DAG.getUNDEF(VT)); + return; + } + + uint64_t Idx = (NumElts + Imm) % NumElts; + + // Use VECTOR_SHUFFLE to maintain original behaviour for fixed-length vectors. 
+ SmallVector<int, 8> Mask; + for (unsigned i = 0; i < NumElts; ++i) + Mask.push_back(Idx + i); + setValue(&I, DAG.getVectorShuffle(VT, DL, V1, V2, Mask)); +} diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 8f6e98c40161..df5be156821f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -492,6 +492,10 @@ public: /// of the specified type Ty. Return empty SDValue() otherwise. SDValue getCopyFromRegs(const Value *V, Type *Ty); + /// Register a dbg_value which relies on a Value which we have not yet seen. + void addDanglingDebugInfo(const DbgValueInst *DI, DebugLoc DL, + unsigned Order); + /// If we have dangling debug info that describes \p Variable, or an /// overlapping part of variable considering the \p Expr, then this method /// will drop that debug info as it isn't valid any longer. @@ -507,11 +511,11 @@ public: /// this cannot be done, produce an Undef debug value record. void salvageUnresolvedDbgValue(DanglingDebugInfo &DDI); - /// For a given Value, attempt to create and record a SDDbgValue in the - /// SelectionDAG. - bool handleDebugValue(const Value *V, DILocalVariable *Var, - DIExpression *Expr, DebugLoc CurDL, - DebugLoc InstDL, unsigned Order); + /// For a given list of Values, attempt to create and record a SDDbgValue in + /// the SelectionDAG. + bool handleDebugValue(ArrayRef<const Value *> Values, DILocalVariable *Var, + DIExpression *Expr, DebugLoc CurDL, DebugLoc InstDL, + unsigned Order, bool IsVariadic); /// Evict any dangling debug information, attempting to salvage it first. void resolveOrClearDbgInfo(); @@ -549,7 +553,7 @@ public: void CopyToExportRegsIfNeeded(const Value *V); void ExportFromCurrentBlock(const Value *V); void LowerCallTo(const CallBase &CB, SDValue Callee, bool IsTailCall, - const BasicBlock *EHPadBB = nullptr); + bool IsMustTailCall, const BasicBlock *EHPadBB = nullptr); // Lower range metadata from 0 to N to assert zext to an integer of nearest // floor power of two. @@ -755,7 +759,8 @@ private: void visitStoreToSwiftError(const StoreInst &I); void visitFreeze(const FreezeInst &I); - void visitInlineAsm(const CallBase &Call); + void visitInlineAsm(const CallBase &Call, + const BasicBlock *EHPadBB = nullptr); void visitIntrinsicCall(const CallInst &I, unsigned Intrinsic); void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic); void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI); @@ -773,6 +778,9 @@ private: void visitGCResult(const GCResultInst &I); void visitVectorReduce(const CallInst &I, unsigned Intrinsic); + void visitVectorReverse(const CallInst &I); + void visitVectorSplice(const CallInst &I); + void visitStepVector(const CallInst &I); void visitUserOp1(const Instruction &I) { llvm_unreachable("UserOp1 should not exist at instruction selection time!"); @@ -809,6 +817,11 @@ private: /// Lowers CallInst to an external symbol. 
void lowerCallToExternalSymbol(const CallInst &I, const char *FunctionName); + + SDValue lowerStartEH(SDValue Chain, const BasicBlock *EHPadBB, + MCSymbol *&BeginLabel); + SDValue lowerEndEH(SDValue Chain, const InvokeInst *II, + const BasicBlock *EHPadBB, MCSymbol *BeginLabel); }; /// This struct represents the registers (physical or virtual) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index d867f3e09e9c..40083c614a6c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -145,7 +145,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { unsigned OpNo = getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1; unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue(); if (IID < Intrinsic::num_intrinsics) - return Intrinsic::getName((Intrinsic::ID)IID, None); + return Intrinsic::getBaseName((Intrinsic::ID)IID).str(); else if (!G) return "Unknown intrinsic"; else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo()) @@ -231,6 +231,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::MUL: return "mul"; case ISD::MULHU: return "mulhu"; case ISD::MULHS: return "mulhs"; + case ISD::ABDS: return "abds"; + case ISD::ABDU: return "abdu"; case ISD::SDIV: return "sdiv"; case ISD::UDIV: return "udiv"; case ISD::SREM: return "srem"; @@ -288,7 +290,11 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::EXTRACT_SUBVECTOR: return "extract_subvector"; case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector"; case ISD::VECTOR_SHUFFLE: return "vector_shuffle"; + case ISD::VECTOR_SPLICE: return "vector_splice"; case ISD::SPLAT_VECTOR: return "splat_vector"; + case ISD::SPLAT_VECTOR_PARTS: return "splat_vector_parts"; + case ISD::VECTOR_REVERSE: return "vector_reverse"; + case ISD::STEP_VECTOR: return "step_vector"; case ISD::CARRY_FALSE: return "carry_false"; case ISD::ADDC: return "addc"; case ISD::ADDE: return "adde"; @@ -336,7 +342,6 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::TRUNCATE: return "truncate"; case ISD::FP_ROUND: return "fp_round"; case ISD::STRICT_FP_ROUND: return "strict_fp_round"; - case ISD::FLT_ROUNDS_: return "flt_rounds"; case ISD::FP_EXTEND: return "fp_extend"; case ISD::STRICT_FP_EXTEND: return "strict_fp_extend"; @@ -410,6 +415,10 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::PREALLOCATED_ARG: return "call_alloc"; + // Floating point environment manipulation + case ISD::FLT_ROUNDS_: return "flt_rounds"; + case ISD::SET_ROUNDING: return "set_rounding"; + // Bit manipulation case ISD::ABS: return "abs"; case ISD::BITREVERSE: return "bitreverse"; @@ -828,26 +837,38 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { LLVM_DUMP_METHOD void SDDbgValue::print(raw_ostream &OS) const { OS << " DbgVal(Order=" << getOrder() << ')'; - if (isInvalidated()) OS << "(Invalidated)"; - if (isEmitted()) OS << "(Emitted)"; - switch (getKind()) { - case SDNODE: - if (getSDNode()) - OS << "(SDNODE=" << PrintNodeId(*getSDNode()) << ':' << getResNo() << ')'; - else - OS << "(SDNODE)"; - break; - case CONST: - OS << "(CONST)"; - break; - case FRAMEIX: - OS << "(FRAMEIX=" << getFrameIx() << ')'; - break; - case VREG: - OS << "(VREG=" << getVReg() << ')'; - break; + if (isInvalidated()) + OS << "(Invalidated)"; + if (isEmitted()) + OS << "(Emitted)"; + OS << "("; + bool 
Comma = false; + for (const SDDbgOperand &Op : getLocationOps()) { + if (Comma) + OS << ", "; + switch (Op.getKind()) { + case SDDbgOperand::SDNODE: + if (Op.getSDNode()) + OS << "SDNODE=" << PrintNodeId(*Op.getSDNode()) << ':' << Op.getResNo(); + else + OS << "SDNODE"; + break; + case SDDbgOperand::CONST: + OS << "CONST"; + break; + case SDDbgOperand::FRAMEIX: + OS << "FRAMEIX=" << Op.getFrameIx(); + break; + case SDDbgOperand::VREG: + OS << "VREG=" << Op.getVReg(); + break; + } + Comma = true; } + OS << ")"; if (isIndirect()) OS << "(Indirect)"; + if (isVariadic()) + OS << "(Variadic)"; OS << ":\"" << Var->getName() << '"'; #ifndef NDEBUG if (Expr->getNumElements()) @@ -892,12 +913,10 @@ static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { LLVM_DUMP_METHOD void SelectionDAG::dump() const { dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:\n"; - for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end(); - I != E; ++I) { - const SDNode *N = &*I; - if (!N->hasOneUse() && N != getRoot().getNode() && - (!shouldPrintInline(*N, this) || N->use_empty())) - DumpNodes(N, 2, this); + for (const SDNode &N : allnodes()) { + if (!N.hasOneUse() && &N != getRoot().getNode() && + (!shouldPrintInline(N, this) || N.use_empty())) + DumpNodes(&N, 2, this); } if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 7bae5048fc0e..1415cce3b1df 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -270,6 +270,10 @@ namespace llvm { return createHybridListDAGScheduler(IS, OptLevel); if (TLI->getSchedulingPreference() == Sched::VLIW) return createVLIWDAGScheduler(IS, OptLevel); + if (TLI->getSchedulingPreference() == Sched::Fast) + return createFastDAGScheduler(IS, OptLevel); + if (TLI->getSchedulingPreference() == Sched::Linearize) + return createDAGLinearizer(IS, OptLevel); assert(TLI->getSchedulingPreference() == Sched::ILP && "Unknown sched type!"); return createILPListDAGScheduler(IS, OptLevel); @@ -571,8 +575,11 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { LiveInMap.insert(LI); // Insert DBG_VALUE instructions for function arguments to the entry block. + bool InstrRef = TM.Options.ValueTrackingVariableLocations; for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) { - MachineInstr *MI = FuncInfo->ArgDbgValues[e-i-1]; + MachineInstr *MI = FuncInfo->ArgDbgValues[e - i - 1]; + assert(MI->getOpcode() != TargetOpcode::DBG_VALUE_LIST && + "Function parameters should not be described by DBG_VALUE_LIST."); bool hasFI = MI->getOperand(0).isFI(); Register Reg = hasFI ? TRI.getFrameRegister(*MF) : MI->getOperand(0).getReg(); @@ -589,6 +596,10 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { << Register::virtReg2Index(Reg) << "\n"); } + // Don't try and extend through copies in instruction referencing mode. + if (InstrRef) + continue; + // If Reg is live-in then update debug info to track its copy in a vreg. 
DenseMap<unsigned, unsigned>::iterator LDI = LiveInMap.find(Reg); if (LDI != LiveInMap.end()) { @@ -605,6 +616,8 @@ "DBG_VALUE with nonzero offset"); assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); + assert(MI->getOpcode() != TargetOpcode::DBG_VALUE_LIST && + "Didn't expect to see a DBG_VALUE_LIST here"); // Def is never a terminator here, so it is ok to increment InsertPos. BuildMI(*EntryMBB, ++InsertPos, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect, LDI->second, Variable, Expr); @@ -638,6 +651,10 @@ } } + // For debug-info, in instruction referencing mode, we need to perform some + // post-isel maintenance. + MF->finalizeDebugInstrRefs(); + // Determine if there are any calls in this machine function. MachineFrameInfo &MFI = MF->getFrameInfo(); for (const auto &MBB : *MF) { @@ -1419,9 +1436,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { for (const BasicBlock *LLVMBB : RPOT) { if (OptLevel != CodeGenOpt::None) { bool AllPredsVisited = true; - for (const_pred_iterator PI = pred_begin(LLVMBB), PE = pred_end(LLVMBB); - PI != PE; ++PI) { - if (!FuncInfo->VisitedBBs.count(*PI)) { + for (const BasicBlock *Pred : predecessors(LLVMBB)) { + if (!FuncInfo->VisitedBBs.count(Pred)) { AllPredsVisited = false; break; } @@ -1691,9 +1707,9 @@ static bool MIIsInTerminatorSequence(const MachineInstr &MI) { /// terminator, but additionally the copies that move the vregs into the /// physical registers. static MachineBasicBlock::iterator -FindSplitPointForStackProtector(MachineBasicBlock *BB) { +FindSplitPointForStackProtector(MachineBasicBlock *BB, + const TargetInstrInfo &TII) { MachineBasicBlock::iterator SplitPoint = BB->getFirstTerminator(); - // if (SplitPoint == BB->begin()) return SplitPoint; @@ -1701,6 +1717,31 @@ FindSplitPointForStackProtector(MachineBasicBlock *BB) { MachineBasicBlock::iterator Previous = SplitPoint; --Previous; + if (TII.isTailCall(*SplitPoint) && + Previous->getOpcode() == TII.getCallFrameDestroyOpcode()) { + // Call frames cannot be nested, so if this frame is describing the tail + // call itself, then we must insert before the sequence even starts. For + // example: + // <split point> + // ADJCALLSTACKDOWN ... + // <Moves> + // ADJCALLSTACKUP ... + // TAILJMP somewhere + // On the other hand, it could be an unrelated call in which case this tail call + // has to register moves of its own and should be the split point. For example: + // ADJCALLSTACKDOWN + // CALL something_else + // ADJCALLSTACKUP + // <split point> + // TAILJMP somewhere + do { + --Previous; + if (Previous->isCall()) + return SplitPoint; + } while (Previous->getOpcode() != TII.getCallFrameSetupOpcode()); + + return Previous; + } + while (MIIsInTerminatorSequence(*Previous)) { SplitPoint = Previous; if (Previous == Start) @@ -1740,7 +1781,7 @@ SelectionDAGISel::FinishBasicBlock() { // Add load and check to the basicblock. FuncInfo->MBB = ParentMBB; FuncInfo->InsertPt = - FindSplitPointForStackProtector(ParentMBB); + FindSplitPointForStackProtector(ParentMBB, *TII); SDB->visitSPDescriptorParent(SDB->SPDescriptor, ParentMBB); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); @@ -1759,7 +1800,7 @@ SelectionDAGISel::FinishBasicBlock() { // register allocation issues caused by us splitting the parent mbb. The // register allocator will clean up said virtual copies later on.
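The FindSplitPointForStackProtector change above walks backwards from a tail call that sits behind an ADJCALLSTACK pair: if the scan meets another call first, the frame sequence belongs to that unrelated call and the tail call itself is the split point; otherwise the insertion point moves before the whole sequence. A hedged distillation over a simplified opcode stream (the enum and helper are illustrative, not LLVM types):

#include <cstddef>
#include <vector>

enum class Op { FrameSetup, FrameDestroy, Call, Move, TailCall };

// Seq[TailIdx] is a tail call immediately preceded by a FrameDestroy.
size_t findSplitPoint(const std::vector<Op> &Seq, size_t TailIdx) {
  size_t I = TailIdx - 1; // start at the FrameDestroy
  while (Seq[I] != Op::FrameSetup) {
    if (Seq[I] == Op::Call)
      return TailIdx; // an unrelated call owns the frame: split at the tail call
    --I;
  }
  return I; // split before the tail call's own frame sequence
}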
MachineBasicBlock::iterator SplitPoint = - FindSplitPointForStackProtector(ParentMBB); + FindSplitPointForStackProtector(ParentMBB, *TII); // Splice the terminator of ParentMBB into SuccessMBB. SuccessMBB->splice(SuccessMBB->end(), ParentMBB, @@ -2293,6 +2334,11 @@ void SelectionDAGISel::Select_FREEZE(SDNode *N) { N->getOperand(0)); } +void SelectionDAGISel::Select_ARITH_FENCE(SDNode *N) { + CurDAG->SelectNodeTo(N, TargetOpcode::ARITH_FENCE, N->getValueType(0), + N->getOperand(0)); +} + /// GetVBR - decode a vbr encoding whose top bit is set. LLVM_ATTRIBUTE_ALWAYS_INLINE static uint64_t GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) { @@ -2579,6 +2625,17 @@ CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex, return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI->getPointerTy(DL); } +// Bit 0 stores the sign of the immediate. The upper bits contain the magnitude +// shifted left by 1. +static uint64_t decodeSignRotatedValue(uint64_t V) { + if ((V & 1) == 0) + return V >> 1; + if (V != 1) + return -(V >> 1); + // There is no such thing as -0 with integers. "-0" really means MININT. + return 1ULL << 63; +} + LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N) { @@ -2586,6 +2643,8 @@ CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex, if (Val & 128) Val = GetVBR(Val, MatcherTable, MatcherIndex); + Val = decodeSignRotatedValue(Val); + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N); return C && C->getSExtValue() == Val; } @@ -2831,6 +2890,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, case ISD::FREEZE: Select_FREEZE(NodeToMatch); return; + case ISD::ARITH_FENCE: + Select_ARITH_FENCE(NodeToMatch); + return; } assert(!NodeToMatch->isMachineOpcode() && "Node already selected!"); @@ -3239,12 +3301,15 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, continue; } - case OPC_EmitInteger: { + case OPC_EmitInteger: + case OPC_EmitStringInteger: { MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; int64_t Val = MatcherTable[MatcherIndex++]; if (Val & 128) Val = GetVBR(Val, MatcherTable, MatcherIndex); + if (Opcode == OPC_EmitInteger) + Val = decodeSignRotatedValue(Val); RecordedNodes.push_back(std::pair<SDValue, SDNode*>( CurDAG->getTargetConstant(Val, SDLoc(NodeToMatch), VT), nullptr)); @@ -3729,7 +3794,7 @@ void SelectionDAGISel::CannotYetSelect(SDNode *N) { unsigned iid = cast<ConstantSDNode>(N->getOperand(HasInputChain))->getZExtValue(); if (iid < Intrinsic::num_intrinsics) - Msg << "intrinsic %" << Intrinsic::getName((Intrinsic::ID)iid, None); + Msg << "intrinsic %" << Intrinsic::getBaseName((Intrinsic::ID)iid); else if (const TargetIntrinsicInfo *TII = TM.getIntrinsicInfo()) Msg << "target intrinsic %" << TII->getName(iid); else diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index 059a6baf967a..d022e2a23ea0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -204,7 +204,7 @@ void SelectionDAG::setGraphAttrs(const SDNode *N, const char *Attrs) { /// getGraphAttrs - Get graph attributes for a node. (eg. "color=red".) /// Used from getNodeAttributes. 
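decodeSignRotatedValue, added above, undoes a "sign rotation": bit 0 carries the sign and the remaining bits the magnitude, so small negative matcher-table immediates stay small under the VBR encoding. A round-trip sketch (the encoder is inferred from the decoder and is an assumption, with INT64_MIN taking the special "-0" slot):

#include <cassert>
#include <cstdint>

static uint64_t encodeSignRotatedValue(int64_t V) {
  // Negate in uint64_t so INT64_MIN doesn't overflow; it encodes as "-0" = 1.
  uint64_t Mag = V < 0 ? 0 - static_cast<uint64_t>(V) : static_cast<uint64_t>(V);
  return (Mag << 1) | (V < 0 ? 1 : 0);
}

static uint64_t decodeSignRotatedValue(uint64_t V) {
  if ((V & 1) == 0)
    return V >> 1;
  if (V != 1)
    return -(V >> 1);
  return 1ULL << 63; // "-0" really means INT64_MIN
}

int main() {
  const int64_t Tests[] = {0, 1, -1, -128, INT64_MIN};
  for (int64_t V : Tests)
    assert(decodeSignRotatedValue(encodeSignRotatedValue(V)) ==
           static_cast<uint64_t>(V));
  return 0;
}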
-const std::string SelectionDAG::getGraphAttrs(const SDNode *N) const { +std::string SelectionDAG::getGraphAttrs(const SDNode *N) const { #ifndef NDEBUG std::map<const SDNode *, std::string>::const_iterator I = NodeGraphAttrs.find(N); diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 0172646c22ec..a903c2401264 100644 --- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -21,7 +21,6 @@ #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" -#include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -33,6 +32,7 @@ #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GCStrategy.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" @@ -72,10 +72,6 @@ cl::opt<unsigned> MaxRegistersForGCPointers( "max-registers-for-gc-values", cl::Hidden, cl::init(0), cl::desc("Max number of VRegs allowed to pass GC pointer meta args in")); -cl::opt<bool> AlwaysSpillBase("statepoint-always-spill-base", cl::Hidden, - cl::init(true), - cl::desc("Force spilling of base GC pointers")); - typedef FunctionLoweringInfo::StatepointRelocationRecord RecordType; static void pushStackMapConstant(SmallVectorImpl<SDValue>& Ops, @@ -113,7 +109,9 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType, MachineFrameInfo &MFI = Builder.DAG.getMachineFunction().getFrameInfo(); unsigned SpillSize = ValueType.getStoreSize(); - assert((SpillSize * 8) == ValueType.getSizeInBits() && "Size not in bytes?"); + assert((SpillSize * 8) == + (-8u & (7 + ValueType.getSizeInBits())) && // Round up modulo 8. + "Size not in bytes?"); // First look for a previously created stack slot which is not in // use (accounting for the fact arbitrary slots may already be @@ -386,7 +384,8 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain, // (i.e. change the '==' in the assert below to a '>='). MachineFrameInfo &MFI = Builder.DAG.getMachineFunction().getFrameInfo(); assert((MFI.getObjectSize(Index) * 8) == - (int64_t)Incoming.getValueSizeInBits() && + (-8 & (7 + // Round up modulo 8. + (int64_t)Incoming.getValueSizeInBits())) && "Bad spill: stack slot does not match!"); // Note: Using the alignment of the spill slot (rather than the abi or @@ -489,6 +488,18 @@ lowerIncomingStatepointValue(SDValue Incoming, bool RequireSpillSlot, } +/// Return true if value V represents a GC value. The behavior is conservative: +/// if it is not sure that the value is not a GC value, the function returns true. +static bool isGCValue(const Value *V, SelectionDAGBuilder &Builder) { + auto *Ty = V->getType(); + if (!Ty->isPtrOrPtrVectorTy()) + return false; + if (auto *GFI = Builder.GFI) + if (auto IsManaged = GFI->getStrategy().isGCManagedPointer(Ty)) + return *IsManaged; + return true; // conservative +} + /// Lower deopt state and gc pointer arguments of the statepoint. The actual /// lowering is described in lowerIncomingStatepointValue.
This function is /// responsible for lowering everything in the right position and playing some @@ -607,18 +618,11 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, LLVM_DEBUG(dbgs() << LowerAsVReg.size() << " pointers will go in vregs\n"); - auto isGCValue = [&](const Value *V) { - auto *Ty = V->getType(); - if (!Ty->isPtrOrPtrVectorTy()) - return false; - if (auto *GFI = Builder.GFI) - if (auto IsManaged = GFI->getStrategy().isGCManagedPointer(Ty)) - return *IsManaged; - return true; // conservative - }; - auto requireSpillSlot = [&](const Value *V) { - if (isGCValue(V)) + if (!Builder.DAG.getTargetLoweringInfo().isTypeLegal( + Builder.getValue(V).getValueType())) + return true; + if (isGCValue(V, Builder)) return !LowerAsVReg.count(Builder.getValue(V)); return !(LiveInDeopt || UseRegistersForDeoptValues); }; @@ -727,8 +731,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT( NumOfStatepoints++; // Clear state StatepointLowering.startNewStatepoint(*this); - assert(SI.Bases.size() == SI.Ptrs.size() && - SI.Ptrs.size() <= SI.GCRelocates.size()); + assert(SI.Bases.size() == SI.Ptrs.size()); LLVM_DEBUG(dbgs() << "Lowering statepoint " << *SI.StatepointInstr << "\n"); #ifndef NDEBUG @@ -1042,6 +1045,21 @@ SelectionDAGBuilder::LowerStatepoint(const GCStatepointInst &I, } } + // If we find a deopt value which isn't explicitly added, we need to + // ensure it gets lowered such that gc cycles occurring before the + // deoptimization event during the lifetime of the call don't invalidate + // the pointer we're deopting with. Note that we assume that all + // pointers passed to deopt are base pointers; relaxing that assumption + // would require relatively large changes to how we represent relocations. + for (Value *V : I.deopt_operands()) { + if (!isGCValue(V, *this)) + continue; + if (Seen.insert(getValue(V)).second) { + SI.Bases.push_back(V); + SI.Ptrs.push_back(V); + } + } + SI.GCArgs = ArrayRef<const Use>(I.gc_args_begin(), I.gc_args_end()); SI.StatepointInstr = &I; SI.ID = I.getID(); @@ -1057,23 +1075,25 @@ SelectionDAGBuilder::LowerStatepoint(const GCStatepointInst &I, SDValue ReturnValue = LowerAsSTATEPOINT(SI); // Export the result value if needed - const GCResultInst *GCResult = I.getGCResult(); + const std::pair<bool, bool> GCResultLocality = I.getGCResultLocality(); Type *RetTy = I.getActualReturnType(); - if (RetTy->isVoidTy() || !GCResult) { + if (RetTy->isVoidTy() || + (!GCResultLocality.first && !GCResultLocality.second)) { // The return value is not needed, just generate a poison value. setValue(&I, DAG.getIntPtrConstant(-1, getCurSDLoc())); return; } - if (GCResult->getParent() == I.getParent()) { + if (GCResultLocality.first) { // Result value will be used in a same basic block. Don't export it or // perform any explicit register copies. The gc_result will simply grab // this value. setValue(&I, ReturnValue); - return; } + if (!GCResultLocality.second) + return; // Result value will be used in a different basic block so we need to export // it now. Default exporting mechanism will not work here because statepoint // call has a different type than the actual call. 
It means that by default @@ -1191,7 +1211,40 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { setValue(&Relocate, Relocation); return; } - + + if (Record.type == RecordType::Spill) { + unsigned Index = Record.payload.FI; + SDValue SpillSlot = DAG.getTargetFrameIndex(Index, getFrameIndexTy()); + + // All the reloads are independent and are reading memory only modified by + // statepoints (i.e. no other aliasing stores); informing SelectionDAG of + // this lets CSE kick in for free and allows reordering of + // instructions if possible. The lowering for statepoint sets the root, + // so this is ordering all reloads with either + // a) the statepoint node itself, or + // b) the entry of the current block for an invoke statepoint. + const SDValue Chain = DAG.getRoot(); // != Builder.getRoot() + + auto &MF = DAG.getMachineFunction(); + auto &MFI = MF.getFrameInfo(); + auto PtrInfo = MachinePointerInfo::getFixedStack(MF, Index); + auto *LoadMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad, + MFI.getObjectSize(Index), + MFI.getObjectAlign(Index)); + + auto LoadVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + Relocate.getType()); + + SDValue SpillLoad = + DAG.getLoad(LoadVT, getCurSDLoc(), Chain, SpillSlot, LoadMMO); + PendingLoads.push_back(SpillLoad.getValue(1)); + + assert(SpillLoad.getNode()); + setValue(&Relocate, SpillLoad); + return; + } + + assert(Record.type == RecordType::NoRelocate); SDValue SD = getValue(DerivedPtr); if (SD.isUndef() && SD.getValueType().getSizeInBits() <= 64) { @@ -1201,43 +1254,9 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { return; } - // We didn't need to spill these special cases (constants and allocas). // See the handling in spillIncomingValueForStatepoint for detail. - if (Record.type == RecordType::NoRelocate) { - setValue(&Relocate, SD); - return; - } - - assert(Record.type == RecordType::Spill); - - unsigned Index = Record.payload.FI;; - SDValue SpillSlot = DAG.getTargetFrameIndex(Index, getFrameIndexTy()); - - // All the reloads are independent and are reading memory only modified by - // statepoints (i.e. no other aliasing stores); informing SelectionDAG of - // this this let's CSE kick in for free and allows reordering of instructions - // if possible. The lowering for statepoint sets the root, so this is - // ordering all reloads with the either a) the statepoint node itself, or b) - // the entry of the current block for an invoke statepoint.
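The isGCValue helper hoisted to file scope earlier in this diff only trusts a definitive answer from the GC strategy; any uncertainty means a pointer is treated as GC-managed. The shape of that decision as a standalone sketch (illustrative, not the LLVM signature):

#include <optional>

bool isGCManagedConservative(bool IsPtrOrPtrVector,
                             std::optional<bool> StrategyAnswer) {
  if (!IsPtrOrPtrVector)
    return false;           // non-pointers are never GC values
  if (StrategyAnswer)
    return *StrategyAnswer; // definitive yes/no from the strategy
  return true;              // unknown: conservatively assume GC-managed
}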
- const SDValue Chain = DAG.getRoot(); // != Builder.getRoot() - - auto &MF = DAG.getMachineFunction(); - auto &MFI = MF.getFrameInfo(); - auto PtrInfo = MachinePointerInfo::getFixedStack(MF, Index); - auto *LoadMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad, - MFI.getObjectSize(Index), - MFI.getObjectAlign(Index)); - - auto LoadVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), - Relocate.getType()); - - SDValue SpillLoad = DAG.getLoad(LoadVT, getCurSDLoc(), Chain, - SpillSlot, LoadMMO); - PendingLoads.push_back(SpillLoad.getValue(1)); - - assert(SpillLoad.getNode()); - setValue(&Relocate, SpillLoad); + setValue(&Relocate, SD); } void SelectionDAGBuilder::LowerDeoptimizeCall(const CallInst *CI) { diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h index 634ef87f3840..addc0a7eef3a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h +++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h @@ -18,11 +18,11 @@ #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/IR/IntrinsicInst.h" #include <cassert> namespace llvm { -class CallInst; class SelectionDAGBuilder; /// This class tracks both per-statepoint and per-selectiondag information. @@ -63,7 +63,7 @@ public: /// Record the fact that we expect to encounter a given gc_relocate /// before the next statepoint. If we don't see it, we'll report /// an assertion. - void scheduleRelocCall(const CallInst &RelocCall) { + void scheduleRelocCall(const GCRelocateInst &RelocCall) { // We are not interested in lowering dead instructions. if (!RelocCall.use_empty()) PendingGCRelocateCalls.push_back(&RelocCall); @@ -72,7 +72,7 @@ public: /// Remove this gc_relocate from the list we're expecting to see /// before the next statepoint. If we weren't expecting to see /// it, we'll report an assertion. - void relocCallVisited(const CallInst &RelocCall) { + void relocCallVisited(const GCRelocateInst &RelocCall) { // We are not interested in lowering dead instructions. if (RelocCall.use_empty()) return; @@ -118,7 +118,7 @@ private: unsigned NextSlotToAllocate = 0; /// Keep track of pending gcrelocate calls for consistency check - SmallVector<const CallInst *, 10> PendingGCRelocateCalls; + SmallVector<const GCRelocateInst *, 10> PendingGCRelocateCalls; }; } // end namespace llvm diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 5760132e44a0..1c1dae8f953f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -53,22 +53,24 @@ bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, const Function &F = DAG.getMachineFunction().getFunction(); // First, check if tail calls have been disabled in this function. - if (F.getFnAttribute("disable-tail-calls").getValueAsString() == "true") + if (F.getFnAttribute("disable-tail-calls").getValueAsBool()) return false; // Conservatively require the attributes of the call to match those of - // the return. Ignore NoAlias and NonNull because they don't affect the + // the return. Ignore following attributes because they don't affect the // call sequence. 
- AttributeList CallerAttrs = F.getAttributes(); - if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex) - .removeAttribute(Attribute::NoAlias) - .removeAttribute(Attribute::NonNull) - .hasAttributes()) + AttrBuilder CallerAttrs(F.getAttributes(), AttributeList::ReturnIndex); + for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable, + Attribute::DereferenceableOrNull, Attribute::NoAlias, + Attribute::NonNull}) + CallerAttrs.removeAttribute(Attr); + + if (CallerAttrs.hasAttributes()) return false; // It's not safe to eliminate the sign / zero extension of the return value. - if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) || - CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt)) + if (CallerAttrs.contains(Attribute::ZExt) || + CallerAttrs.contains(Attribute::SExt)) return false; // Check if the only use is a function return node. @@ -114,14 +116,21 @@ void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call, IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca); IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned); IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf); + IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync); IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError); - Alignment = Call->getParamAlign(ArgIdx); - ByValType = nullptr; - if (IsByVal) - ByValType = Call->getParamByValType(ArgIdx); - PreallocatedType = nullptr; + Alignment = Call->getParamStackAlign(ArgIdx); + IndirectType = nullptr; + assert(IsByVal + IsPreallocated + IsInAlloca <= 1 && + "multiple ABI attributes?"); + if (IsByVal) { + IndirectType = Call->getParamByValType(ArgIdx); + if (!Alignment) + Alignment = Call->getParamAlign(ArgIdx); + } if (IsPreallocated) - PreallocatedType = Call->getParamPreallocatedType(ArgIdx); + IndirectType = Call->getParamPreallocatedType(ArgIdx); + if (IsInAlloca) + IndirectType = Call->getParamInAllocaType(ArgIdx); } /// Generate a libcall taking the given operands as arguments and returning a @@ -194,9 +203,8 @@ bool TargetLowering::findOptimalMemOpLowering( // equal to DstAlign (or zero). VT = MVT::i64; if (Op.isFixedDstAlign()) - while ( - Op.getDstAlign() < (VT.getSizeInBits() / 8) && - !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign().value())) + while (Op.getDstAlign() < (VT.getSizeInBits() / 8) && + !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign())) VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1); assert(VT.isInteger()); @@ -250,7 +258,7 @@ bool TargetLowering::findOptimalMemOpLowering( bool Fast; if (NumMemOps && Op.allowOverlap() && NewVTSize < Size && allowsMisalignedMemoryAccesses( - VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign().value() : 1, + VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1), MachineMemOperand::MONone, &Fast) && Fast) VTSize = Size; @@ -502,7 +510,7 @@ bool TargetLowering::ShrinkDemandedConstant(SDValue Op, case ISD::AND: case ISD::OR: { auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); - if (!Op1C) + if (!Op1C || Op1C->isOpaque()) return false; // If this is a 'not' op, don't touch it because that's a canonical form. @@ -971,11 +979,12 @@ bool TargetLowering::SimplifyDemandedBits( Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth); return false; // Don't fall through, will infinitely loop. 
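The isInTailCallPosition change in this file strips the return attributes that cannot affect how the call sequence is emitted and then insists nothing else remains; sign/zero extension of the return value also blocks the fold. A loose sketch of the rule with attribute names as plain strings (not the AttrBuilder API):

#include <set>
#include <string>

bool retAttrsAllowTailCall(std::set<std::string> RetAttrs) {
  // Alignment, dereferenceability and aliasing info don't change the call
  // sequence, so they are ignored.
  for (const char *Ignored : {"align", "dereferenceable",
                              "dereferenceable_or_null", "noalias", "nonnull"})
    RetAttrs.erase(Ignored);
  // Anything left (zeroext/signext, sret, ...) conservatively blocks the
  // tail call.
  return RetAttrs.empty();
}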
case ISD::LOAD: { - LoadSDNode *LD = cast<LoadSDNode>(Op); + auto *LD = cast<LoadSDNode>(Op); if (getTargetConstantFromLoad(LD)) { Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth); return false; // Don't fall through, will infinitely loop. - } else if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) { + } + if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) { // If this is a ZEXTLoad and we are looking at the loaded value. EVT MemVT = LD->getMemoryVT(); unsigned MemBits = MemVT.getScalarSizeInBits(); @@ -2012,7 +2021,7 @@ bool TargetLowering::SimplifyDemandedBits( const APInt *ShAmtC = TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts); - if (!ShAmtC) + if (!ShAmtC || ShAmtC->uge(BitWidth)) break; uint64_t ShVal = ShAmtC->getZExtValue(); @@ -2267,10 +2276,8 @@ bool TargetLowering::SimplifyDemandedBits( if (DemandedBits.isSubsetOf(Known.Zero | Known.One)) { // Avoid folding to a constant if any OpaqueConstant is involved. const SDNode *N = Op.getNode(); - for (SDNodeIterator I = SDNodeIterator::begin(N), - E = SDNodeIterator::end(N); - I != E; ++I) { - SDNode *Op = *I; + for (SDNode *Op : + llvm::make_range(SDNodeIterator::begin(N), SDNodeIterator::end(N))) { if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) if (C->isOpaque()) return false; @@ -2417,6 +2424,27 @@ bool TargetLowering::SimplifyDemandedVectorElts( KnownUndef.setAllBits(); return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); } + SDValue ScalarSrc = Op.getOperand(0); + if (ScalarSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { + SDValue Src = ScalarSrc.getOperand(0); + SDValue Idx = ScalarSrc.getOperand(1); + EVT SrcVT = Src.getValueType(); + + ElementCount SrcEltCnt = SrcVT.getVectorElementCount(); + + if (SrcEltCnt.isScalable()) + return false; + + unsigned NumSrcElts = SrcEltCnt.getFixedValue(); + if (isNullConstant(Idx)) { + APInt SrcDemandedElts = APInt::getOneBitSet(NumSrcElts, 0); + APInt SrcUndef = KnownUndef.zextOrTrunc(NumSrcElts); + APInt SrcZero = KnownZero.zextOrTrunc(NumSrcElts); + if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero, + TLO, Depth + 1)) + return true; + } + } KnownUndef.setHighBits(NumElts - 1); break; } @@ -3028,6 +3056,19 @@ const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const { return nullptr; } +bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode( + SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, + bool PoisonOnly, unsigned Depth) const { + assert( + (Op.getOpcode() >= ISD::BUILTIN_OP_END || + Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_VOID) && + "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op" + " is a target node!"); + return false; +} + bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op, const SelectionDAG &DAG, bool SNaN, @@ -4502,40 +4543,39 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, GA->getValueType(0), Offset + GA->getOffset())); return; - } else if ((C = dyn_cast<ConstantSDNode>(Op)) && - ConstraintLetter != 's') { + } + if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') { // gcc prints these as sign extended. Sign extend value to 64 bits // now; without this it would get ZExt'd later in // ScheduleDAGSDNodes::EmitNode, which is very generic. bool IsBool = C->getConstantIntValue()->getBitWidth() == 1; BooleanContent BCont = getBooleanContents(MVT::i64); - ISD::NodeType ExtOpc = IsBool ? 
getExtendForContent(BCont) - : ISD::SIGN_EXTEND; - int64_t ExtVal = ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() - : C->getSExtValue(); - Ops.push_back(DAG.getTargetConstant(Offset + ExtVal, - SDLoc(C), MVT::i64)); + ISD::NodeType ExtOpc = + IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND; + int64_t ExtVal = + ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue(); + Ops.push_back( + DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64)); return; - } else if ((BA = dyn_cast<BlockAddressSDNode>(Op)) && - ConstraintLetter != 'n') { + } + if ((BA = dyn_cast<BlockAddressSDNode>(Op)) && ConstraintLetter != 'n') { Ops.push_back(DAG.getTargetBlockAddress( BA->getBlockAddress(), BA->getValueType(0), Offset + BA->getOffset(), BA->getTargetFlags())); return; - } else { - const unsigned OpCode = Op.getOpcode(); - if (OpCode == ISD::ADD || OpCode == ISD::SUB) { - if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0)))) - Op = Op.getOperand(1); - // Subtraction is not commutative. - else if (OpCode == ISD::ADD && - (C = dyn_cast<ConstantSDNode>(Op.getOperand(1)))) - Op = Op.getOperand(0); - else - return; - Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue(); - continue; - } + } + const unsigned OpCode = Op.getOpcode(); + if (OpCode == ISD::ADD || OpCode == ISD::SUB) { + if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0)))) + Op = Op.getOperand(1); + // Subtraction is not commutative. + else if (OpCode == ISD::ADD && + (C = dyn_cast<ConstantSDNode>(Op.getOperand(1)))) + Op = Op.getOperand(0); + else + return; + Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue(); + continue; } return; } @@ -4565,11 +4605,10 @@ TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI, if (!isLegalRC(*RI, *RC)) continue; - for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); - I != E; ++I) { - if (RegName.equals_lower(RI->getRegAsmName(*I))) { + for (const MCPhysReg &PR : *RC) { + if (RegName.equals_insensitive(RI->getRegAsmName(PR))) { std::pair<unsigned, const TargetRegisterClass *> S = - std::make_pair(*I, RC); + std::make_pair(PR, RC); // If this register class has the requested value type, return it, // otherwise keep searching and return the first class found @@ -5033,16 +5072,17 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, return SDValue(); SDValue Shift, Factor; - if (VT.isFixedLengthVector()) { + if (Op1.getOpcode() == ISD::BUILD_VECTOR) { Shift = DAG.getBuildVector(ShVT, dl, Shifts); Factor = DAG.getBuildVector(VT, dl, Factors); - } else if (VT.isScalableVector()) { + } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) { assert(Shifts.size() == 1 && Factors.size() == 1 && "Expected matchUnaryPredicate to return one element for scalable " "vectors"); Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]); Factor = DAG.getSplatVector(VT, dl, Factors[0]); } else { + assert(isa<ConstantSDNode>(Op1) && "Expected a constant"); Shift = Shifts[0]; Factor = Factors[0]; } @@ -5084,11 +5124,25 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); EVT ShSVT = ShVT.getScalarType(); unsigned EltBits = VT.getScalarSizeInBits(); + EVT MulVT; // Check to see if we can do this. // FIXME: We should be more aggressive here. - if (!isTypeLegal(VT)) - return SDValue(); + if (!isTypeLegal(VT)) { + // Limit this to simple scalars for now. 
+ if (VT.isVector() || !VT.isSimple()) + return SDValue(); + + // If this type will be promoted to a large enough type with a legal + // multiply operation, we can go ahead and do this transform. + if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger) + return SDValue(); + + MulVT = getTypeToTransformTo(*DAG.getContext(), VT); + if (MulVT.getSizeInBits() < (2 * EltBits) || + !isOperationLegal(ISD::MUL, MulVT)) + return SDValue(); + } // If the sdiv has an 'exact' bit we can use a simpler lowering. if (N->getFlags().hasExact()) @@ -5134,12 +5188,12 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, return SDValue(); SDValue MagicFactor, Factor, Shift, ShiftMask; - if (VT.isFixedLengthVector()) { + if (N1.getOpcode() == ISD::BUILD_VECTOR) { MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors); Factor = DAG.getBuildVector(VT, dl, Factors); Shift = DAG.getBuildVector(ShVT, dl, Shifts); ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks); - } else if (VT.isScalableVector()) { + } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) { assert(MagicFactors.size() == 1 && Factors.size() == 1 && Shifts.size() == 1 && ShiftMasks.size() == 1 && "Expected matchUnaryPredicate to return one element for scalable " "vectors"); MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]); Factor = DAG.getSplatVector(VT, dl, Factors[0]); @@ -5149,6 +5203,7 @@ Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]); ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]); } else { + assert(isa<ConstantSDNode>(N1) && "Expected a constant"); MagicFactor = MagicFactors[0]; Factor = Factors[0]; Shift = Shifts[0]; @@ -5157,17 +5212,32 @@ // Multiply the numerator (operand 0) by the magic value. // FIXME: We should support doing a MUL in a wider type. - SDValue Q; - if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT) - : isOperationLegalOrCustom(ISD::MULHS, VT)) - Q = DAG.getNode(ISD::MULHS, dl, VT, N0, MagicFactor); - else if (IsAfterLegalization ? isOperationLegal(ISD::SMUL_LOHI, VT) - : isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) { - SDValue LoHi = - DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), N0, MagicFactor); - Q = SDValue(LoHi.getNode(), 1); - } else - return SDValue(); // No mulhs or equivalent. + auto GetMULHS = [&](SDValue X, SDValue Y) { + // If the type isn't legal, use a wider mul of the type calculated + // earlier. + if (!isTypeLegal(VT)) { + X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X); + Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y); + Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y); + Y = DAG.getNode(ISD::SRL, dl, MulVT, Y, + DAG.getShiftAmountConstant(EltBits, MulVT, dl)); + return DAG.getNode(ISD::TRUNCATE, dl, VT, Y); + } + + if (isOperationLegalOrCustom(ISD::MULHS, VT, IsAfterLegalization)) + return DAG.getNode(ISD::MULHS, dl, VT, X, Y); + if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT, IsAfterLegalization)) { + SDValue LoHi = + DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y); + return SDValue(LoHi.getNode(), 1); + } + return SDValue(); + }; + + SDValue Q = GetMULHS(N0, MagicFactor); + if (!Q) + return SDValue(); + Created.push_back(Q.getNode()); // (Optionally) Add/subtract the numerator using Factor. @@ -5202,11 +5272,25 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); EVT ShSVT = ShVT.getScalarType(); unsigned EltBits = VT.getScalarSizeInBits(); + EVT MulVT; // Check to see if we can do this. // FIXME: We should be more aggressive here.
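GetMULHS above (and GetMULHU in the unsigned variant below) emulate a high-half multiply for an illegal type by extending to the promoted type, doing one full-width MUL, and shifting the product down. The effect is that magic-number division still applies to narrow types; for instance, a 16-bit unsigned divide by 3 through one 32-bit multiply, using the standard 16-bit magic constant 0xAAAB = (2^17 + 1)/3, checked exhaustively:

#include <cassert>
#include <cstdint>

int main() {
  // i16 typically promotes to i32: a plain 32-bit multiply plus a shift
  // stands in for the missing 16-bit MULHU, as in the wider-mul path above.
  for (uint32_t X = 0; X <= 0xFFFF; ++X)
    assert(((X * 0xAAABu) >> 17) == X / 3);
  return 0;
}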
- if (!isTypeLegal(VT)) - return SDValue(); + if (!isTypeLegal(VT)) { + // Limit this to simple scalars for now. + if (VT.isVector() || !VT.isSimple()) + return SDValue(); + + // If this type will be promoted to a large enough type with a legal + // multiply operation, we can go ahead and do this transform. + if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger) + return SDValue(); + + MulVT = getTypeToTransformTo(*DAG.getContext(), VT); + if (MulVT.getSizeInBits() < (2 * EltBits) || + !isOperationLegal(ISD::MUL, MulVT)) + return SDValue(); + } bool UseNPQ = false; SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors; @@ -5216,7 +5300,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, return false; // FIXME: We should use a narrower constant when the upper // bits are known to be zero. - APInt Divisor = C->getAPIntValue(); + const APInt& Divisor = C->getAPIntValue(); APInt::mu magics = Divisor.magicu(); unsigned PreShift = 0, PostShift = 0; @@ -5261,12 +5345,12 @@ return SDValue(); SDValue PreShift, PostShift, MagicFactor, NPQFactor; - if (VT.isFixedLengthVector()) { + if (N1.getOpcode() == ISD::BUILD_VECTOR) { PreShift = DAG.getBuildVector(ShVT, dl, PreShifts); MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors); NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors); PostShift = DAG.getBuildVector(ShVT, dl, PostShifts); - } else if (VT.isScalableVector()) { + } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) { assert(PreShifts.size() == 1 && MagicFactors.size() == 1 && NPQFactors.size() == 1 && PostShifts.size() == 1 && "Expected matchUnaryPredicate to return one for scalable vectors"); PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]); MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]); NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]); PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]); } else { + assert(isa<ConstantSDNode>(N1) && "Expected a constant"); PreShift = PreShifts[0]; MagicFactor = MagicFactors[0]; PostShift = PostShifts[0]; @@ -5286,11 +5371,20 @@ // FIXME: We should support doing a MUL in a wider type. auto GetMULHU = [&](SDValue X, SDValue Y) { - if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT) - : isOperationLegalOrCustom(ISD::MULHU, VT)) + // If the type isn't legal, use a wider mul of the type calculated + // earlier. + if (!isTypeLegal(VT)) { + X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X); + Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y); + Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y); + Y = DAG.getNode(ISD::SRL, dl, MulVT, Y, + DAG.getShiftAmountConstant(EltBits, MulVT, dl)); + return DAG.getNode(ISD::TRUNCATE, dl, VT, Y); + } + + if (isOperationLegalOrCustom(ISD::MULHU, VT, IsAfterLegalization)) return DAG.getNode(ISD::MULHU, dl, VT, X, Y); - if (IsAfterLegalization ?
isOperationLegal(ISD::UMUL_LOHI, VT) - : isOperationLegalOrCustom(ISD::UMUL_LOHI, VT)) { + if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT, IsAfterLegalization)) { SDValue LoHi = DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y); return SDValue(LoHi.getNode(), 1); @@ -5398,11 +5492,11 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, EVT VT = REMNode.getValueType(); EVT SVT = VT.getScalarType(); - EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); + EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout(), !DCI.isBeforeLegalize()); EVT ShSVT = ShVT.getScalarType(); // If MUL is unavailable, we cannot proceed in any case. - if (!isOperationLegalOrCustom(ISD::MUL, VT)) + if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT)) return SDValue(); bool ComparingWithAllZeros = true; @@ -5511,7 +5605,7 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, return SDValue(); SDValue PVal, KVal, QVal; - if (VT.isVector()) { + if (D.getOpcode() == ISD::BUILD_VECTOR) { if (HadTautologicalLanes) { // Try to turn PAmts into a splat, since we don't care about the values // that are currently '0'. If we can't, just keep '0'`s. @@ -5525,6 +5619,13 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, PVal = DAG.getBuildVector(VT, DL, PAmts); KVal = DAG.getBuildVector(ShVT, DL, KAmts); QVal = DAG.getBuildVector(VT, DL, QAmts); + } else if (D.getOpcode() == ISD::SPLAT_VECTOR) { + assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 && + "Expected matchBinaryPredicate to return one element for " + "SPLAT_VECTORs"); + PVal = DAG.getSplatVector(VT, DL, PAmts[0]); + KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]); + QVal = DAG.getSplatVector(VT, DL, QAmts[0]); } else { PVal = PAmts[0]; KVal = KAmts[0]; @@ -5532,7 +5633,7 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, } if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) { - if (!isOperationLegalOrCustom(ISD::SUB, VT)) + if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT)) return SDValue(); // FIXME: Could/should use `ISD::ADD`? assert(CompTargetNode.getValueType() == N.getValueType() && "Expecting that the types on LHS and RHS of comparisons match."); @@ -5547,12 +5648,10 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, // divisors as a performance improvement, since rotating by 0 is a no-op. if (HadEvenDivisor) { // We need ROTR to do this. - if (!isOperationLegalOrCustom(ISD::ROTR, VT)) + if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT)) return SDValue(); - SDNodeFlags Flags; - Flags.setExact(true); // UREM: (rotr (mul N, P), K) - Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal, Flags); + Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal); Created.push_back(Op0.getNode()); } @@ -5577,6 +5676,8 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE); Created.push_back(TautologicalInvertedChannels.getNode()); + // NOTE: we avoid letting illegal types through even if we're before legalize + // ops – legalization has a hard time producing good code for this. if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) { // If we have a vector select, let's replace the comparison results in the // affected lanes with the correct tautological result. @@ -5587,6 +5688,8 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, } // Else, we can just invert the comparison result in the appropriate lanes. 
+ // + // NOTE: see the note above VSELECT above. if (isOperationLegalOrCustom(ISD::XOR, SETCCVT)) return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC, TautologicalInvertedChannels); @@ -5638,11 +5741,12 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, EVT VT = REMNode.getValueType(); EVT SVT = VT.getScalarType(); - EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); + EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout(), !DCI.isBeforeLegalize()); EVT ShSVT = ShVT.getScalarType(); - // If MUL is unavailable, we cannot proceed in any case. - if (!isOperationLegalOrCustom(ISD::MUL, VT)) + // If we are after ops legalization, and MUL is unavailable, we can not + // proceed. + if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT)) return SDValue(); // TODO: Could support comparing with non-zero too. @@ -5755,7 +5859,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, return SDValue(); SDValue PVal, AVal, KVal, QVal; - if (VT.isFixedLengthVector()) { + if (D.getOpcode() == ISD::BUILD_VECTOR) { if (HadOneDivisor) { // Try to turn PAmts into a splat, since we don't care about the values // that are currently '0'. If we can't, just keep '0'`s. @@ -5774,7 +5878,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, AVal = DAG.getBuildVector(VT, DL, AAmts); KVal = DAG.getBuildVector(ShVT, DL, KAmts); QVal = DAG.getBuildVector(VT, DL, QAmts); - } else if (VT.isScalableVector()) { + } else if (D.getOpcode() == ISD::SPLAT_VECTOR) { assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 && "Expected matchUnaryPredicate to return one element for scalable " @@ -5784,6 +5888,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]); QVal = DAG.getSplatVector(VT, DL, QAmts[0]); } else { + assert(isa<ConstantSDNode>(D) && "Expected a constant"); PVal = PAmts[0]; AVal = AAmts[0]; KVal = KAmts[0]; @@ -5796,7 +5901,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, if (NeedToApplyOffset) { // We need ADD to do this. - if (!isOperationLegalOrCustom(ISD::ADD, VT)) + if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT)) return SDValue(); // (add (mul N, P), A) @@ -5808,12 +5913,10 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, // divisors as a performance improvement, since rotating by 0 is a no-op. if (HadEvenDivisor) { // We need ROTR to do this. - if (!isOperationLegalOrCustom(ISD::ROTR, VT)) + if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT)) return SDValue(); - SDNodeFlags Flags; - Flags.setExact(true); // SREM: (rotr (add (mul N, P), A), K) - Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal, Flags); + Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal); Created.push_back(Op0.getNode()); } @@ -5831,10 +5934,13 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, // we must fix-up results for said lanes. assert(VT.isVector() && "Can/should only get here for vectors."); + // NOTE: we avoid letting illegal types through even if we're before legalize + // ops – legalization has a hard time producing good code for the code that + // follows. 
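The UREM/SREM equality folds adjusted above turn (X % C) == 0 into a multiply by C's modular inverse plus an unsigned bound check, with a rotate inserted to handle even divisors (hence the ROTR legality guards). For an odd divisor the core identity is easy to verify directly:

#include <cassert>
#include <cstdint>

int main() {
  // X % 5 == 0  iff  X * inv(5) (mod 2^32) <= floor((2^32 - 1) / 5).
  const uint32_t P = 0xCCCCCCCDu; // 5 * P == 1 (mod 2^32)
  const uint32_t Q = 0xFFFFFFFFu / 5;
  for (uint32_t X = 0; X < (1u << 20); ++X) // spot-check a range
    assert(((X * P) <= Q) == (X % 5 == 0));
  return 0;
}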
if (!isOperationLegalOrCustom(ISD::SETEQ, VT) || !isOperationLegalOrCustom(ISD::AND, VT) || !isOperationLegalOrCustom(Cond, VT) || - !isOperationLegalOrCustom(ISD::VSELECT, VT)) + !isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) return SDValue(); Created.push_back(Fold.getNode()); @@ -5860,8 +5966,8 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is // constant-folded, select can get lowered to a shuffle with constant mask. - SDValue Blended = - DAG.getNode(ISD::VSELECT, DL, VT, DivisorIsIntMin, MaskedIsZero, Fold); + SDValue Blended = DAG.getNode(ISD::VSELECT, DL, SETCCVT, DivisorIsIntMin, + MaskedIsZero, Fold); return Blended; } @@ -5935,6 +6041,11 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, SDLoc DL(Op); + // Because getNegatedExpression can delete nodes we need a handle to keep + // temporary nodes alive in case the recursion manages to create an identical + // node. + std::list<HandleSDNode> Handles; + switch (Opcode) { case ISD::ConstantFP: { // Don't invert constant FP values after legalization unless the target says @@ -6003,11 +6114,18 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, NegatibleCost CostX = NegatibleCost::Expensive; SDValue NegX = getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth); + // Prevent this node from being deleted by the next call. + if (NegX) + Handles.emplace_back(NegX); + // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X) NegatibleCost CostY = NegatibleCost::Expensive; SDValue NegY = getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth); + // We're done with the handles. + Handles.clear(); + // Negate the X if its cost is less or equal than Y. if (NegX && (CostX <= CostY)) { Cost = CostX; @@ -6052,11 +6170,18 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, NegatibleCost CostX = NegatibleCost::Expensive; SDValue NegX = getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth); + // Prevent this node from being deleted by the next call. + if (NegX) + Handles.emplace_back(NegX); + // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) NegatibleCost CostY = NegatibleCost::Expensive; SDValue NegY = getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth); + // We're done with the handles. + Handles.clear(); + // Negate the X if its cost is less or equal than Y. if (NegX && (CostX <= CostY)) { Cost = CostX; @@ -6094,15 +6219,25 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, if (!NegZ) break; + // Prevent this node from being deleted by the next two calls. + Handles.emplace_back(NegZ); + // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z)) NegatibleCost CostX = NegatibleCost::Expensive; SDValue NegX = getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth); + // Prevent this node from being deleted by the next call. + if (NegX) + Handles.emplace_back(NegX); + // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z)) NegatibleCost CostY = NegatibleCost::Expensive; SDValue NegY = getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth); + // We're done with the handles. + Handles.clear(); + // Negate the X if its cost is less or equal than Y. 
if (NegX && (CostX <= CostY)) { Cost = std::min(CostX, CostZ); @@ -6493,6 +6628,58 @@ bool TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps, return true; } +void TargetLowering::expandShiftParts(SDNode *Node, SDValue &Lo, SDValue &Hi, + SelectionDAG &DAG) const { + assert(Node->getNumOperands() == 3 && "Not a double-shift!"); + EVT VT = Node->getValueType(0); + unsigned VTBits = VT.getScalarSizeInBits(); + assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected"); + + bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS; + bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS; + SDValue ShOpLo = Node->getOperand(0); + SDValue ShOpHi = Node->getOperand(1); + SDValue ShAmt = Node->getOperand(2); + EVT ShAmtVT = ShAmt.getValueType(); + EVT ShAmtCCVT = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT); + SDLoc dl(Node); + + // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and + // ISD::SRA/L nodes haven't. Insert an AND to be safe, it's usually optimized + // away during isel. + SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt, + DAG.getConstant(VTBits - 1, dl, ShAmtVT)); + SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi, + DAG.getConstant(VTBits - 1, dl, ShAmtVT)) + : DAG.getConstant(0, dl, VT); + + SDValue Tmp2, Tmp3; + if (IsSHL) { + Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt); + Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt); + } else { + Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt); + Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt); + } + + // If the shift amount is larger or equal than the width of a part we don't + // use the result from the FSHL/FSHR. Insert a test and select the appropriate + // values for large shift amounts. + SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt, + DAG.getConstant(VTBits, dl, ShAmtVT)); + SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode, + DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE); + + if (IsSHL) { + Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2); + Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3); + } else { + Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2); + Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3); + } +} + bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const { unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0; @@ -6514,7 +6701,7 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, // Expand f32 -> i64 conversion // This algorithm comes from compiler-rt's implementation of fixsfdi: - // https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/builtins/fixsfdi.c + // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c unsigned SrcEltBits = SrcVT.getScalarSizeInBits(); EVT IntVT = SrcVT.changeTypeToInteger(); EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout()); @@ -7006,6 +7193,129 @@ bool TargetLowering::expandABS(SDNode *N, SDValue &Result, return true; } +SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const { + SDLoc dl(N); + EVT VT = N->getValueType(0); + SDValue Op = N->getOperand(0); + + if (!VT.isSimple()) + return SDValue(); + + EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout()); + SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8; + switch (VT.getSimpleVT().getScalarType().SimpleTy) { + default: + return SDValue(); + case MVT::i16: + // Use a rotate by 8. This can be further expanded if necessary. 
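Just below, the i16 case of expandBSWAP is a single rotate, while the wider cases open-code the swap with shifts, masks and an OR tree. The i32 arm transcribes directly into plain C++ and is easy to sanity-check:

#include <cassert>
#include <cstdint>

uint32_t bswap32(uint32_t Op) {
  uint32_t Tmp4 = Op << 24;                 // byte 0 -> byte 3
  uint32_t Tmp3 = (Op << 8) & 0x00FF0000u;  // byte 1 -> byte 2
  uint32_t Tmp2 = (Op >> 8) & 0x0000FF00u;  // byte 2 -> byte 1
  uint32_t Tmp1 = Op >> 24;                 // byte 3 -> byte 0
  return (Tmp4 | Tmp3) | (Tmp2 | Tmp1);
}

int main() {
  assert(bswap32(0x11223344u) == 0x44332211u);
  return 0;
}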
+ return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); + case MVT::i32: + Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); + Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); + Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, + DAG.getConstant(0xFF0000, dl, VT)); + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT)); + Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3); + Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1); + return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2); + case MVT::i64: + Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT)); + Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(40, dl, SHVT)); + Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); + Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); + Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); + Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT)); + Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT)); + Tmp7 = DAG.getNode(ISD::AND, dl, VT, Tmp7, + DAG.getConstant(255ULL<<48, dl, VT)); + Tmp6 = DAG.getNode(ISD::AND, dl, VT, Tmp6, + DAG.getConstant(255ULL<<40, dl, VT)); + Tmp5 = DAG.getNode(ISD::AND, dl, VT, Tmp5, + DAG.getConstant(255ULL<<32, dl, VT)); + Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4, + DAG.getConstant(255ULL<<24, dl, VT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, + DAG.getConstant(255ULL<<16, dl, VT)); + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, + DAG.getConstant(255ULL<<8 , dl, VT)); + Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7); + Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5); + Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3); + Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1); + Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6); + Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2); + return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4); + } +} + +SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const { + SDLoc dl(N); + EVT VT = N->getValueType(0); + SDValue Op = N->getOperand(0); + EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout()); + unsigned Sz = VT.getScalarSizeInBits(); + + SDValue Tmp, Tmp2, Tmp3; + + // If we can, perform BSWAP first and then the mask+swap the i4, then i2 + // and finally the i1 pairs. + // TODO: We can easily support i4/i2 legal types if any target ever does. + if (Sz >= 8 && isPowerOf2_32(Sz)) { + // Create the masks - repeating the pattern every byte. + APInt MaskHi4 = APInt::getSplat(Sz, APInt(8, 0xF0)); + APInt MaskHi2 = APInt::getSplat(Sz, APInt(8, 0xCC)); + APInt MaskHi1 = APInt::getSplat(Sz, APInt(8, 0xAA)); + APInt MaskLo4 = APInt::getSplat(Sz, APInt(8, 0x0F)); + APInt MaskLo2 = APInt::getSplat(Sz, APInt(8, 0x33)); + APInt MaskLo1 = APInt::getSplat(Sz, APInt(8, 0x55)); + + // BSWAP if the type is wider than a single byte. + Tmp = (Sz > 8 ? 
DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op); + + // swap i4: ((V & 0xF0) >> 4) | ((V & 0x0F) << 4) + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi4, dl, VT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo4, dl, VT)); + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(4, dl, SHVT)); + Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT)); + Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); + + // swap i2: ((V & 0xCC) >> 2) | ((V & 0x33) << 2) + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi2, dl, VT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo2, dl, VT)); + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(2, dl, SHVT)); + Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT)); + Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); + + // swap i1: ((V & 0xAA) >> 1) | ((V & 0x55) << 1) + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi1, dl, VT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo1, dl, VT)); + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(1, dl, SHVT)); + Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT)); + Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); + return Tmp; + } + + Tmp = DAG.getConstant(0, dl, VT); + for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) { + if (I < J) + Tmp2 = + DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT)); + else + Tmp2 = + DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT)); + + APInt Shift(Sz, 1); + Shift <<= J; + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT)); + Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2); + } + + return Tmp; +} + std::pair<SDValue, SDValue> TargetLowering::scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const { @@ -7490,39 +7800,51 @@ TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask, return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment); } -static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, - SDValue Idx, - EVT VecVT, - const SDLoc &dl) { +static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx, + EVT VecVT, const SDLoc &dl, + unsigned NumSubElts) { if (!VecVT.isScalableVector() && isa<ConstantSDNode>(Idx)) return Idx; EVT IdxVT = Idx.getValueType(); unsigned NElts = VecVT.getVectorMinNumElements(); if (VecVT.isScalableVector()) { - SDValue VS = DAG.getVScale(dl, IdxVT, - APInt(IdxVT.getFixedSizeInBits(), - NElts)); - SDValue Sub = DAG.getNode(ISD::SUB, dl, IdxVT, VS, - DAG.getConstant(1, dl, IdxVT)); - + // If this is a constant index and we know the value plus the number of the + // elements in the subvector minus one is less than the minimum number of + // elements then it's safe to return Idx. + if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx)) + if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts) + return Idx; + SDValue VS = + DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts)); + unsigned SubOpcode = NumSubElts <= NElts ? 
ISD::SUB : ISD::USUBSAT; + SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS, + DAG.getConstant(NumSubElts, dl, IdxVT)); return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub); - } else { - if (isPowerOf2_32(NElts)) { - APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), - Log2_32(NElts)); - return DAG.getNode(ISD::AND, dl, IdxVT, Idx, - DAG.getConstant(Imm, dl, IdxVT)); - } } - + if (isPowerOf2_32(NElts) && NumSubElts == 1) { + APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts)); + return DAG.getNode(ISD::AND, dl, IdxVT, Idx, + DAG.getConstant(Imm, dl, IdxVT)); + } + unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0; return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, - DAG.getConstant(NElts - 1, dl, IdxVT)); + DAG.getConstant(MaxIndex, dl, IdxVT)); } SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const { + return getVectorSubVecPointer( + DAG, VecPtr, VecVT, + EVT::getVectorVT(*DAG.getContext(), VecVT.getVectorElementType(), 1), + Index); +} + +SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG, + SDValue VecPtr, EVT VecVT, + EVT SubVecVT, + SDValue Index) const { SDLoc dl(Index); // Make sure the index type is big enough to compute in. Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType()); @@ -7534,7 +7856,13 @@ SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG, assert(EltSize * 8 == EltVT.getFixedSizeInBits() && "Converting bits to bytes lost precision"); - Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl); + // Scalable vectors don't need clamping as these are checked at compile time + if (SubVecVT.isFixedLengthVector()) { + assert(SubVecVT.getVectorElementType() == EltVT && + "Sub-vector must be a fixed vector with matching element type"); + Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl, + SubVecVT.getVectorNumElements()); + } EVT IdxVT = Index.getValueType(); @@ -7717,8 +8045,7 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const { unsigned BitWidth = LHS.getScalarValueSizeInBits(); EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); - SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), - LHS, RHS); + SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS); SDValue SumDiff = Result.getValue(0); SDValue Overflow = Result.getValue(1); SDValue Zero = DAG.getConstant(0, dl, VT); @@ -7732,7 +8059,9 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const { } // Overflow ? 0xffff.... : (LHS + RHS) return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff); - } else if (Opcode == ISD::USUBSAT) { + } + + if (Opcode == ISD::USUBSAT) { if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) { // (LHS - RHS) & ~OverflowMask SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT); @@ -7741,17 +8070,17 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const { } // Overflow ? 
0 : (LHS - RHS) return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff); - } else { - // SatMax -> Overflow && SumDiff < 0 - // SatMin -> Overflow && SumDiff >= 0 - APInt MinVal = APInt::getSignedMinValue(BitWidth); - APInt MaxVal = APInt::getSignedMaxValue(BitWidth); - SDValue SatMin = DAG.getConstant(MinVal, dl, VT); - SDValue SatMax = DAG.getConstant(MaxVal, dl, VT); - SDValue SumNeg = DAG.getSetCC(dl, BoolVT, SumDiff, Zero, ISD::SETLT); - Result = DAG.getSelect(dl, VT, SumNeg, SatMax, SatMin); - return DAG.getSelect(dl, VT, Overflow, Result, SumDiff); } + + // SatMax -> Overflow && SumDiff < 0 + // SatMin -> Overflow && SumDiff >= 0 + APInt MinVal = APInt::getSignedMinValue(BitWidth); + APInt MaxVal = APInt::getSignedMaxValue(BitWidth); + SDValue SatMin = DAG.getConstant(MinVal, dl, VT); + SDValue SatMax = DAG.getConstant(MaxVal, dl, VT); + SDValue SumNeg = DAG.getSetCC(dl, BoolVT, SumDiff, Zero, ISD::SETLT); + Result = DAG.getSelect(dl, VT, SumNeg, SatMax, SatMin); + return DAG.getSelect(dl, VT, Overflow, Result, SumDiff); } SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const { @@ -8309,7 +8638,8 @@ bool TargetLowering::expandREM(SDNode *Node, SDValue &Result, SDVTList VTs = DAG.getVTList(VT, VT); Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1); return true; - } else if (isOperationLegalOrCustom(DivOpc, VT)) { + } + if (isOperationLegalOrCustom(DivOpc, VT)) { // X % Y -> X-X/Y*Y SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor); SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor); @@ -8329,7 +8659,8 @@ SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node, EVT SrcVT = Src.getValueType(); EVT DstVT = Node->getValueType(0); - unsigned SatWidth = Node->getConstantOperandVal(1); + EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT(); + unsigned SatWidth = SatVT.getScalarSizeInBits(); unsigned DstWidth = DstVT.getScalarSizeInBits(); assert(SatWidth <= DstWidth && "Expected saturation width smaller than result width"); @@ -8420,3 +8751,210 @@ SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node, SDValue ZeroInt = DAG.getConstant(0, dl, DstVT); return DAG.getSelectCC(dl, Src, Src, ZeroInt, Select, ISD::CondCode::SETUO); } + +SDValue TargetLowering::expandVectorSplice(SDNode *Node, + SelectionDAG &DAG) const { + assert(Node->getOpcode() == ISD::VECTOR_SPLICE && "Unexpected opcode!"); + assert(Node->getValueType(0).isScalableVector() && + "Fixed length vector types expected to use SHUFFLE_VECTOR!"); + + EVT VT = Node->getValueType(0); + SDValue V1 = Node->getOperand(0); + SDValue V2 = Node->getOperand(1); + int64_t Imm = cast<ConstantSDNode>(Node->getOperand(2))->getSExtValue(); + SDLoc DL(Node); + + // Expand through memory thusly: + // Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr + // Store V1, Ptr + // Store V2, Ptr + sizeof(V1) + // If (Imm < 0) + // TrailingElts = -Imm + // Ptr = Ptr + sizeof(V1) - (TrailingElts * sizeof(VT.Elt)) + // else + // Ptr = Ptr + (Imm * sizeof(VT.Elt)) + // Res = Load Ptr + + Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false); + + EVT MemVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), + VT.getVectorElementCount() * 2); + SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment); + EVT PtrVT = StackPtr.getValueType(); + auto &MF = DAG.getMachineFunction(); + auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex); + + // Store the lo part of 
CONCAT_VECTORS(V1, V2) + SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo); + // Store the hi part of CONCAT_VECTORS(V1, V2) + SDValue OffsetToV2 = DAG.getVScale( + DL, PtrVT, + APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinSize())); + SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, OffsetToV2); + SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo); + + if (Imm >= 0) { + // Load back the required element. getVectorElementPointer takes care of + // clamping the index if it's out-of-bounds. + StackPtr = getVectorElementPointer(DAG, StackPtr, VT, Node->getOperand(2)); + // Load the spliced result + return DAG.getLoad(VT, DL, StoreV2, StackPtr, + MachinePointerInfo::getUnknownStack(MF)); + } + + uint64_t TrailingElts = -Imm; + + // NOTE: TrailingElts must be clamped so as not to read outside of V1:V2. + TypeSize EltByteSize = VT.getVectorElementType().getStoreSize(); + SDValue TrailingBytes = + DAG.getConstant(TrailingElts * EltByteSize, DL, PtrVT); + + if (TrailingElts > VT.getVectorMinNumElements()) { + SDValue VLBytes = DAG.getVScale( + DL, PtrVT, + APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinSize())); + TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VLBytes); + } + + // Calculate the start address of the spliced result. + StackPtr2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes); + + // Load the spliced result + return DAG.getLoad(VT, DL, StoreV2, StackPtr2, + MachinePointerInfo::getUnknownStack(MF)); +} + +bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, + SDValue &LHS, SDValue &RHS, + SDValue &CC, bool &NeedInvert, + const SDLoc &dl, SDValue &Chain, + bool IsSignaling) const { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + MVT OpVT = LHS.getSimpleValueType(); + ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get(); + NeedInvert = false; + switch (TLI.getCondCodeAction(CCCode, OpVT)) { + default: + llvm_unreachable("Unknown condition code action!"); + case TargetLowering::Legal: + // Nothing to do. + break; + case TargetLowering::Expand: { + ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode); + if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) { + std::swap(LHS, RHS); + CC = DAG.getCondCode(InvCC); + return true; + } + // Swapping operands didn't work. Try inverting the condition. + bool NeedSwap = false; + InvCC = getSetCCInverse(CCCode, OpVT); + if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) { + // If inverting the condition is not enough, try swapping operands + // on top of it. 
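
Stepping back to the expandVectorSplice hunk above: it materializes the splice through a stack slot, storing both operands adjacently and loading the result at an element offset, from the front for Imm >= 0 and measured back from the end of V1 for negative Imm. The same protocol with ordinary arrays (fixed four-element vectors, purely illustrative; the real code also clamps the offsets against the buffer):

    #include <cassert>
    #include <cstring>

    int main() {
      const int N = 4;
      int V1[N] = {0, 1, 2, 3}, V2[N] = {4, 5, 6, 7};
      int Buf[2 * N], Res[N];
      std::memcpy(Buf, V1, sizeof(V1));         // Store V1, Ptr
      std::memcpy(Buf + N, V2, sizeof(V2));     // Store V2, Ptr + sizeof(V1)

      int Imm = 1;                              // splice(V1, V2, 1)
      std::memcpy(Res, Buf + Imm, sizeof(Res)); // Ptr + Imm * sizeof(elt)
      assert(Res[0] == 1 && Res[3] == 4);

      Imm = -1;                                 // take 1 trailing elt of V1
      int TrailingElts = -Imm;
      std::memcpy(Res, Buf + N - TrailingElts, sizeof(Res));
      assert(Res[0] == 3 && Res[3] == 6);
    }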
+ InvCC = ISD::getSetCCSwappedOperands(InvCC);
+ NeedSwap = true;
+ }
+ if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
+ CC = DAG.getCondCode(InvCC);
+ NeedInvert = true;
+ if (NeedSwap)
+ std::swap(LHS, RHS);
+ return true;
+ }
+
+ ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
+ unsigned Opc = 0;
+ switch (CCCode) {
+ default:
+ llvm_unreachable("Don't know how to expand this condition!");
+ case ISD::SETUO:
+ if (TLI.isCondCodeLegal(ISD::SETUNE, OpVT)) {
+ CC1 = ISD::SETUNE;
+ CC2 = ISD::SETUNE;
+ Opc = ISD::OR;
+ break;
+ }
+ assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
+ "If SETUO is expanded, SETOEQ or SETUNE must be legal!");
+ NeedInvert = true;
+ LLVM_FALLTHROUGH;
+ case ISD::SETO:
+ assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
+ "If SETO is expanded, SETOEQ must be legal!");
+ CC1 = ISD::SETOEQ;
+ CC2 = ISD::SETOEQ;
+ Opc = ISD::AND;
+ break;
+ case ISD::SETONE:
+ case ISD::SETUEQ:
+ // If the SETUO or SETO CC isn't legal, we might be able to use
+ // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
+ // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
+ // the operands.
+ CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
+ if (!TLI.isCondCodeLegal(CC2, OpVT) &&
+ (TLI.isCondCodeLegal(ISD::SETOGT, OpVT) ||
+ TLI.isCondCodeLegal(ISD::SETOLT, OpVT))) {
+ CC1 = ISD::SETOGT;
+ CC2 = ISD::SETOLT;
+ Opc = ISD::OR;
+ NeedInvert = ((unsigned)CCCode & 0x8U);
+ break;
+ }
+ LLVM_FALLTHROUGH;
+ case ISD::SETOEQ:
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETUNE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ case ISD::SETULT:
+ case ISD::SETULE:
+ // If we are floating point, assign and break, otherwise fall through.
+ if (!OpVT.isInteger()) {
+ // We can use the 4th bit to tell if we are the unordered
+ // or ordered version of the opcode.
+ CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
+ Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
+ CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
+ break;
+ }
+ // Fall through if we are an unsigned integer.
+ LLVM_FALLTHROUGH;
+ case ISD::SETLE:
+ case ISD::SETGT:
+ case ISD::SETGE:
+ case ISD::SETLT:
+ case ISD::SETNE:
+ case ISD::SETEQ:
+ // If all combinations of inverting the condition and swapping operands
+ // didn't work then we have no means to expand the condition.
+ llvm_unreachable("Don't know how to expand this condition!");
+ }
+
+ SDValue SetCC1, SetCC2;
+ if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
+ // If we aren't the ordered or unordered operation,
+ // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
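
The decompositions used in that switch are ordinary IEEE comparison facts: SETONE is SETOGT || SETOLT, SETUEQ is its inverse, and SETUO on (L, R) reduces to the (LHS CC1 LHS) Opc (RHS CC2 RHS) pattern as !(L == L) || !(R == R). A standalone check with plain doubles (strict IEEE semantics assumed, so no -ffast-math):

    #include <cassert>
    #include <cmath>

    // SETONE == SETOGT || SETOLT (ordered and not equal).
    static bool setone(double L, double R) { return (L > R) || (L < R); }
    // SETUEQ is its inverse (the NeedInvert path above).
    static bool setueq(double L, double R) { return !setone(L, R); }
    // SETUO via the self-comparison pattern: !(L OEQ L) || !(R OEQ R).
    static bool setuo(double L, double R) { return !(L == L) || !(R == R); }

    int main() {
      double NaN = std::nan("");
      assert(setone(1.0, 2.0) && !setone(1.0, 1.0) && !setone(NaN, 1.0));
      assert(setueq(NaN, 1.0) && setueq(1.0, 1.0) && !setueq(1.0, 2.0));
      assert(setuo(NaN, 1.0) && !setuo(1.0, 2.0));
    }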
+ SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling); + SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling); + } else { + // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS) + SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling); + SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling); + } + if (Chain) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1), + SetCC2.getValue(1)); + LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2); + RHS = SDValue(); + CC = SDValue(); + return true; + } + } + return false; +} diff --git a/llvm/lib/CodeGen/ShadowStackGCLowering.cpp b/llvm/lib/CodeGen/ShadowStackGCLowering.cpp index 45427dc41e6e..86b559fd6413 100644 --- a/llvm/lib/CodeGen/ShadowStackGCLowering.cpp +++ b/llvm/lib/CodeGen/ShadowStackGCLowering.cpp @@ -17,11 +17,13 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" @@ -67,6 +69,7 @@ public: ShadowStackGCLowering(); bool doInitialization(Module &M) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; bool runOnFunction(Function &F) override; private: @@ -86,10 +89,12 @@ private: } // end anonymous namespace char ShadowStackGCLowering::ID = 0; +char &llvm::ShadowStackGCLoweringID = ShadowStackGCLowering::ID; INITIALIZE_PASS_BEGIN(ShadowStackGCLowering, DEBUG_TYPE, "Shadow Stack GC Lowering", false, false) INITIALIZE_PASS_DEPENDENCY(GCModuleInfo) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_END(ShadowStackGCLowering, DEBUG_TYPE, "Shadow Stack GC Lowering", false, false) @@ -234,8 +239,8 @@ void ShadowStackGCLowering::CollectRoots(Function &F) { SmallVector<std::pair<CallInst *, AllocaInst *>, 16> MetaRoots; - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) - for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) + for (BasicBlock &BB : F) + for (BasicBlock::iterator II = BB.begin(), E = BB.end(); II != E;) if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) if (Function *F = CI->getCalledFunction()) if (F->getIntrinsicID() == Intrinsic::gcroot) { @@ -280,6 +285,10 @@ GetElementPtrInst *ShadowStackGCLowering::CreateGEP(LLVMContext &Context, return dyn_cast<GetElementPtrInst>(Val); } +void ShadowStackGCLowering::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved<DominatorTreeWrapperPass>(); +} + /// runOnFunction - Insert code to maintain the shadow stack. bool ShadowStackGCLowering::runOnFunction(Function &F) { // Quick exit for functions that do not use the shadow stack GC. @@ -297,6 +306,10 @@ bool ShadowStackGCLowering::runOnFunction(Function &F) { if (Roots.empty()) return false; + Optional<DomTreeUpdater> DTU; + if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>()) + DTU.emplace(DTWP->getDomTree(), DomTreeUpdater::UpdateStrategy::Lazy); + // Build the constant map and figure the type of the shadow stack entry. Value *FrameMap = GetFrameMap(F); Type *ConcreteStackEntryTy = GetConcreteStackEntryType(F); @@ -348,7 +361,8 @@ bool ShadowStackGCLowering::runOnFunction(Function &F) { AtEntry.CreateStore(NewHeadVal, Head); // For each instruction that escapes... 
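
For context on what ShadowStackGCLowering maintains in the hunks below: each instrumented function pushes an entry holding its GC roots onto a global linked list on entry and pops it on every exit, so a collector can walk all live roots without target-specific stack maps. A toy model of that push/pop protocol (the structs are simplified stand-ins, not the pass's real frame-map types):

    #include <cassert>

    struct StackEntry {
      StackEntry *Next;  // caller's entry
      void *Roots[2];    // gcroot slots for this frame
    };
    StackEntry *Head = nullptr; // the global root-chain head

    static int collectable_frame(int Depth) {
      StackEntry E{Head, {nullptr, nullptr}}; // push on entry
      Head = &E;
      int R = Depth ? collectable_frame(Depth - 1) : 0;
      Head = E.Next;                          // pop on every exit
      return R;
    }

    int main() {
      collectable_frame(3);
      assert(Head == nullptr); // every escape path popped its entry
    }

The EscapeEnumerator change below is what guarantees the pop also lands on exceptional exits.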
- EscapeEnumerator EE(F, "gc_cleanup"); + EscapeEnumerator EE(F, "gc_cleanup", /*HandleExceptions=*/true, + DTU.hasValue() ? DTU.getPointer() : nullptr); while (IRBuilder<> *AtExit = EE.Next()) { // Pop the entry from the shadow stack. Don't reuse CurrentHead from // AtEntry, since that would make the value live for the entire function. diff --git a/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/llvm/lib/CodeGen/SjLjEHPrepare.cpp index d2fd4a6d8fd9..8211e3d6a9dd 100644 --- a/llvm/lib/CodeGen/SjLjEHPrepare.cpp +++ b/llvm/lib/CodeGen/SjLjEHPrepare.cpp @@ -472,8 +472,12 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { // Finally, for any returns from this function, if this function contains an // invoke, add a call to unregister the function context. - for (ReturnInst *Return : Returns) - CallInst::Create(UnregisterFn, FuncCtx, "", Return); + for (ReturnInst *Return : Returns) { + Instruction *InsertPoint = Return; + if (CallInst *CI = Return->getParent()->getTerminatingMustTailCall()) + InsertPoint = CI; + CallInst::Create(UnregisterFn, FuncCtx, "", InsertPoint); + } return true; } diff --git a/llvm/lib/CodeGen/SlotIndexes.cpp b/llvm/lib/CodeGen/SlotIndexes.cpp index d2bfdc663edb..c933031ef37d 100644 --- a/llvm/lib/CodeGen/SlotIndexes.cpp +++ b/llvm/lib/CodeGen/SlotIndexes.cpp @@ -83,7 +83,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { SlotIndex blockStartIndex(&indexList.back(), SlotIndex::Slot_Block); for (MachineInstr &MI : MBB) { - if (MI.isDebugInstr()) + if (MI.isDebugOrPseudoInstr()) continue; // Insert a store index for the instr. @@ -241,19 +241,18 @@ void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB, for (MachineBasicBlock::iterator I = End; I != Begin;) { --I; MachineInstr &MI = *I; - if (!MI.isDebugInstr() && mi2iMap.find(&MI) == mi2iMap.end()) + if (!MI.isDebugOrPseudoInstr() && mi2iMap.find(&MI) == mi2iMap.end()) insertMachineInstrInMaps(MI); } } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void SlotIndexes::dump() const { - for (IndexList::const_iterator itr = indexList.begin(); - itr != indexList.end(); ++itr) { - dbgs() << itr->getIndex() << " "; + for (const IndexListEntry &ILE : indexList) { + dbgs() << ILE.getIndex() << " "; - if (itr->getInstr()) { - dbgs() << *itr->getInstr(); + if (ILE.getInstr()) { + dbgs() << *ILE.getInstr(); } else { dbgs() << "\n"; } @@ -280,4 +279,3 @@ LLVM_DUMP_METHOD void SlotIndex::dump() const { dbgs() << "\n"; } #endif - diff --git a/llvm/lib/CodeGen/SpillPlacement.cpp b/llvm/lib/CodeGen/SpillPlacement.cpp index 4bb50a285497..91da5e49713c 100644 --- a/llvm/lib/CodeGen/SpillPlacement.cpp +++ b/llvm/lib/CodeGen/SpillPlacement.cpp @@ -121,9 +121,9 @@ struct SpillPlacement::Node { SumLinkWeights += w; // There can be multiple links to the same bundle, add them up. - for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I) - if (I->second == b) { - I->first += w; + for (std::pair<BlockFrequency, unsigned> &L : Links) + if (L.second == b) { + L.first += w; return; } // This must be the first link to b. @@ -153,11 +153,11 @@ struct SpillPlacement::Node { // Compute the weighted sum of inputs. 
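
SpillPlacement treats the edge bundles as a Hopfield-style network: each node sums the frequencies of links to neighbours currently at -1 and at +1, folds in its own bias, and takes the sign, which is what the weighted-sum code here computes. A toy update step with plain integers standing in for BlockFrequency (the real pass also handles thresholds and saturation, omitted here):

    #include <cassert>
    #include <vector>

    struct Link { unsigned Weight, Node; };

    // One update: value = sign((BiasP + SumP) - (BiasN + SumN)).
    static int update(const std::vector<Link> &Links,
                      const std::vector<int> &Value, int BiasP, int BiasN) {
      int SumP = BiasP, SumN = BiasN;
      for (const Link &L : Links) {
        if (Value[L.Node] == -1) SumN += L.Weight;      // neighbour spills
        else if (Value[L.Node] == 1) SumP += L.Weight;  // neighbour in reg
      }
      return SumP > SumN ? 1 : (SumN > SumP ? -1 : 0);
    }

    int main() {
      std::vector<int> Value = {1, -1, 1};
      std::vector<Link> Links = {{10, 0}, {3, 1}, {2, 2}};
      assert(update(Links, Value, 0, 0) == 1); // 12 vs 3: prefer a register
    }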
BlockFrequency SumN = BiasN; BlockFrequency SumP = BiasP; - for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I) { - if (nodes[I->second].Value == -1) - SumN += I->first; - else if (nodes[I->second].Value == 1) - SumP += I->first; + for (std::pair<BlockFrequency, unsigned> &L : Links) { + if (nodes[L.second].Value == -1) + SumN += L.first; + else if (nodes[L.second].Value == 1) + SumP += L.first; } // Each weighted sum is going to be less than the total frequency of the @@ -258,35 +258,33 @@ void SpillPlacement::setThreshold(const BlockFrequency &Entry) { /// addConstraints - Compute node biases and weights from a set of constraints. /// Set a bit in NodeMask for each active node. void SpillPlacement::addConstraints(ArrayRef<BlockConstraint> LiveBlocks) { - for (ArrayRef<BlockConstraint>::iterator I = LiveBlocks.begin(), - E = LiveBlocks.end(); I != E; ++I) { - BlockFrequency Freq = BlockFrequencies[I->Number]; + for (const BlockConstraint &LB : LiveBlocks) { + BlockFrequency Freq = BlockFrequencies[LB.Number]; // Live-in to block? - if (I->Entry != DontCare) { - unsigned ib = bundles->getBundle(I->Number, false); + if (LB.Entry != DontCare) { + unsigned ib = bundles->getBundle(LB.Number, false); activate(ib); - nodes[ib].addBias(Freq, I->Entry); + nodes[ib].addBias(Freq, LB.Entry); } // Live-out from block? - if (I->Exit != DontCare) { - unsigned ob = bundles->getBundle(I->Number, true); + if (LB.Exit != DontCare) { + unsigned ob = bundles->getBundle(LB.Number, true); activate(ob); - nodes[ob].addBias(Freq, I->Exit); + nodes[ob].addBias(Freq, LB.Exit); } } } /// addPrefSpill - Same as addConstraints(PrefSpill) void SpillPlacement::addPrefSpill(ArrayRef<unsigned> Blocks, bool Strong) { - for (ArrayRef<unsigned>::iterator I = Blocks.begin(), E = Blocks.end(); - I != E; ++I) { - BlockFrequency Freq = BlockFrequencies[*I]; + for (unsigned B : Blocks) { + BlockFrequency Freq = BlockFrequencies[B]; if (Strong) Freq += Freq; - unsigned ib = bundles->getBundle(*I, false); - unsigned ob = bundles->getBundle(*I, true); + unsigned ib = bundles->getBundle(B, false); + unsigned ob = bundles->getBundle(B, true); activate(ib); activate(ob); nodes[ib].addBias(Freq, PrefSpill); @@ -295,9 +293,7 @@ void SpillPlacement::addPrefSpill(ArrayRef<unsigned> Blocks, bool Strong) { } void SpillPlacement::addLinks(ArrayRef<unsigned> Links) { - for (ArrayRef<unsigned>::iterator I = Links.begin(), E = Links.end(); I != E; - ++I) { - unsigned Number = *I; + for (unsigned Number : Links) { unsigned ib = bundles->getBundle(Number, false); unsigned ob = bundles->getBundle(Number, true); @@ -377,3 +373,26 @@ SpillPlacement::finish() { ActiveNodes = nullptr; return Perfect; } + +void SpillPlacement::BlockConstraint::print(raw_ostream &OS) const { + auto toString = [](BorderConstraint C) -> StringRef { + switch(C) { + case DontCare: return "DontCare"; + case PrefReg: return "PrefReg"; + case PrefSpill: return "PrefSpill"; + case PrefBoth: return "PrefBoth"; + case MustSpill: return "MustSpill"; + }; + llvm_unreachable("uncovered switch"); + }; + + dbgs() << "{" << Number << ", " + << toString(Entry) << ", " + << toString(Exit) << ", " + << (ChangesValue ? 
"changes" : "no change") << "}"; +} + +void SpillPlacement::BlockConstraint::dump() const { + print(dbgs()); + dbgs() << "\n"; +} diff --git a/llvm/lib/CodeGen/SpillPlacement.h b/llvm/lib/CodeGen/SpillPlacement.h index aa0e07ef92e3..d2273a163025 100644 --- a/llvm/lib/CodeGen/SpillPlacement.h +++ b/llvm/lib/CodeGen/SpillPlacement.h @@ -95,6 +95,9 @@ public: /// the block has a non-PHI def. When this is false, a live-in value on /// the stack can be live-out on the stack without inserting a spill. bool ChangesValue; + + void print(raw_ostream &OS) const; + void dump() const; }; /// prepare - Reset state and prepare for a new spill placement computation. diff --git a/llvm/lib/CodeGen/SplitKit.cpp b/llvm/lib/CodeGen/SplitKit.cpp index a6a3149ae25b..c70620fd7532 100644 --- a/llvm/lib/CodeGen/SplitKit.cpp +++ b/llvm/lib/CodeGen/SplitKit.cpp @@ -94,10 +94,10 @@ InsertPointAnalysis::computeLastInsertPoint(const LiveInterval &CurLI, // instructions in the block. if (ExceptionalSuccessors.empty()) return LIP.first; - for (auto I = MBB.rbegin(), E = MBB.rend(); I != E; ++I) { - if ((EHPadSuccessor && I->isCall()) || - I->getOpcode() == TargetOpcode::INLINEASM_BR) { - LIP.second = LIS.getInstructionIndex(*I); + for (const MachineInstr &MI : llvm::reverse(MBB)) { + if ((EHPadSuccessor && MI.isCall()) || + MI.getOpcode() == TargetOpcode::INLINEASM_BR) { + LIP.second = LIS.getInstructionIndex(MI); break; } } @@ -118,6 +118,13 @@ InsertPointAnalysis::computeLastInsertPoint(const LiveInterval &CurLI, if (!VNI) return LIP.first; + // The def of statepoint instruction is a gc relocation and it should be alive + // in landing pad. So we cannot split interval after statepoint instruction. + if (SlotIndex::isSameInstr(VNI->def, LIP.second)) + if (auto *I = LIS.getInstructionFromIndex(LIP.second)) + if (I->getOpcode() == TargetOpcode::STATEPOINT) + return LIP.second; + // If the value leaving MBB was defined after the call in MBB, it can't // really be live-in to the landing pad. This can happen if the landing pad // has a PHI, and this register is undef on the exceptional edge. @@ -357,15 +364,15 @@ void SplitAnalysis::analyze(const LiveInterval *li) { //===----------------------------------------------------------------------===// /// Create a new SplitEditor for editing the LiveInterval analyzed by SA. -SplitEditor::SplitEditor(SplitAnalysis &sa, AliasAnalysis &aa, - LiveIntervals &lis, VirtRegMap &vrm, - MachineDominatorTree &mdt, - MachineBlockFrequencyInfo &mbfi) - : SA(sa), AA(aa), LIS(lis), VRM(vrm), - MRI(vrm.getMachineFunction().getRegInfo()), MDT(mdt), - TII(*vrm.getMachineFunction().getSubtarget().getInstrInfo()), - TRI(*vrm.getMachineFunction().getSubtarget().getRegisterInfo()), - MBFI(mbfi), RegAssign(Allocator) {} +SplitEditor::SplitEditor(SplitAnalysis &SA, AliasAnalysis &AA, + LiveIntervals &LIS, VirtRegMap &VRM, + MachineDominatorTree &MDT, + MachineBlockFrequencyInfo &MBFI, VirtRegAuxInfo &VRAI) + : SA(SA), AA(AA), LIS(LIS), VRM(VRM), + MRI(VRM.getMachineFunction().getRegInfo()), MDT(MDT), + TII(*VRM.getMachineFunction().getSubtarget().getInstrInfo()), + TRI(*VRM.getMachineFunction().getSubtarget().getRegisterInfo()), + MBFI(MBFI), VRAI(VRAI), RegAssign(Allocator) {} void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) { Edit = &LRE; @@ -557,71 +564,19 @@ SlotIndex SplitEditor::buildCopy(Register FromReg, Register ToReg, // First pass: Try to find a perfectly matching subregister index. If none // exists find the one covering the most lanemask bits. 
- SmallVector<unsigned, 8> PossibleIndexes; - unsigned BestIdx = 0; - unsigned BestCover = 0; const TargetRegisterClass *RC = MRI.getRegClass(FromReg); assert(RC == MRI.getRegClass(ToReg) && "Should have same reg class"); - for (unsigned Idx = 1, E = TRI.getNumSubRegIndices(); Idx < E; ++Idx) { - // Is this index even compatible with the given class? - if (TRI.getSubClassWithSubReg(RC, Idx) != RC) - continue; - LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(Idx); - // Early exit if we found a perfect match. - if (SubRegMask == LaneMask) { - BestIdx = Idx; - break; - } - // The index must not cover any lanes outside \p LaneMask. - if ((SubRegMask & ~LaneMask).any()) - continue; - - unsigned PopCount = SubRegMask.getNumLanes(); - PossibleIndexes.push_back(Idx); - if (PopCount > BestCover) { - BestCover = PopCount; - BestIdx = Idx; - } - } + SmallVector<unsigned, 8> Indexes; // Abort if we cannot possibly implement the COPY with the given indexes. - if (BestIdx == 0) + if (!TRI.getCoveringSubRegIndexes(MRI, RC, LaneMask, Indexes)) report_fatal_error("Impossible to implement partial COPY"); - SlotIndex Def = buildSingleSubRegCopy(FromReg, ToReg, MBB, InsertBefore, - BestIdx, DestLI, Late, SlotIndex()); - - // Greedy heuristic: Keep iterating keeping the best covering subreg index - // each time. - LaneBitmask LanesLeft = LaneMask & ~(TRI.getSubRegIndexLaneMask(BestIdx)); - while (LanesLeft.any()) { - unsigned BestIdx = 0; - int BestCover = std::numeric_limits<int>::min(); - for (unsigned Idx : PossibleIndexes) { - LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(Idx); - // Early exit if we found a perfect match. - if (SubRegMask == LanesLeft) { - BestIdx = Idx; - break; - } - - // Try to cover as much of the remaining lanes as possible but - // as few of the already covered lanes as possible. - int Cover = (SubRegMask & LanesLeft).getNumLanes() - - (SubRegMask & ~LanesLeft).getNumLanes(); - if (Cover > BestCover) { - BestCover = Cover; - BestIdx = Idx; - } - } - - if (BestIdx == 0) - report_fatal_error("Impossible to implement partial COPY"); - - buildSingleSubRegCopy(FromReg, ToReg, MBB, InsertBefore, BestIdx, - DestLI, Late, Def); - LanesLeft &= ~TRI.getSubRegIndexLaneMask(BestIdx); + SlotIndex Def; + for (unsigned BestIdx : Indexes) { + Def = buildSingleSubRegCopy(FromReg, ToReg, MBB, InsertBefore, BestIdx, + DestLI, Late, Def); } return Def; @@ -747,6 +702,23 @@ SlotIndex SplitEditor::enterIntvAtEnd(MachineBasicBlock &MBB) { LLVM_DEBUG(dbgs() << ": not live\n"); return End; } + SlotIndex LSP = SA.getLastSplitPoint(&MBB); + if (LSP < Last) { + // It could be that the use after LSP is a def, and thus the ParentVNI + // just selected starts at that def. For this case to exist, the def + // must be part of a tied def/use pair (as otherwise we'd have split + // distinct live ranges into individual live intervals), and thus we + // can insert the def into the VNI of the use and the tied def/use + // pair can live in the resulting interval. 
+ Last = LSP;
+ ParentVNI = Edit->getParent().getVNInfoAt(Last);
+ if (!ParentVNI) {
+ // undef use --> undef tied def
+ LLVM_DEBUG(dbgs() << ": tied use not live\n");
+ return End;
+ }
+ }
+
LLVM_DEBUG(dbgs() << ": valno " << ParentVNI->id);
VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Last, MBB,
SA.getLastSplitPointIter(&MBB));
@@ -836,6 +808,12 @@ SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) {
return VNI->def;
}
+static bool hasTiedUseOf(MachineInstr &MI, unsigned Reg) {
+ return any_of(MI.defs(), [Reg](const MachineOperand &MO) {
+ return MO.isReg() && MO.isTied() && MO.getReg() == Reg;
+ });
+}
+
void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) {
assert(OpenIdx && "openIntv not called before overlapIntv");
const VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Start);
@@ -847,6 +825,16 @@ void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) {
// The complement interval will be extended as needed by LICalc.extend().
if (ParentVNI)
forceRecompute(0, *ParentVNI);
+
+ // If the last use is tied to a def, we can't mark it as live for the
+ // interval which includes only the use. That would cause the tied pair
+ // to end up in two different intervals.
+ if (auto *MI = LIS.getInstructionFromIndex(End))
+ if (hasTiedUseOf(*MI, Edit->getReg())) {
+ LLVM_DEBUG(dbgs() << "skip overlap due to tied def at end\n");
+ return;
+ }
+
LLVM_DEBUG(dbgs() << " overlapIntv [" << Start << ';' << End << "):");
RegAssign.insert(Start, End, OpenIdx);
LLVM_DEBUG(dump());
@@ -862,8 +850,8 @@ void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) {
RegAssignMap::iterator AssignI;
AssignI.setMap(RegAssign);
- for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
- SlotIndex Def = Copies[i]->def;
+ for (const VNInfo *C : Copies) {
+ SlotIndex Def = C->def;
MachineInstr *MI = LIS.getInstructionFromIndex(Def);
assert(MI && "No instruction for back-copy");
@@ -871,7 +859,7 @@ void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) {
MachineBasicBlock::iterator MBBI(MI);
bool AtBegin;
do AtBegin = MBBI == MBB->begin();
- while (!AtBegin && (--MBBI)->isDebugInstr());
+ while (!AtBegin && (--MBBI)->isDebugOrPseudoInstr());
LLVM_DEBUG(dbgs() << "Removing " << Def << '\t' << *MI);
LIS.removeVRegDefAt(*LI, Def);
@@ -887,12 +875,18 @@ void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) {
if (AssignI.stop() != Def)
continue;
unsigned RegIdx = AssignI.value();
- if (AtBegin || !MBBI->readsVirtualRegister(Edit->getReg())) {
+ // We could hoist a back-copy right after another back-copy. As a result,
+ // MBBI points to a copy instruction which is actually dead now.
+ // We cannot set its stop to MBBI, which would be the same as the start,
+ // and intervals do not support that.
+ SlotIndex Kill =
+ AtBegin ?
SlotIndex() : LIS.getInstructionIndex(*MBBI).getRegSlot(); + if (AtBegin || !MBBI->readsVirtualRegister(Edit->getReg()) || + Kill <= AssignI.start()) { LLVM_DEBUG(dbgs() << " cannot find simple kill of RegIdx " << RegIdx << '\n'); forceRecompute(RegIdx, *Edit->getParent().getVNInfoAt(Def)); } else { - SlotIndex Kill = LIS.getInstructionIndex(*MBBI).getRegSlot(); LLVM_DEBUG(dbgs() << " move kill to " << Kill << '\t' << *MBBI); AssignI.setStop(Kill); } @@ -1098,10 +1092,13 @@ void SplitEditor::hoistCopies() { NotToHoistSet.insert(ParentVNI->id); continue; } - SlotIndex Last = LIS.getMBBEndIdx(Dom.first).getPrevSlot(); - Dom.second = - defFromParent(0, ParentVNI, Last, *Dom.first, - SA.getLastSplitPointIter(Dom.first))->def; + SlotIndex LSP = SA.getLastSplitPoint(Dom.first); + if (LSP <= ParentVNI->def) { + NotToHoistSet.insert(ParentVNI->id); + continue; + } + Dom.second = defFromParent(0, ParentVNI, LSP, *Dom.first, + SA.getLastSplitPointIter(Dom.first))->def; } // Remove redundant back-copies that are now known to be dominated by another @@ -1322,11 +1319,9 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) { SmallVector<ExtPoint,4> ExtPoints; - for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Edit->getReg()), - RE = MRI.reg_end(); RI != RE;) { - MachineOperand &MO = *RI; + for (MachineOperand &MO : + llvm::make_early_inc_range(MRI.reg_operands(Edit->getReg()))) { MachineInstr *MI = MO.getParent(); - ++RI; // LiveDebugVariables should have handled all DBG_VALUE instructions. if (MI->isDebugValue()) { LLVM_DEBUG(dbgs() << "Zapping " << *MI); @@ -1416,8 +1411,8 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) { void SplitEditor::deleteRematVictims() { SmallVector<MachineInstr*, 8> Dead; - for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I){ - LiveInterval *LI = &LIS.getInterval(*I); + for (const Register &R : *Edit) { + LiveInterval *LI = &LIS.getInterval(R); for (const LiveRange::Segment &S : LI->segments) { // Dead defs end at the dead slot. if (S.end != S.valno->def.getDeadSlot()) @@ -1554,7 +1549,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { } // Calculate spill weight and allocation hints for new intervals. - Edit->calculateRegClassAndHint(VRM.getMachineFunction(), SA.Loops, MBFI); + Edit->calculateRegClassAndHint(VRM.getMachineFunction(), VRAI); assert(!LRMap || LRMap->size() == Edit->size()); } @@ -1583,7 +1578,7 @@ bool SplitAnalysis::shouldSplitSingleBlock(const BlockInfo &BI, void SplitEditor::splitSingleBlock(const SplitAnalysis::BlockInfo &BI) { openIntv(); - SlotIndex LastSplitPoint = SA.getLastSplitPoint(BI.MBB->getNumber()); + SlotIndex LastSplitPoint = SA.getLastSplitPoint(BI.MBB); SlotIndex SegStart = enterIntvBefore(std::min(BI.FirstInstr, LastSplitPoint)); if (!BI.LiveOut || BI.LastInstr < LastSplitPoint) { @@ -1737,7 +1732,7 @@ void SplitEditor::splitRegInBlock(const SplitAnalysis::BlockInfo &BI, return; } - SlotIndex LSP = SA.getLastSplitPoint(BI.MBB->getNumber()); + SlotIndex LSP = SA.getLastSplitPoint(BI.MBB); if (!LeaveBefore || LeaveBefore > BI.LastInstr.getBoundaryIndex()) { // @@ -1814,7 +1809,7 @@ void SplitEditor::splitRegOutBlock(const SplitAnalysis::BlockInfo &BI, << ", enter after " << EnterAfter << (BI.LiveIn ? 
", stack-in" : ", defined in block")); - SlotIndex LSP = SA.getLastSplitPoint(BI.MBB->getNumber()); + SlotIndex LSP = SA.getLastSplitPoint(BI.MBB); assert(IntvOut && "Must have register out"); assert(BI.LiveOut && "Must be live-out"); @@ -1864,3 +1859,16 @@ void SplitEditor::splitRegOutBlock(const SplitAnalysis::BlockInfo &BI, SlotIndex From = enterIntvBefore(std::min(Idx, BI.FirstInstr)); useIntv(From, Idx); } + +void SplitAnalysis::BlockInfo::print(raw_ostream &OS) const { + OS << "{" << printMBBReference(*MBB) << ", " + << "uses " << FirstInstr << " to " << LastInstr << ", " + << "1st def " << FirstDef << ", " + << (LiveIn ? "live in" : "dead in") << ", " + << (LiveOut ? "live out" : "dead out") << "}"; +} + +void SplitAnalysis::BlockInfo::dump() const { + print(dbgs()); + dbgs() << "\n"; +} diff --git a/llvm/lib/CodeGen/SplitKit.h b/llvm/lib/CodeGen/SplitKit.h index a94518f5a4fc..fbcffacb49ab 100644 --- a/llvm/lib/CodeGen/SplitKit.h +++ b/llvm/lib/CodeGen/SplitKit.h @@ -44,6 +44,7 @@ class MachineRegisterInfo; class TargetInstrInfo; class TargetRegisterInfo; class VirtRegMap; +class VirtRegAuxInfo; /// Determines the latest safe point in a block in which we can insert a split, /// spill or other instruction related with CurLI. @@ -131,6 +132,9 @@ public: bool isOneInstr() const { return SlotIndex::isSameInstr(FirstInstr, LastInstr); } + + void print(raw_ostream &OS) const; + void dump() const; }; private: @@ -235,6 +239,10 @@ public: return IPA.getLastInsertPoint(*CurLI, *MF.getBlockNumbered(Num)); } + SlotIndex getLastSplitPoint(MachineBasicBlock *BB) { + return IPA.getLastInsertPoint(*CurLI, *BB); + } + MachineBasicBlock::iterator getLastSplitPointIter(MachineBasicBlock *BB) { return IPA.getLastInsertPointIter(*CurLI, *BB); } @@ -265,6 +273,7 @@ class LLVM_LIBRARY_VISIBILITY SplitEditor { const TargetInstrInfo &TII; const TargetRegisterInfo &TRI; const MachineBlockFrequencyInfo &MBFI; + VirtRegAuxInfo &VRAI; public: /// ComplementSpillMode - Select how the complement live range should be @@ -450,9 +459,9 @@ private: public: /// Create a new SplitEditor for editing the LiveInterval analyzed by SA. /// Newly created intervals will be appended to newIntervals. - SplitEditor(SplitAnalysis &sa, AAResults &aa, LiveIntervals &lis, - VirtRegMap &vrm, MachineDominatorTree &mdt, - MachineBlockFrequencyInfo &mbfi); + SplitEditor(SplitAnalysis &SA, AAResults &AA, LiveIntervals &LIS, + VirtRegMap &VRM, MachineDominatorTree &MDT, + MachineBlockFrequencyInfo &MBFI, VirtRegAuxInfo &VRAI); /// reset - Prepare for a new split. void reset(LiveRangeEdit&, ComplementSpillMode = SM_Partition); @@ -502,7 +511,8 @@ public: SlotIndex leaveIntvAtTop(MachineBasicBlock &MBB); /// overlapIntv - Indicate that all instructions in range should use the open - /// interval, but also let the complement interval be live. + /// interval if End does not have tied-def usage of the register and in this + /// case compliment interval is used. Let the complement interval be live. /// /// This doubles the register pressure, but is sometimes required to deal with /// register uses after the last valid split point. diff --git a/llvm/lib/CodeGen/StackColoring.cpp b/llvm/lib/CodeGen/StackColoring.cpp index af58204f6db5..162f3aab024d 100644 --- a/llvm/lib/CodeGen/StackColoring.cpp +++ b/llvm/lib/CodeGen/StackColoring.cpp @@ -678,9 +678,8 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) { // to this bb). 
BitVector BetweenStartEnd; BetweenStartEnd.resize(NumSlot); - for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { - BlockBitVecMap::const_iterator I = SeenStartMap.find(*PI); + for (const MachineBasicBlock *Pred : MBB->predecessors()) { + BlockBitVecMap::const_iterator I = SeenStartMap.find(Pred); if (I != SeenStartMap.end()) { BetweenStartEnd |= I->second; } @@ -819,9 +818,8 @@ void StackColoring::calculateLocalLiveness() { // Compute LiveIn by unioning together the LiveOut sets of all preds. BitVector LocalLiveIn; - for (MachineBasicBlock::const_pred_iterator PI = BB->pred_begin(), - PE = BB->pred_end(); PI != PE; ++PI) { - LivenessMap::const_iterator I = BlockLiveness.find(*PI); + for (MachineBasicBlock *Pred : BB->predecessors()) { + LivenessMap::const_iterator I = BlockLiveness.find(Pred); // PR37130: transformations prior to stack coloring can // sometimes leave behind statically unreachable blocks; these // can be safely skipped here. diff --git a/llvm/lib/CodeGen/StackMaps.cpp b/llvm/lib/CodeGen/StackMaps.cpp index faf07e90c39c..36e8f129ea15 100644 --- a/llvm/lib/CodeGen/StackMaps.cpp +++ b/llvm/lib/CodeGen/StackMaps.cpp @@ -511,7 +511,7 @@ void StackMaps::recordStackMapOpers(const MCSymbol &MILabel, const MachineFrameInfo &MFI = AP.MF->getFrameInfo(); const TargetRegisterInfo *RegInfo = AP.MF->getSubtarget().getRegisterInfo(); bool HasDynamicFrameSize = - MFI.hasVarSizedObjects() || RegInfo->needsStackRealignment(*(AP.MF)); + MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(*(AP.MF)); uint64_t FrameSize = HasDynamicFrameSize ? UINT64_MAX : MFI.getStackSize(); auto CurrentIt = FnInfos.find(AP.CurrentFnSym); diff --git a/llvm/lib/CodeGen/StackProtector.cpp b/llvm/lib/CodeGen/StackProtector.cpp index 0411faabbcc3..9f229d51b985 100644 --- a/llvm/lib/CodeGen/StackProtector.cpp +++ b/llvm/lib/CodeGen/StackProtector.cpp @@ -70,6 +70,7 @@ StackProtector::StackProtector() : FunctionPass(ID), SSPBufferSize(8) { INITIALIZE_PASS_BEGIN(StackProtector, DEBUG_TYPE, "Insert stack protectors", false, true) INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_END(StackProtector, DEBUG_TYPE, "Insert stack protectors", false, true) @@ -192,7 +193,7 @@ bool StackProtector::HasAddressTaken(const Instruction *AI, // Ignore intrinsics that do not become real instructions. // TODO: Narrow this to intrinsics that have store-like effects. const auto *CI = cast<CallInst>(I); - if (!isa<DbgInfoIntrinsic>(CI) && !CI->isLifetimeStartOrEnd()) + if (!CI->isDebugOrPseudoInst() && !CI->isLifetimeStartOrEnd()) return true; break; } @@ -379,9 +380,8 @@ static Value *getStackGuard(const TargetLoweringBase *TLI, Module *M, IRBuilder<> &B, bool *SupportsSelectionDAGSP = nullptr) { Value *Guard = TLI->getIRStackGuard(B); - auto GuardMode = TLI->getTargetMachine().Options.StackProtectorGuard; - if ((GuardMode == llvm::StackProtectorGuards::TLS || - GuardMode == llvm::StackProtectorGuards::None) && Guard) + StringRef GuardMode = M->getStackProtectorGuard(); + if ((GuardMode == "tls" || GuardMode.empty()) && Guard) return B.CreateLoad(B.getInt8PtrTy(), Guard, true, "StackGuard"); // Use SelectionDAG SSP handling, since there isn't an IR guard. @@ -470,21 +470,36 @@ bool StackProtector::InsertStackProtectors() { // instrumentation has already been generated. 
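
The calculateLocalLiveness hunk above is the usual forward dataflow step: a block's live-in set is the union of its predecessors' live-out sets, iterated until nothing changes. The step in miniature with std::bitset (a hardwired three-block shape rather than the pass's worklist):

    #include <bitset>
    #include <cassert>
    #include <vector>

    int main() {
      const int NumSlot = 4;
      // Block 2 has blocks 0 and 1 as predecessors.
      std::vector<std::bitset<NumSlot>> LiveOut(3);
      LiveOut[0].set(1); // slot 1 live out of block 0
      LiveOut[1].set(3); // slot 3 live out of block 1

      std::bitset<NumSlot> LocalLiveIn;
      for (int Pred : {0, 1})
        LocalLiveIn |= LiveOut[Pred]; // LiveIn = union of preds' LiveOut
      assert(LocalLiveIn.test(1) && LocalLiveIn.test(3));
    }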
HasIRCheck = true;
+ // If we're instrumenting a block with a musttail call, the check has to be
+ // inserted before the call rather than between it and the return. The
+ // verifier guarantees that a musttail call is either directly before the
+ // return, or separated from it only by a single correct bitcast of the
+ // return value, so we don't need to worry about many situations here.
+ Instruction *CheckLoc = RI;
+ Instruction *Prev = RI->getPrevNonDebugInstruction();
+ if (Prev && isa<CallInst>(Prev) && cast<CallInst>(Prev)->isMustTailCall())
+ CheckLoc = Prev;
+ else if (Prev) {
+ Prev = Prev->getPrevNonDebugInstruction();
+ if (Prev && isa<CallInst>(Prev) && cast<CallInst>(Prev)->isMustTailCall())
+ CheckLoc = Prev;
+ }
+
// Generate epilogue instrumentation. The epilogue instrumentation can be
// function-based or inlined depending on which mechanism the target is
// providing.
if (Function *GuardCheck = TLI->getSSPStackGuardCheck(*M)) {
// Generate the function-based epilogue instrumentation.
// The target provides a guard check function, generate a call to it.
- IRBuilder<> B(RI);
+ IRBuilder<> B(CheckLoc);
LoadInst *Guard = B.CreateLoad(B.getInt8PtrTy(), AI, true, "Guard");
CallInst *Call = B.CreateCall(GuardCheck, {Guard});
Call->setAttributes(GuardCheck->getAttributes());
Call->setCallingConv(GuardCheck->getCallingConv());
} else {
// Generate the epilogue with inline instrumentation.
- // If we do not support SelectionDAG based tail calls, generate IR level
- // tail calls.
+ // If we do not support SelectionDAG based calls, generate IR level
+ // calls.
//
// For each block with a return instruction, convert this:
//
@@ -514,7 +529,8 @@ bool StackProtector::InsertStackProtectors() {
BasicBlock *FailBB = CreateFailBB();
// Split the basic block before the return instruction.
- BasicBlock *NewBB = BB->splitBasicBlock(RI->getIterator(), "SP_return");
+ BasicBlock *NewBB =
+ BB->splitBasicBlock(CheckLoc->getIterator(), "SP_return");
// Update the dominator tree if we need to.
if (DT && DT->isReachableFromEntry(BB)) {
diff --git a/llvm/lib/CodeGen/StackSlotColoring.cpp b/llvm/lib/CodeGen/StackSlotColoring.cpp
index a6f8974f3343..ebe00bd7402f 100644
--- a/llvm/lib/CodeGen/StackSlotColoring.cpp
+++ b/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -76,7 +76,7 @@ namespace {
// OrigAlignments - Alignments of stack objects before coloring.
SmallVector<Align, 16> OrigAlignments;
- // OrigSizes - Sizess of stack objects before coloring.
+ // OrigSizes - Sizes of stack objects before coloring.
SmallVector<unsigned, 16> OrigSizes;
// AllColors - If index is set, it's a spill slot, i.e. color.
@@ -157,12 +157,8 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) {
SSRefs.resize(MFI->getObjectIndexEnd());
// FIXME: Need the equivalent of MachineRegisterInfo for frameindex operands.
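
The CheckLoc scan above only has to look back at most two non-debug instructions from the return, because of the verifier guarantee it cites. A mock of that scan over a flat instruction list (Kind and Inst are invented stand-ins for real IR instructions, and the real code walks via getPrevNonDebugInstruction rather than indexing):

    #include <cassert>
    #include <vector>

    enum class Kind { MustTailCall, BitCast, Ret, Other };
    struct Inst { Kind K; };

    // Given a block ending in Ret, find where the stack-protector check goes.
    static size_t checkLoc(const std::vector<Inst> &BB) {
      size_t RI = BB.size() - 1; // the return instruction
      if (RI >= 1 && BB[RI - 1].K == Kind::MustTailCall)
        return RI - 1;
      if (RI >= 2 && BB[RI - 2].K == Kind::MustTailCall)
        return RI - 2; // bitcast of the return value sits in between
      return RI;
    }

    int main() {
      std::vector<Inst> Plain = {{Kind::Other}, {Kind::Ret}};
      std::vector<Inst> Tail = {{Kind::MustTailCall}, {Kind::BitCast}, {Kind::Ret}};
      assert(checkLoc(Plain) == 1);
      assert(checkLoc(Tail) == 0); // check goes before the musttail call
    }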
- for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end(); - MBBI != E; ++MBBI) { - MachineBasicBlock *MBB = &*MBBI; - for (MachineBasicBlock::iterator MII = MBB->begin(), EE = MBB->end(); - MII != EE; ++MII) { - MachineInstr &MI = *MII; + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); if (!MO.isFI()) @@ -474,9 +470,8 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { ++I; } - for (SmallVectorImpl<MachineInstr *>::iterator I = toErase.begin(), - E = toErase.end(); I != E; ++I) - (*I)->eraseFromParent(); + for (MachineInstr *MI : toErase) + MI->eraseFromParent(); return changed; } diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp index 575bf555c489..af735f2a0216 100644 --- a/llvm/lib/CodeGen/TailDuplicator.cpp +++ b/llvm/lib/CodeGen/TailDuplicator.cpp @@ -216,6 +216,9 @@ bool TailDuplicator::tailDuplicateAndUpdate( // Rewrite uses that are outside of the original def's block. MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg); + // Only remove instructions after loop, as DBG_VALUE_LISTs with multiple + // uses of VReg may invalidate the use iterator when erased. + SmallPtrSet<MachineInstr *, 4> InstrsToRemove; while (UI != MRI->use_end()) { MachineOperand &UseMO = *UI; MachineInstr *UseMI = UseMO.getParent(); @@ -225,13 +228,15 @@ bool TailDuplicator::tailDuplicateAndUpdate( // a debug instruction that is a kill. // FIXME: Should it SSAUpdate job to delete debug instructions // instead of replacing the use with undef? - UseMI->eraseFromParent(); + InstrsToRemove.insert(UseMI); continue; } if (UseMI->getParent() == DefBB && !UseMI->isPHI()) continue; SSAUpdate.RewriteUse(UseMO); } + for (auto *MI : InstrsToRemove) + MI->eraseFromParent(); } SSAUpdateVRs.clear(); @@ -683,7 +688,7 @@ bool TailDuplicator::isSimpleBB(MachineBasicBlock *TailBB) { return false; if (TailBB->pred_empty()) return false; - MachineBasicBlock::iterator I = TailBB->getFirstNonDebugInstr(); + MachineBasicBlock::iterator I = TailBB->getFirstNonDebugInstr(true); if (I == TailBB->end()) return true; return I->isUnconditionalBranch(); @@ -1035,10 +1040,9 @@ void TailDuplicator::removeDeadBlock( MachineFunction *MF = MBB->getParent(); // Update the call site info. 
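
The TailDuplicator change above is the standard collect-then-erase pattern: erasing a use while walking a use list can invalidate the iterator (here, when one DBG_VALUE_LIST holds several uses of VReg), so offenders are gathered into a set and erased only after the loop. The same pattern over an ordinary container:

    #include <cassert>
    #include <list>
    #include <set>

    int main() {
      std::list<int> Uses = {1, 2, 2, 3};
      std::set<const int *> ToRemove;
      for (const int &U : Uses) // never erase while iterating
        if (U == 2)
          ToRemove.insert(&U);
      // Erase after the loop; std::list elements have stable addresses.
      Uses.remove_if([&](const int &U) { return ToRemove.count(&U) != 0; });
      assert(Uses.size() == 2);
    }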
- std::for_each(MBB->begin(), MBB->end(), [MF](const MachineInstr &MI) {
+ for (const MachineInstr &MI : *MBB)
if (MI.shouldUpdateCallSiteInfo())
MF->eraseCallSiteInfo(&MI);
- });
if (RemovalCallback)
(*RemovalCallback)(MBB);
diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp
index 165860ef1aa8..2e4a656ea0c8 100644
--- a/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -472,36 +472,33 @@ static const TargetRegisterClass *canFoldCopy(const MachineInstr &MI,
return nullptr;
}
-void TargetInstrInfo::getNoop(MCInst &NopInst) const {
- llvm_unreachable("Not implemented");
-}
+MCInst TargetInstrInfo::getNop() const { llvm_unreachable("Not implemented"); }
-static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI,
- ArrayRef<unsigned> Ops, int FrameIndex,
- const TargetInstrInfo &TII) {
- unsigned StartIdx = 0;
- unsigned NumDefs = 0;
+std::pair<unsigned, unsigned>
+TargetInstrInfo::getPatchpointUnfoldableRange(const MachineInstr &MI) const {
switch (MI.getOpcode()) {
- case TargetOpcode::STACKMAP: {
+ case TargetOpcode::STACKMAP:
// StackMapLiveValues are foldable
- StartIdx = StackMapOpers(&MI).getVarIdx();
- break;
- }
- case TargetOpcode::PATCHPOINT: {
+ return std::make_pair(0, StackMapOpers(&MI).getVarIdx());
+ case TargetOpcode::PATCHPOINT:
// For PatchPoint, the call args are not foldable (even if reported in the
// stackmap e.g. via anyregcc).
- StartIdx = PatchPointOpers(&MI).getVarIdx();
- break;
- }
- case TargetOpcode::STATEPOINT: {
+ return std::make_pair(0, PatchPointOpers(&MI).getVarIdx());
+ case TargetOpcode::STATEPOINT:
// For statepoints, fold deopt and gc arguments, but not call arguments.
- StartIdx = StatepointOpers(&MI).getVarIdx();
- NumDefs = MI.getNumDefs();
- break;
- }
+ return std::make_pair(MI.getNumDefs(), StatepointOpers(&MI).getVarIdx());
default:
llvm_unreachable("unexpected stackmap opcode");
}
+}
+
+static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI,
+ ArrayRef<unsigned> Ops, int FrameIndex,
+ const TargetInstrInfo &TII) {
+ unsigned StartIdx = 0;
+ unsigned NumDefs = 0;
+ // getPatchpointUnfoldableRange asserts if MI is not a patchpoint.
+ std::tie(NumDefs, StartIdx) = TII.getPatchpointUnfoldableRange(MI);
unsigned DefToFoldIdx = MI.getNumOperands();
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 28c8bd0a7ded..3c5dd29036db 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -45,7 +45,6 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
-#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
@@ -114,17 +113,6 @@ static bool darwinHasSinCos(const Triple &TT) {
return true;
}
-// Although this default value is arbitrary, it is not random. It is assumed
-// that a condition that evaluates the same way by a higher percentage than this
-// is best represented as control flow. Therefore, the default value N should be
-// set such that the win from N% correct executions is greater than the loss
-// from (100 - N)% mispredicted executions for the majority of intended targets.
-static cl::opt<int> MinPercentageForPredictableBranch( - "min-predictable-branch", cl::init(99), - cl::desc("Minimum percentage (0-100) that a condition must be either true " - "or false to assume that the condition is predictable"), - cl::Hidden); - void TargetLoweringBase::InitLibcalls(const Triple &TT) { #define HANDLE_LIBCALL(code, name) \ setLibcallName(RTLIB::code, name); @@ -223,6 +211,23 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) { } } +/// GetFPLibCall - Helper to return the right libcall for the given floating +/// point type, or UNKNOWN_LIBCALL if there is none. +RTLIB::Libcall RTLIB::getFPLibCall(EVT VT, + RTLIB::Libcall Call_F32, + RTLIB::Libcall Call_F64, + RTLIB::Libcall Call_F80, + RTLIB::Libcall Call_F128, + RTLIB::Libcall Call_PPCF128) { + return + VT == MVT::f32 ? Call_F32 : + VT == MVT::f64 ? Call_F64 : + VT == MVT::f80 ? Call_F80 : + VT == MVT::f128 ? Call_F128 : + VT == MVT::ppcf128 ? Call_PPCF128 : + RTLIB::UNKNOWN_LIBCALL; +} + /// getFPEXT - Return the FPEXT_*_* value for the given types, or /// UNKNOWN_LIBCALL if there is none. RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) { @@ -481,6 +486,11 @@ RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) { return UNKNOWN_LIBCALL; } +RTLIB::Libcall RTLIB::getPOWI(EVT RetVT) { + return getFPLibCall(RetVT, POWI_F32, POWI_F64, POWI_F80, POWI_F128, + POWI_PPCF128); +} + RTLIB::Libcall RTLIB::getOUTLINE_ATOMIC(unsigned Opc, AtomicOrdering Order, MVT VT) { unsigned ModeN, ModelN; @@ -803,6 +813,10 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::SUBC, VT, Expand); setOperationAction(ISD::SUBE, VT, Expand); + // Absolute difference + setOperationAction(ISD::ABDS, VT, Expand); + setOperationAction(ISD::ABDU, VT, Expand); + // These default to Expand so they will be expanded to CTLZ/CTTZ by default. setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); @@ -849,6 +863,9 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::VECREDUCE_FMIN, VT, Expand); setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Expand); setOperationAction(ISD::VECREDUCE_SEQ_FMUL, VT, Expand); + + // Named vector shuffles default to expand. + setOperationAction(ISD::VECTOR_SPLICE, VT, Expand); } // Most targets ignore the @llvm.prefetch intrinsic. @@ -985,9 +1002,6 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const { if (NumElts.isScalar()) return LegalizeKind(TypeScalarizeVector, EltVT); - if (VT.getVectorElementCount() == ElementCount::getScalable(1)) - report_fatal_error("Cannot legalize this vector"); - // Try to widen vector elements until the element type is a power of two and // promote it to a legal type later on, for example: // <3 x i8> -> <4 x i8> -> <4 x i32> @@ -1005,9 +1019,12 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const { // If type is to be expanded, split the vector. // <4 x i140> -> <2 x i140> - if (LK.first == TypeExpandInteger) + if (LK.first == TypeExpandInteger) { + if (VT.getVectorElementCount().isScalable()) + return LegalizeKind(TypeScalarizeScalableVector, EltVT); return LegalizeKind(TypeSplitVector, VT.getHalfNumVectorElementsVT(Context)); + } // Promote the integer element types until a legal vector type is found // or until the element integer type is too big. 
If a legal type was not @@ -1066,6 +1083,9 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const { return LegalizeKind(TypeWidenVector, NVT); } + if (VT.getVectorElementCount() == ElementCount::getScalable(1)) + return LegalizeKind(TypeScalarizeScalableVector, EltVT); + // Vectors with illegal element types are expanded. EVT NVT = EVT::getVectorVT(Context, EltVT, VT.getVectorElementCount().divideCoefficientBy(2)); @@ -1263,11 +1283,11 @@ TargetLoweringBase::findRepresentativeClass(const TargetRegisterInfo *TRI, /// this allows us to compute derived properties we expose. void TargetLoweringBase::computeRegisterProperties( const TargetRegisterInfo *TRI) { - static_assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE, + static_assert(MVT::VALUETYPE_SIZE <= MVT::MAX_ALLOWED_VALUETYPE, "Too many value types for ValueTypeActions to hold!"); // Everything defaults to needing one register. - for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) { + for (unsigned i = 0; i != MVT::VALUETYPE_SIZE; ++i) { NumRegistersForVT[i] = 1; RegisterTypeForVT[i] = TransformToType[i] = (MVT::SimpleValueType)i; } @@ -1479,7 +1499,7 @@ void TargetLoweringBase::computeRegisterProperties( // not a sub-register class / subreg register class) legal register class for // a group of value types. For example, on i386, i8, i16, and i32 // representative would be GR32; while on x86_64 it's GR64. - for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) { + for (unsigned i = 0; i != MVT::VALUETYPE_SIZE; ++i) { const TargetRegisterClass* RRC; uint8_t Cost; std::tie(RRC, Cost) = findRepresentativeClass(TRI, (MVT::SimpleValueType)i); @@ -1506,10 +1526,10 @@ MVT::SimpleValueType TargetLoweringBase::getCmpLibcallReturnType() const { /// This method returns the number of registers needed, and the VT for each /// register. It also returns the VT and quantity of the intermediate values /// before they are promoted/expanded. -unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, - EVT &IntermediateVT, - unsigned &NumIntermediates, - MVT &RegisterVT) const { +unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, + EVT VT, EVT &IntermediateVT, + unsigned &NumIntermediates, + MVT &RegisterVT) const { ElementCount EltCnt = VT.getVectorElementCount(); // If there is a wider vector type with the same element type as this one, @@ -1518,7 +1538,7 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT // This handles things like <2 x float> -> <4 x float> and // <4 x i1> -> <4 x i32>. LegalizeTypeAction TA = getTypeAction(Context, VT); - if (EltCnt.getKnownMinValue() != 1 && + if (!EltCnt.isScalar() && (TA == TypeWidenVector || TA == TypePromoteInteger)) { EVT RegisterEVT = getTypeToTransformTo(Context, VT); if (isTypeLegal(RegisterEVT)) { @@ -1690,7 +1710,7 @@ bool TargetLoweringBase::allowsMemoryAccessForAlignment( // For example, the ABI alignment may change based on software platform while // this function should only be affected by hardware implementation. Type *Ty = VT.getTypeForEVT(Context); - if (Alignment >= DL.getABITypeAlign(Ty)) { + if (VT.isZeroSized() || Alignment >= DL.getABITypeAlign(Ty)) { // Assume that an access that meets the ABI-specified alignment is fast. if (Fast != nullptr) *Fast = true; @@ -1698,8 +1718,7 @@ bool TargetLoweringBase::allowsMemoryAccessForAlignment( } // This is a misaligned access. 
- return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment.value(), Flags, - Fast); + return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags, Fast); } bool TargetLoweringBase::allowsMemoryAccessForAlignment( @@ -1734,10 +1753,6 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context, MMO.getAlign(), MMO.getFlags(), Fast); } -BranchProbability TargetLoweringBase::getPredictableBranchThreshold() const { - return BranchProbability(MinPercentageForPredictableBranch, 100); - } - //===----------------------------------------------------------------------===// // TargetTransformInfo Helpers //===----------------------------------------------------------------------===// @@ -1821,19 +1836,22 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const { llvm_unreachable("Unknown instruction type encountered!"); } -std::pair<int, MVT> +std::pair<InstructionCost, MVT> TargetLoweringBase::getTypeLegalizationCost(const DataLayout &DL, Type *Ty) const { LLVMContext &C = Ty->getContext(); EVT MTy = getValueType(DL, Ty); - int Cost = 1; + InstructionCost Cost = 1; // We keep legalizing the type until we find a legal kind. We assume that // the only operation that costs anything is the split. After splitting // we need to handle two types. while (true) { LegalizeKind LK = getTypeConversion(C, MTy); + if (LK.first == TypeScalarizeScalableVector) + return std::make_pair(InstructionCost::getInvalid(), MVT::getVT(Ty)); + if (LK.first == TypeLegal) return std::make_pair(Cost, MTy.getSimpleVT()); @@ -1849,8 +1867,9 @@ TargetLoweringBase::getTypeLegalizationCost(const DataLayout &DL, } } -Value *TargetLoweringBase::getDefaultSafeStackPointerLocation(IRBuilder<> &IRB, - bool UseTLS) const { +Value * +TargetLoweringBase::getDefaultSafeStackPointerLocation(IRBuilderBase &IRB, + bool UseTLS) const { // compiler-rt provides a variable with a magic name. Targets that do not // link with compiler-rt may also provide such a variable. Module *M = IRB.GetInsertBlock()->getParent()->getParent(); @@ -1881,7 +1900,8 @@ Value *TargetLoweringBase::getDefaultSafeStackPointerLocation(IRBuilder<> &IRB, return UnsafeStackPtr; } -Value *TargetLoweringBase::getSafeStackPointerLocation(IRBuilder<> &IRB) const { +Value * +TargetLoweringBase::getSafeStackPointerLocation(IRBuilderBase &IRB) const { if (!TM.getTargetTriple().isAndroid()) return getDefaultSafeStackPointerLocation(IRB, true); @@ -1941,7 +1961,7 @@ bool TargetLoweringBase::isLegalAddressingMode(const DataLayout &DL, // For OpenBSD return its special guard variable. Otherwise return nullptr, // so that SelectionDAG handles SSP.
-Value *TargetLoweringBase::getIRStackGuard(IRBuilder<> &IRB) const { +Value *TargetLoweringBase::getIRStackGuard(IRBuilderBase &IRB) const { if (getTargetMachine().getTargetTriple().isOSOpenBSD()) { Module &M = *IRB.GetInsertBlock()->getParent()->getParent(); PointerType *PtrTy = Type::getInt8PtrTy(M.getContext()); @@ -2243,6 +2263,24 @@ TargetLoweringBase::getAtomicMemOperandFlags(const Instruction &AI, return Flags; } +Instruction *TargetLoweringBase::emitLeadingFence(IRBuilderBase &Builder, + Instruction *Inst, + AtomicOrdering Ord) const { + if (isReleaseOrStronger(Ord) && Inst->hasAtomicStore()) + return Builder.CreateFence(Ord); + else + return nullptr; +} + +Instruction *TargetLoweringBase::emitTrailingFence(IRBuilderBase &Builder, + Instruction *Inst, + AtomicOrdering Ord) const { + if (isAcquireOrStronger(Ord)) + return Builder.CreateFence(Ord); + else + return nullptr; +} + //===----------------------------------------------------------------------===// // GlobalISel Hooks //===----------------------------------------------------------------------===// diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index fe64b38cf0be..add34eccc1f3 100644 --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -21,6 +21,7 @@ #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/BinaryFormat/MachO.h" +#include "llvm/BinaryFormat/Wasm.h" #include "llvm/CodeGen/BasicBlockSectionUtils.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" @@ -47,6 +48,7 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSectionGOFF.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSectionWasm.h" #include "llvm/MC/MCSectionXCOFF.h" @@ -153,7 +155,7 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, ? dwarf::DW_EH_PE_sdata4 : dwarf::DW_EH_PE_sdata8); TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | ((CM == CodeModel::Small || CM == CodeModel::Medium) - ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); + ? 
dwarf::DW_EH_PE_sdata4 : dwarf::DW_EH_PE_sdata8); } else { PersonalityEncoding = (CM == CodeModel::Small || CM == CodeModel::Medium) @@ -293,6 +295,14 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, } } +void TargetLoweringObjectFileELF::getModuleMetadata(Module &M) { + SmallVector<GlobalValue *, 4> Vec; + collectUsedGlobalVariables(M, Vec, false); + for (GlobalValue *GV : Vec) + if (auto *GO = dyn_cast<GlobalObject>(GV)) + Used.insert(GO); +} + void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer, Module &M) const { auto &C = getContext(); @@ -315,7 +325,7 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer, if (NamedMDNode *DependentLibraries = M.getNamedMetadata("llvm.dependent-libraries")) { auto *S = C.getELFSection(".deplibs", ELF::SHT_LLVM_DEPENDENT_LIBRARIES, - ELF::SHF_MERGE | ELF::SHF_STRINGS, 1, ""); + ELF::SHF_MERGE | ELF::SHF_STRINGS, 1); Streamer.SwitchSection(S); @@ -522,8 +532,10 @@ static const Comdat *getELFComdat(const GlobalValue *GV) { if (!C) return nullptr; - if (C->getSelectionKind() != Comdat::Any) - report_fatal_error("ELF COMDATs only support SelectionKind::Any, '" + + if (C->getSelectionKind() != Comdat::Any && + C->getSelectionKind() != Comdat::NoDeduplicate) + report_fatal_error("ELF COMDATs only support SelectionKind::Any and " + "SelectionKind::NoDeduplicate, '" + C->getName() + "' cannot be lowered."); return C; @@ -624,6 +636,8 @@ getELFSectionNameForGlobal(const GlobalObject *GO, SectionKind Kind, Name.push_back('.'); TM.getNameWithPrefix(Name, GO, Mang, /*MayAlwaysUsePrivate*/true); } else if (HasPrefix) + // For distinguishing between .text.${text-section-prefix}. (with trailing + // dot) and .text.${function-name} Name.push_back('.'); return Name; } @@ -640,8 +654,85 @@ public: }; } -MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal( - const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { +/// Calculate an appropriate unique ID for a section, and update Flags, +/// EntrySize and NextUniqueID where appropriate. +static unsigned +calcUniqueIDUpdateFlagsAndSize(const GlobalObject *GO, StringRef SectionName, + SectionKind Kind, const TargetMachine &TM, + MCContext &Ctx, Mangler &Mang, unsigned &Flags, + unsigned &EntrySize, unsigned &NextUniqueID, + const bool Retain, const bool ForceUnique) { + // Increment uniqueID if we are forced to emit a unique section. + // This works perfectly fine with section attribute or pragma section as the + // sections with the same name are grouped together by the assembler. + if (ForceUnique) + return NextUniqueID++; + + // A section can have at most one associated section. Put each global with + // MD_associated in a unique section. + const bool Associated = GO->getMetadata(LLVMContext::MD_associated); + if (Associated) { + Flags |= ELF::SHF_LINK_ORDER; + return NextUniqueID++; + } + + if (Retain) { + if (Ctx.getAsmInfo()->useIntegratedAssembler() || + Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36)) + Flags |= ELF::SHF_GNU_RETAIN; + return NextUniqueID++; + } + + // If two symbols with differing sizes end up in the same mergeable section, + // that section can be assigned an incorrect entry size. To avoid this we + // usually put symbols of the same size into distinct mergeable sections with + // the same name. Doing so relies on the ",unique ," assembly feature. This + // feature is not available until binutils version 2.35 + // (https://sourceware.org/bugzilla/show_bug.cgi?id=25380).
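The entry-size hazard described in the comment just above can be reproduced at the source level. A minimal sketch (the section name and globals are hypothetical, and it assumes the compiler classifies both arrays as mergeable strings):

// Both globals request the same mergeable-string section, but their element
// sizes differ (1-byte char vs. 4-byte wchar_t on many ELF targets), so one
// section header with a single sh_entsize cannot describe both. With the
// integrated assembler or binutils >= 2.35 they are emitted into two
// same-named sections via the ",unique ," feature instead.
__attribute__((section(".rodata.str"))) const char Narrow[] = "a";
__attribute__((section(".rodata.str"))) const wchar_t Wide[] = L"a";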
+ const bool SupportsUnique = Ctx.getAsmInfo()->useIntegratedAssembler() || + Ctx.getAsmInfo()->binutilsIsAtLeast(2, 35); + if (!SupportsUnique) { + Flags &= ~ELF::SHF_MERGE; + EntrySize = 0; + return MCContext::GenericSectionID; + } + + const bool SymbolMergeable = Flags & ELF::SHF_MERGE; + const bool SeenSectionNameBefore = + Ctx.isELFGenericMergeableSection(SectionName); + // If this is the first occurrence of this section name, treat it as the + // generic section. + if (!SymbolMergeable && !SeenSectionNameBefore) + return MCContext::GenericSectionID; + + // Symbols must be placed into sections with compatible entry sizes. Generate + // unique sections for symbols that have not been assigned to compatible + // sections. + const auto PreviousID = + Ctx.getELFUniqueIDForEntsize(SectionName, Flags, EntrySize); + if (PreviousID) + return *PreviousID; + + // If the user has specified the same section name as would be created + // implicitly for this symbol e.g. .rodata.str1.1, then we don't need + // to unique the section as the entry size for this symbol will be + // compatible with implicitly created sections. + SmallString<128> ImplicitSectionNameStem = + getELFSectionNameForGlobal(GO, Kind, Mang, TM, EntrySize, false); + if (SymbolMergeable && + Ctx.isELFImplicitMergeableSectionNamePrefix(SectionName) && + SectionName.startswith(ImplicitSectionNameStem)) + return MCContext::GenericSectionID; + + // We have seen this section name before, but with different flags or entry + // size. Create a new unique ID. + return NextUniqueID++; +} + +static MCSection *selectExplicitSectionGlobal( + const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM, + MCContext &Ctx, Mangler &Mang, unsigned &NextUniqueID, + bool Retain, bool ForceUnique) { StringRef SectionName = GO->getSection(); // Check if '#pragma clang section' name is applicable. @@ -669,76 +760,30 @@ MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal( Kind = getELFKindForNamedSection(SectionName, Kind); StringRef Group = ""; + bool IsComdat = false; unsigned Flags = getELFSectionFlags(Kind); if (const Comdat *C = getELFComdat(GO)) { Group = C->getName(); + IsComdat = C->getSelectionKind() == Comdat::Any; Flags |= ELF::SHF_GROUP; } unsigned EntrySize = getEntrySizeForKind(Kind); + const unsigned UniqueID = calcUniqueIDUpdateFlagsAndSize( + GO, SectionName, Kind, TM, Ctx, Mang, Flags, EntrySize, NextUniqueID, + Retain, ForceUnique); - // A section can have at most one associated section. Put each global with - // MD_associated in a unique section. - unsigned UniqueID = MCContext::GenericSectionID; const MCSymbolELF *LinkedToSym = getLinkedToSymbol(GO, TM); - if (GO->getMetadata(LLVMContext::MD_associated)) { - UniqueID = NextUniqueID++; - Flags |= ELF::SHF_LINK_ORDER; - } else { - if (getContext().getAsmInfo()->useIntegratedAssembler() || - getContext().getAsmInfo()->binutilsIsAtLeast(2, 35)) { - // Symbols must be placed into sections with compatible entry - // sizes. Generate unique sections for symbols that have not - // been assigned to compatible sections. - if (Flags & ELF::SHF_MERGE) { - auto maybeID = getContext().getELFUniqueIDForEntsize(SectionName, Flags, - EntrySize); - if (maybeID) - UniqueID = *maybeID; - else { - // If the user has specified the same section name as would be created - // implicitly for this symbol e.g. .rodata.str1.1, then we don't need - // to unique the section as the entry size for this symbol will be - // compatible with implicitly created sections.
- SmallString<128> ImplicitSectionNameStem = getELFSectionNameForGlobal( - GO, Kind, getMangler(), TM, EntrySize, false); - if (!(getContext().isELFImplicitMergeableSectionNamePrefix( - SectionName) && - SectionName.startswith(ImplicitSectionNameStem))) - UniqueID = NextUniqueID++; - } - } else { - // We need to unique the section if the user has explicity - // assigned a non-mergeable symbol to a section name for - // a generic mergeable section. - if (getContext().isELFGenericMergeableSection(SectionName)) { - auto maybeID = getContext().getELFUniqueIDForEntsize( - SectionName, Flags, EntrySize); - UniqueID = maybeID ? *maybeID : NextUniqueID++; - } - } - } else { - // If two symbols with differing sizes end up in the same mergeable - // section that section can be assigned an incorrect entry size. To avoid - // this we usually put symbols of the same size into distinct mergeable - // sections with the same name. Doing so relies on the ",unique ," - // assembly feature. This feature is not avalible until bintuils - // version 2.35 (https://sourceware.org/bugzilla/show_bug.cgi?id=25380). - Flags &= ~ELF::SHF_MERGE; - EntrySize = 0; - } - } - - MCSectionELF *Section = getContext().getELFSection( - SectionName, getELFSectionType(SectionName, Kind), Flags, - EntrySize, Group, UniqueID, LinkedToSym); + MCSectionELF *Section = Ctx.getELFSection( + SectionName, getELFSectionType(SectionName, Kind), Flags, EntrySize, + Group, IsComdat, UniqueID, LinkedToSym); // Make sure that we did not get some other section with incompatible sh_link. // This should not be possible due to UniqueID code above. assert(Section->getLinkedToSymbol() == LinkedToSym && "Associated symbol mismatch between sections"); - if (!(getContext().getAsmInfo()->useIntegratedAssembler() || - getContext().getAsmInfo()->binutilsIsAtLeast(2, 35))) { + if (!(Ctx.getAsmInfo()->useIntegratedAssembler() || + Ctx.getAsmInfo()->binutilsIsAtLeast(2, 35))) { // If we are using GNU as before 2.35, then this symbol might have // been placed in an incompatible mergeable section. Emit an error if this // is the case to avoid creating broken output. @@ -757,15 +802,24 @@ MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal( return Section; } +MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal( + const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { + return selectExplicitSectionGlobal(GO, Kind, TM, getContext(), getMangler(), + NextUniqueID, Used.count(GO), + /* ForceUnique = */false); +} + static MCSectionELF *selectELFSectionForGlobal( MCContext &Ctx, const GlobalObject *GO, SectionKind Kind, Mangler &Mang, const TargetMachine &TM, bool EmitUniqueSection, unsigned Flags, unsigned *NextUniqueID, const MCSymbolELF *AssociatedSymbol) { StringRef Group = ""; + bool IsComdat = false; if (const Comdat *C = getELFComdat(GO)) { Flags |= ELF::SHF_GROUP; Group = C->getName(); + IsComdat = C->getSelectionKind() == Comdat::Any; } // Get the section entry size based on the kind. 
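The Retain flag threaded through calcUniqueIDUpdateFlagsAndSize() above (and through selectELFSectionForGlobal() in the next hunk) is fed from the Used set that getModuleMetadata() collects. A small source-level sketch of what lands on that path (the global is hypothetical; per the checks above, SHF_GNU_RETAIN is only emitted with the integrated assembler or binutils >= 2.36):

// The "used" attribute places BuildTag in @llvm.used; the lowering above then
// gives it a unique section carrying SHF_GNU_RETAIN, so ld --gc-sections
// keeps it even though nothing in the program references it.
__attribute__((used)) static const char BuildTag[] = "build-2021-07-29";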
@@ -788,7 +842,30 @@ static MCSectionELF *selectELFSectionForGlobal( if (Kind.isExecuteOnly()) UniqueID = 0; return Ctx.getELFSection(Name, getELFSectionType(Name, Kind), Flags, - EntrySize, Group, UniqueID, AssociatedSymbol); + EntrySize, Group, IsComdat, UniqueID, + AssociatedSymbol); +} + +static MCSection *selectELFSectionForGlobal( + MCContext &Ctx, const GlobalObject *GO, SectionKind Kind, Mangler &Mang, + const TargetMachine &TM, bool Retain, bool EmitUniqueSection, + unsigned Flags, unsigned *NextUniqueID) { + const MCSymbolELF *LinkedToSym = getLinkedToSymbol(GO, TM); + if (LinkedToSym) { + EmitUniqueSection = true; + Flags |= ELF::SHF_LINK_ORDER; + } + if (Retain && (Ctx.getAsmInfo()->useIntegratedAssembler() || + Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36))) { + EmitUniqueSection = true; + Flags |= ELF::SHF_GNU_RETAIN; + } + + MCSectionELF *Section = selectELFSectionForGlobal( + Ctx, GO, Kind, Mang, TM, EmitUniqueSection, Flags, + NextUniqueID, LinkedToSym); + assert(Section->getLinkedToSymbol() == LinkedToSym); + return Section; } MCSection *TargetLoweringObjectFileELF::SelectSectionForGlobal( @@ -805,18 +882,25 @@ MCSection *TargetLoweringObjectFileELF::SelectSectionForGlobal( EmitUniqueSection = TM.getDataSections(); } EmitUniqueSection |= GO->hasComdat(); + return selectELFSectionForGlobal(getContext(), GO, Kind, getMangler(), TM, + Used.count(GO), EmitUniqueSection, Flags, + &NextUniqueID); +} - const MCSymbolELF *LinkedToSym = getLinkedToSymbol(GO, TM); - if (LinkedToSym) { - EmitUniqueSection = true; - Flags |= ELF::SHF_LINK_ORDER; - } - - MCSectionELF *Section = selectELFSectionForGlobal( - getContext(), GO, Kind, getMangler(), TM, EmitUniqueSection, Flags, - &NextUniqueID, LinkedToSym); - assert(Section->getLinkedToSymbol() == LinkedToSym); - return Section; +MCSection *TargetLoweringObjectFileELF::getUniqueSectionForFunction( + const Function &F, const TargetMachine &TM) const { + SectionKind Kind = SectionKind::getText(); + unsigned Flags = getELFSectionFlags(Kind); + // If the function's section name is pre-determined via pragma or a + // section attribute, call selectExplicitSectionGlobal. + if (F.hasSection() || F.hasFnAttribute("implicit-section-name")) + return selectExplicitSectionGlobal( + &F, Kind, TM, getContext(), getMangler(), NextUniqueID, + Used.count(&F), /* ForceUnique = */true); + else + return selectELFSectionForGlobal( + getContext(), &F, Kind, getMangler(), TM, Used.count(&F), + /*EmitUniqueSection=*/true, Flags, &NextUniqueID); } MCSection *TargetLoweringObjectFileELF::getSectionForJumpTable( @@ -834,9 +918,8 @@ MCSection *TargetLoweringObjectFileELF::getSectionForJumpTable( /* AssociatedSymbol */ nullptr); } -MCSection * -TargetLoweringObjectFileELF::getSectionForLSDA(const Function &F, - const TargetMachine &TM) const { +MCSection *TargetLoweringObjectFileELF::getSectionForLSDA( + const Function &F, const MCSymbol &FnSym, const TargetMachine &TM) const { // If neither COMDAT nor function sections, use the monolithic LSDA section. // Re-use this path if LSDASection is null as in the Arm EHABI.
if (!LSDASection || (!F.hasComdat() && !TM.getFunctionSections())) @@ -844,31 +927,30 @@ TargetLoweringObjectFileELF::getSectionForLSDA(const Function &F, const auto *LSDA = cast<MCSectionELF>(LSDASection); unsigned Flags = LSDA->getFlags(); + const MCSymbolELF *LinkedToSym = nullptr; StringRef Group; - if (F.hasComdat()) { - Group = F.getComdat()->getName(); + bool IsComdat = false; + if (const Comdat *C = getELFComdat(&F)) { Flags |= ELF::SHF_GROUP; + Group = C->getName(); + IsComdat = C->getSelectionKind() == Comdat::Any; + } + // Use SHF_LINK_ORDER to facilitate --gc-sections if we can use GNU ld>=2.36 + // or LLD, which support mixed SHF_LINK_ORDER & non-SHF_LINK_ORDER. + if (TM.getFunctionSections() && + (getContext().getAsmInfo()->useIntegratedAssembler() && + getContext().getAsmInfo()->binutilsIsAtLeast(2, 36))) { + Flags |= ELF::SHF_LINK_ORDER; + LinkedToSym = cast<MCSymbolELF>(&FnSym); } // Append the function name as the suffix like GCC, assuming // -funique-section-names applies to .gcc_except_table sections. - if (TM.getUniqueSectionNames()) - return getContext().getELFSection(LSDA->getName() + "." + F.getName(), - LSDA->getType(), Flags, 0, Group, - MCSection::NonUniqueID, nullptr); - - // Allocate a unique ID if function sections && (integrated assembler or GNU - // as>=2.35). Note we could use SHF_LINK_ORDER to facilitate --gc-sections but - // that would require that we know the linker is a modern LLD (12.0 or later). - // GNU ld as of 2.35 does not support mixed SHF_LINK_ORDER & - // non-SHF_LINK_ORDER components in an output section - // https://sourceware.org/bugzilla/show_bug.cgi?id=26256 - unsigned ID = TM.getFunctionSections() && - getContext().getAsmInfo()->useIntegratedAssembler() - ? NextUniqueID++ - : MCSection::NonUniqueID; - return getContext().getELFSection(LSDA->getName(), LSDA->getType(), Flags, 0, - Group, ID, nullptr); + return getContext().getELFSection( + (TM.getUniqueSectionNames() ? LSDA->getName() + "." + F.getName() + : LSDA->getName()), + LSDA->getType(), Flags, 0, Group, IsComdat, MCSection::NonUniqueID, + LinkedToSym); } bool TargetLoweringObjectFileELF::shouldPutJumpTableInFunctionSection( @@ -920,7 +1002,8 @@ MCSection *TargetLoweringObjectFileELF::getSectionForMachineBasicBlock( } else { Name += MBB.getParent()->getSection()->getName(); if (TM.getUniqueBasicBlockSectionNames()) { - Name += "."; + if (!Name.endswith(".")) + Name += "."; Name += MBB.getSymbol()->getName(); } else { UniqueID = NextUniqueID++; @@ -934,8 +1017,8 @@ MCSection *TargetLoweringObjectFileELF::getSectionForMachineBasicBlock( GroupName = F.getComdat()->getName().str(); } return getContext().getELFSection(Name, ELF::SHT_PROGBITS, Flags, - 0 /* Entry Size */, GroupName, UniqueID, - nullptr); + 0 /* Entry Size */, GroupName, + F.hasComdat(), UniqueID, nullptr); } static MCSectionELF *getStaticStructorSection(MCContext &Ctx, bool UseInitArray, @@ -944,7 +1027,7 @@ static MCSectionELF *getStaticStructorSection(MCContext &Ctx, bool UseInitArray, std::string Name; unsigned Type; unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE; - StringRef COMDAT = KeySym ? KeySym->getName() : ""; + StringRef Comdat = KeySym ? 
KeySym->getName() : ""; if (KeySym) Flags |= ELF::SHF_GROUP; @@ -973,7 +1056,7 @@ static MCSectionELF *getStaticStructorSection(MCContext &Ctx, bool UseInitArray, Type = ELF::SHT_PROGBITS; } - return Ctx.getELFSection(Name, Type, Flags, 0, COMDAT); + return Ctx.getELFSection(Name, Type, Flags, 0, Comdat, /*IsComdat=*/true); } MCSection *TargetLoweringObjectFileELF::getStaticCtorSection( @@ -1027,7 +1110,7 @@ MCSection *TargetLoweringObjectFileELF::getSectionForCommandLines() const { // -frecord-gcc-switches which in turn attempts to mimic GCC's switch of the // same name. return getContext().getELFSection(".GCC.command.line", ELF::SHT_PROGBITS, - ELF::SHF_MERGE | ELF::SHF_STRINGS, 1, ""); + ELF::SHF_MERGE | ELF::SHF_STRINGS, 1); } void @@ -1107,13 +1190,12 @@ void TargetLoweringObjectFileMachO::emitModuleMetadata(MCStreamer &Streamer, StringRef Segment, Section; unsigned TAA = 0, StubSize = 0; bool TAAParsed; - std::string ErrorCode = - MCSectionMachO::ParseSectionSpecifier(SectionVal, Segment, Section, - TAA, TAAParsed, StubSize); - if (!ErrorCode.empty()) + if (Error E = MCSectionMachO::ParseSectionSpecifier( + SectionVal, Segment, Section, TAA, TAAParsed, StubSize)) { // If invalid, report the error with report_fatal_error. - report_fatal_error("Invalid section specifier '" + Section + "': " + - ErrorCode + "."); + report_fatal_error("Invalid section specifier '" + Section + + "': " + toString(std::move(E)) + "."); + } // Get the section. MCSectionMachO *S = getContext().getMachOSection( @@ -1137,6 +1219,14 @@ static void checkMachOComdat(const GlobalValue *GV) { MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal( const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { + + StringRef SectionName = GO->getSection(); + + const Function *F = dyn_cast<Function>(GO); + if (F && F->hasFnAttribute("implicit-section-name")) { + SectionName = F->getFnAttribute("implicit-section-name").getValueAsString(); + } + // Parse the section specifier and create it if valid. StringRef Segment, Section; unsigned TAA = 0, StubSize = 0; @@ -1144,14 +1234,12 @@ MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal( checkMachOComdat(GO); - std::string ErrorCode = - MCSectionMachO::ParseSectionSpecifier(GO->getSection(), Segment, Section, - TAA, TAAParsed, StubSize); - if (!ErrorCode.empty()) { + if (Error E = MCSectionMachO::ParseSectionSpecifier( + SectionName, Segment, Section, TAA, TAAParsed, StubSize)) { // If invalid, report the error with report_fatal_error. report_fatal_error("Global variable '" + GO->getName() + "' has an invalid section specifier '" + - GO->getSection() + "': " + ErrorCode + "."); + GO->getSection() + "': " + toString(std::move(E)) + "."); } // Get the section. @@ -1393,11 +1481,10 @@ static bool canUsePrivateLabel(const MCAsmInfo &AsmInfo, if (!AsmInfo.isSectionAtomizableBySymbols(Section)) return true; - // If it is not dead stripped, it is safe to use private labels. - const MCSectionMachO &SMO = cast<MCSectionMachO>(Section); - if (SMO.hasAttribute(MachO::S_ATTR_NO_DEAD_STRIP)) - return true; - + // FIXME: we should be able to use private labels for sections that can't be + // dead-stripped (there's no issue with blocking atomization there), but `ld + // -r` sometimes drops the no_dead_strip attribute from sections so for safety + // we don't allow it. 
return false; } @@ -1485,7 +1572,7 @@ static int getSelectionForCOFF(const GlobalValue *GV) { return COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH; case Comdat::Largest: return COFF::IMAGE_COMDAT_SELECT_LARGEST; - case Comdat::NoDuplicates: + case Comdat::NoDeduplicate: return COFF::IMAGE_COMDAT_SELECT_NODUPLICATES; case Comdat::SameSize: return COFF::IMAGE_COMDAT_SELECT_SAME_SIZE; @@ -1576,7 +1663,7 @@ MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal( // Append "$symbol" to the section name *before* IR-level mangling is // applied when targeting mingw. This is what GCC does, and the ld.bfd // COFF linker will not properly handle comdats otherwise. - if (getTargetTriple().isWindowsGNUEnvironment()) + if (getContext().getTargetTriple().isWindowsGNUEnvironment()) raw_svector_ostream(Name) << '$' << ComdatGV->getName(); return getContext().getCOFFSection(Name, Characteristics, Kind, @@ -1693,7 +1780,8 @@ void TargetLoweringObjectFileCOFF::emitLinkerDirectives( std::string Flags; for (const GlobalValue &GV : M.global_values()) { raw_string_ostream OS(Flags); - emitLinkerFlagsForGlobalCOFF(OS, &GV, getTargetTriple(), getMangler()); + emitLinkerFlagsForGlobalCOFF(OS, &GV, getContext().getTargetTriple(), + getMangler()); OS.flush(); if (!Flags.empty()) { Streamer.SwitchSection(getDrectveSection()); @@ -1717,7 +1805,8 @@ void TargetLoweringObjectFileCOFF::emitLinkerDirectives( continue; raw_string_ostream OS(Flags); - emitLinkerFlagsForUsedCOFF(OS, GV, getTargetTriple(), getMangler()); + emitLinkerFlagsForUsedCOFF(OS, GV, getContext().getTargetTriple(), + getMangler()); OS.flush(); if (!Flags.empty()) { @@ -1796,16 +1885,16 @@ static MCSectionCOFF *getCOFFStaticStructorSection(MCContext &Ctx, MCSection *TargetLoweringObjectFileCOFF::getStaticCtorSection( unsigned Priority, const MCSymbol *KeySym) const { - return getCOFFStaticStructorSection(getContext(), getTargetTriple(), true, - Priority, KeySym, - cast<MCSectionCOFF>(StaticCtorSection)); + return getCOFFStaticStructorSection( + getContext(), getContext().getTargetTriple(), true, Priority, KeySym, + cast<MCSectionCOFF>(StaticCtorSection)); } MCSection *TargetLoweringObjectFileCOFF::getStaticDtorSection( unsigned Priority, const MCSymbol *KeySym) const { - return getCOFFStaticStructorSection(getContext(), getTargetTriple(), false, - Priority, KeySym, - cast<MCSectionCOFF>(StaticDtorSection)); + return getCOFFStaticStructorSection( + getContext(), getContext().getTargetTriple(), false, Priority, KeySym, + cast<MCSectionCOFF>(StaticDtorSection)); } const MCExpr *TargetLoweringObjectFileCOFF::lowerRelativeReference( @@ -1841,7 +1930,7 @@ const MCExpr *TargetLoweringObjectFileCOFF::lowerRelativeReference( static std::string APIntToHexString(const APInt &AI) { unsigned Width = (AI.getBitWidth() / 8) * 2; - std::string HexString = AI.toString(16, /*Signed=*/false); + std::string HexString = toString(AI, 16, /*Signed=*/false); llvm::transform(HexString, HexString.begin(), tolower); unsigned Size = HexString.size(); assert(Width >= Size && "hex string is too large!"); @@ -1934,6 +2023,20 @@ static const Comdat *getWasmComdat(const GlobalValue *GV) { return C; } +static unsigned getWasmSectionFlags(SectionKind K) { + unsigned Flags = 0; + + if (K.isThreadLocal()) + Flags |= wasm::WASM_SEG_FLAG_TLS; + + if (K.isMergeableCString()) + Flags |= wasm::WASM_SEG_FLAG_STRINGS; + + // TODO(sbc): Add support for K.isMergeableConst() + + return Flags; +} + MCSection *TargetLoweringObjectFileWasm::getExplicitSectionGlobal( const GlobalObject *GO, SectionKind
Kind, const TargetMachine &TM) const { // We don't support explicit section names for functions in the wasm object @@ -1957,9 +2060,9 @@ MCSection *TargetLoweringObjectFileWasm::getExplicitSectionGlobal( Group = C->getName(); } - MCSectionWasm* Section = - getContext().getWasmSection(Name, Kind, Group, - MCContext::GenericSectionID); + unsigned Flags = getWasmSectionFlags(Kind); + MCSectionWasm *Section = getContext().getWasmSection( + Name, Kind, Flags, Group, MCContext::GenericSectionID); return Section; } @@ -1991,7 +2094,8 @@ static MCSectionWasm *selectWasmSectionForGlobal( (*NextUniqueID)++; } - return Ctx.getWasmSection(Name, Kind, Group, UniqueID); + unsigned Flags = getWasmSectionFlags(Kind); + return Ctx.getWasmSection(Name, Kind, Flags, Group, UniqueID); } MCSection *TargetLoweringObjectFileWasm::SelectSectionForGlobal( @@ -2075,14 +2179,26 @@ bool TargetLoweringObjectFileXCOFF::ShouldEmitEHBlock( if (!F.hasPersonalityFn() || !F.needsUnwindTableEntry()) return false; - const Function *Per = - dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts()); + const GlobalValue *Per = + dyn_cast<GlobalValue>(F.getPersonalityFn()->stripPointerCasts()); + assert(Per && "Personality routine is not a GlobalValue type."); if (isNoOpWithoutInvoke(classifyEHPersonality(Per))) return false; return true; } +bool TargetLoweringObjectFileXCOFF::ShouldSetSSPCanaryBitInTB( + const MachineFunction *MF) { + const Function &F = MF->getFunction(); + if (!F.hasStackProtectorFnAttr()) + return false; + // FIXME: check presence of canary word + // There are cases where the stack protectors are not really inserted even if + // the attributes are on. + return true; +} + MCSymbol * TargetLoweringObjectFileXCOFF::getEHInfoTableSymbol(const MachineFunction *MF) { return MF->getMMI().getContext().getOrCreateSymbol( @@ -2101,6 +2217,12 @@ TargetLoweringObjectFileXCOFF::getTargetSymbol(const GlobalValue *GV, // function entry point. We choose to always return a function descriptor // here. if (const GlobalObject *GO = dyn_cast<GlobalObject>(GV)) { + if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) + if (GVar->hasAttribute("toc-data")) + return cast<MCSectionXCOFF>( + SectionForGlobal(GVar, SectionKind::getData(), TM)) + ->getQualNameSymbol(); + if (GO->isDeclarationForLinker()) return cast<MCSectionXCOFF>(getSectionForExternalReference(GO, TM)) ->getQualNameSymbol(); @@ -2110,8 +2232,8 @@ TargetLoweringObjectFileXCOFF::getTargetSymbol(const GlobalValue *GV, return cast<MCSectionXCOFF>( getSectionForFunctionDescriptor(cast<Function>(GO), TM)) ->getQualNameSymbol(); - if ((TM.getDataSections() && !GO->hasSection()) || GOKind.isCommon() || - GOKind.isBSSLocal()) + if ((TM.getDataSections() && !GO->hasSection()) || GO->hasCommonLinkage() || + GOKind.isBSSLocal() || GOKind.isThreadBSSLocal()) return cast<MCSectionXCOFF>(SectionForGlobal(GO, GOKind, TM)) ->getQualNameSymbol(); } @@ -2126,6 +2248,15 @@ MCSection *TargetLoweringObjectFileXCOFF::getExplicitSectionGlobal( report_fatal_error("#pragma clang section is not yet supported"); StringRef SectionName = GO->getSection(); + + // Handle the XCOFF::TD case first, then deal with the rest.
+ if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GO)) + if (GVar->hasAttribute("toc-data")) + return getContext().getXCOFFSection( + SectionName, Kind, + XCOFF::CsectProperties(/*MappingClass*/ XCOFF::XMC_TD, XCOFF::XTY_SD), + /* MultiSymbolsAllowed*/ true); + XCOFF::StorageMappingClass MappingClass; if (Kind.isText()) MappingClass = XCOFF::XMC_PR; @@ -2136,8 +2267,9 @@ MCSection *TargetLoweringObjectFileXCOFF::getExplicitSectionGlobal( else report_fatal_error("XCOFF other section types not yet implemented."); - return getContext().getXCOFFSection(SectionName, MappingClass, XCOFF::XTY_SD, - Kind, /* MultiSymbolsAllowed*/ true); + return getContext().getXCOFFSection( + SectionName, Kind, XCOFF::CsectProperties(MappingClass, XCOFF::XTY_SD), + /* MultiSymbolsAllowed*/ true); } MCSection *TargetLoweringObjectFileXCOFF::getSectionForExternalReference( @@ -2148,22 +2280,41 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForExternalReference( SmallString<128> Name; getNameWithPrefix(Name, GO, TM); + XCOFF::StorageMappingClass SMC = + isa<Function>(GO) ? XCOFF::XMC_DS : XCOFF::XMC_UA; + if (GO->isThreadLocal()) + SMC = XCOFF::XMC_UL; + // Externals go into a csect of type ER. return getContext().getXCOFFSection( - Name, isa<Function>(GO) ? XCOFF::XMC_DS : XCOFF::XMC_UA, XCOFF::XTY_ER, - SectionKind::getMetadata()); + Name, SectionKind::getMetadata(), + XCOFF::CsectProperties(SMC, XCOFF::XTY_ER)); } MCSection *TargetLoweringObjectFileXCOFF::SelectSectionForGlobal( const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { + // Handle the XCOFF::TD case first, then deal with the rest. + if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GO)) + if (GVar->hasAttribute("toc-data")) { + SmallString<128> Name; + getNameWithPrefix(Name, GO, TM); + return getContext().getXCOFFSection( + Name, Kind, XCOFF::CsectProperties(XCOFF::XMC_TD, XCOFF::XTY_SD), + /* MultiSymbolsAllowed*/ true); + } + // Common symbols go into a csect with matching name which will get mapped // into the .bss section. - if (Kind.isBSSLocal() || Kind.isCommon()) { + // Zero-initialized local TLS symbols go into a csect with matching name which + // will get mapped into the .tbss section. + if (Kind.isBSSLocal() || GO->hasCommonLinkage() || Kind.isThreadBSSLocal()) { SmallString<128> Name; getNameWithPrefix(Name, GO, TM); + XCOFF::StorageMappingClass SMC = Kind.isBSSLocal() ? XCOFF::XMC_BS + : Kind.isCommon() ? XCOFF::XMC_RW + : XCOFF::XMC_UL; return getContext().getXCOFFSection( - Name, Kind.isBSSLocal() ? 
XCOFF::XMC_BS : XCOFF::XMC_RW, XCOFF::XTY_CM, - Kind); + Name, Kind, XCOFF::CsectProperties(SMC, XCOFF::XTY_CM)); } if (Kind.isMergeableCString()) { @@ -2179,7 +2330,7 @@ MCSection *TargetLoweringObjectFileXCOFF::SelectSectionForGlobal( getNameWithPrefix(Name, GO, TM); return getContext().getXCOFFSection( - Name, XCOFF::XMC_RO, XCOFF::XTY_SD, Kind, + Name, Kind, XCOFF::CsectProperties(XCOFF::XMC_RO, XCOFF::XTY_SD), /* MultiSymbolsAllowed*/ !TM.getDataSections()); } @@ -2202,8 +2353,9 @@ MCSection *TargetLoweringObjectFileXCOFF::SelectSectionForGlobal( if (TM.getDataSections()) { SmallString<128> Name; getNameWithPrefix(Name, GO, TM); - return getContext().getXCOFFSection(Name, XCOFF::XMC_RW, XCOFF::XTY_SD, - SectionKind::getData()); + return getContext().getXCOFFSection( + Name, SectionKind::getData(), + XCOFF::CsectProperties(XCOFF::XMC_RW, XCOFF::XTY_SD)); } return DataSection; } @@ -2212,12 +2364,27 @@ MCSection *TargetLoweringObjectFileXCOFF::SelectSectionForGlobal( if (TM.getDataSections()) { SmallString<128> Name; getNameWithPrefix(Name, GO, TM); - return getContext().getXCOFFSection(Name, XCOFF::XMC_RO, XCOFF::XTY_SD, - SectionKind::getReadOnly()); + return getContext().getXCOFFSection( + Name, SectionKind::getReadOnly(), + XCOFF::CsectProperties(XCOFF::XMC_RO, XCOFF::XTY_SD)); } return ReadOnlySection; } + // External/weak TLS data and initialized local TLS data are not eligible + // to be put into common csect. If data sections are enabled, thread + // data are emitted into separate sections. Otherwise, thread data + // are emitted into the .tdata section. + if (Kind.isThreadLocal()) { + if (TM.getDataSections()) { + SmallString<128> Name; + getNameWithPrefix(Name, GO, TM); + return getContext().getXCOFFSection( + Name, Kind, XCOFF::CsectProperties(XCOFF::XMC_TL, XCOFF::XTY_SD)); + } + return TLSDataSection; + } + report_fatal_error("XCOFF other section types not yet implemented."); } @@ -2232,8 +2399,9 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForJumpTable( // the table doesn't prevent the removal. SmallString<128> NameStr(".rodata.jmp.."); getNameWithPrefix(NameStr, &F, TM); - return getContext().getXCOFFSection(NameStr, XCOFF::XMC_RO, XCOFF::XTY_SD, - SectionKind::getReadOnly()); + return getContext().getXCOFFSection( + NameStr, SectionKind::getReadOnly(), + XCOFF::CsectProperties(XCOFF::XMC_RO, XCOFF::XTY_SD)); } bool TargetLoweringObjectFileXCOFF::shouldPutJumpTableInFunctionSection( @@ -2324,9 +2492,11 @@ MCSymbol *TargetLoweringObjectFileXCOFF::getFunctionEntryPointSymbol( Func->isDeclaration()) && isa<Function>(Func)) { return getContext() - .getXCOFFSection(NameStr, XCOFF::XMC_PR, - Func->isDeclaration() ? XCOFF::XTY_ER : XCOFF::XTY_SD, - SectionKind::getText()) + .getXCOFFSection( + NameStr, SectionKind::getText(), + XCOFF::CsectProperties(XCOFF::XMC_PR, Func->isDeclaration() + ? 
XCOFF::XTY_ER + : XCOFF::XTY_SD)) ->getQualNameSymbol(); } @@ -2337,8 +2507,9 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForFunctionDescriptor( const Function *F, const TargetMachine &TM) const { SmallString<128> NameStr; getNameWithPrefix(NameStr, F, TM); - return getContext().getXCOFFSection(NameStr, XCOFF::XMC_DS, XCOFF::XTY_SD, - SectionKind::getData()); + return getContext().getXCOFFSection( + NameStr, SectionKind::getData(), + XCOFF::CsectProperties(XCOFF::XMC_DS, XCOFF::XTY_SD)); } MCSection *TargetLoweringObjectFileXCOFF::getSectionForTOCEntry( @@ -2346,7 +2517,29 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForTOCEntry( // Use TE storage-mapping class when large code model is enabled so that // the chance of needing -bbigtoc is decreased. return getContext().getXCOFFSection( - cast<MCSymbolXCOFF>(Sym)->getSymbolTableName(), - TM.getCodeModel() == CodeModel::Large ? XCOFF::XMC_TE : XCOFF::XMC_TC, - XCOFF::XTY_SD, SectionKind::getData()); + cast<MCSymbolXCOFF>(Sym)->getSymbolTableName(), SectionKind::getData(), + XCOFF::CsectProperties( + TM.getCodeModel() == CodeModel::Large ? XCOFF::XMC_TE : XCOFF::XMC_TC, + XCOFF::XTY_SD)); +} + +//===----------------------------------------------------------------------===// +// GOFF +//===----------------------------------------------------------------------===// +TargetLoweringObjectFileGOFF::TargetLoweringObjectFileGOFF() + : TargetLoweringObjectFile() {} + +MCSection *TargetLoweringObjectFileGOFF::getExplicitSectionGlobal( + const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { + return SelectSectionForGlobal(GO, Kind, TM); +} + +MCSection *TargetLoweringObjectFileGOFF::SelectSectionForGlobal( + const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { + auto *Symbol = TM.getSymbol(GO); + if (Kind.isBSS()) + return getContext().getGOFFSection(Symbol->getName(), + SectionKind::getBSS()); + + return getContext().getObjectFileInfo()->getTextSection(); } diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index e844d03854e2..4024fd452fc4 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -39,6 +39,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Discriminator.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/SaveAndRestore.h" #include "llvm/Support/Threading.h" @@ -165,6 +166,13 @@ static cl::opt<GlobalISelAbortMode> EnableGlobalISelAbort( clEnumValN(GlobalISelAbortMode::DisableWithDiag, "2", "Disable the abort but emit a diagnostic on failure"))); +// An option that disables inserting FS-AFDO discriminators before emit. +// This is mainly for debugging and tuning purposes. +static cl::opt<bool> + FSNoFinalDiscrim("fs-no-final-discrim", cl::init(false), cl::Hidden, + cl::desc("Do not insert FS-AFDO discriminators before " + "emit.")); + // Temporary option to allow experimenting with MachineScheduler as a post-RA // scheduler. Targets can "properly" enable this with // substitutePass(&PostRASchedulerID, &PostMachineSchedulerID). @@ -334,6 +342,8 @@ struct InsertedPass { namespace llvm { +extern cl::opt<bool> EnableFSDiscriminator; + class PassConfigImpl { public: // List of passes explicitly substituted by this target.
Normally this is @@ -847,8 +857,8 @@ void TargetPassConfig::addIRPasses() { // Run GC lowering passes for builtin collectors // TODO: add a pass insertion point here - addPass(createGCLoweringPass()); - addPass(createShadowStackGCLoweringPass()); + addPass(&GCLoweringID); + addPass(&ShadowStackGCLoweringID); addPass(createLowerConstantIntrinsicsPass()); // Make sure that no unreachable blocks are instruction selected. @@ -858,11 +868,16 @@ if (getOptLevel() != CodeGenOpt::None && !DisableConstantHoisting) addPass(createConstantHoistingPass()); + if (getOptLevel() != CodeGenOpt::None) + addPass(createReplaceWithVeclibLegacyPass()); + if (getOptLevel() != CodeGenOpt::None && !DisablePartialLibcallInlining) addPass(createPartiallyInlineLibCallsPass()); - // Instrument function entry and exit, e.g. with calls to mcount(). - addPass(createPostInlineEntryExitInstrumenterPass()); + // Expand vector predication intrinsics into standard IR instructions. + // This pass has to run before ScalarizeMaskedMemIntrin and ExpandReduction + // passes since it emits those kinds of intrinsics. + addPass(createExpandVectorPredicationPass()); // Add scalarization of target's unsupported masked memory intrinsics pass. // the unsupported intrinsic will be replaced with a chain of basic blocks, @@ -924,7 +939,6 @@ void TargetPassConfig::addPassesToHandleExceptions() { void TargetPassConfig::addCodeGenPrepare() { if (getOptLevel() != CodeGenOpt::None && !DisableCGP) addPass(createCodeGenPreparePass()); - addPass(createRewriteSymbolsPass()); } /// Add common passes that perform LLVM IR to IR transforms in preparation for @@ -1109,6 +1123,8 @@ void TargetPassConfig::addMachinePasses() { // Run post-ra passes. addPostRegAlloc(); + addPass(&RemoveRedundantDebugValuesID, false); + addPass(&FixupStatepointCallerSavedID); // Insert prolog/epilog code. Eliminate abstract frame index references... @@ -1162,6 +1178,14 @@ void TargetPassConfig::addMachinePasses() { addPass(&XRayInstrumentationID); addPass(&PatchableFunctionID); + if (EnableFSDiscriminator && !FSNoFinalDiscrim) + // Add FS discriminators here so that all the instruction duplicates + // in different BBs get their own discriminators. With this, we can "sum" + // the SampleFDO counters instead of using MAX. This will improve the + // SampleFDO profile quality. + addPass(createMIRAddFSDiscriminatorsPass( + sampleprof::FSDiscriminatorPass::PassLast)); + addPreEmitPass(); if (TM->Options.EnableIPRA) @@ -1187,12 +1211,14 @@ void TargetPassConfig::addMachinePasses() { } // Machine function splitter uses the basic block sections feature. Both - // cannot be enabled at the same time. - if (TM->Options.EnableMachineFunctionSplitter || - EnableMachineFunctionSplitter) { - addPass(createMachineFunctionSplitterPass()); - } else if (TM->getBBSectionsType() != llvm::BasicBlockSection::None) { + // cannot be enabled at the same time. Basic block sections takes precedence. + // FIXME: In principle, BasicBlockSection::Labels and splitting can be used + // together. Update this check once we have addressed any issues. + if (TM->getBBSectionsType() != llvm::BasicBlockSection::None) { + addPass(llvm::createBasicBlockSectionsPass(TM->getBBSectionsFuncListBuf())); + } else if (TM->Options.EnableMachineFunctionSplitter || + EnableMachineFunctionSplitter) { + addPass(createMachineFunctionSplitterPass()); } // Add passes that directly emit MI after all other MI passes.
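The fast-regalloc path changed in the next hunk gains an addPostFastRegAllocRewrite() call. A hypothetical target override, assuming the usual bool-returning TargetPassConfig hook shape (the class and pass ID names are invented for illustration):

// Sketch: a target that must patch register assignments right after the fast
// register allocator runs can now hook a pass of its own in at this point.
bool MyTargetPassConfig::addPostFastRegAllocRewrite() {
  addPass(&MyPostFastRARewriteID); // hypothetical target-defined pass ID
  return true;
}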
@@ -1309,11 +1335,15 @@ FunctionPass *TargetPassConfig::createRegAllocPass(bool Optimized) { } bool TargetPassConfig::addRegAssignAndRewriteFast() { - if (RegAlloc != &useDefaultRegisterAllocator && - RegAlloc != &createFastRegisterAllocator) + if (RegAlloc != (RegisterRegAlloc::FunctionPassCtor)&useDefaultRegisterAllocator && + RegAlloc != (RegisterRegAlloc::FunctionPassCtor)&createFastRegisterAllocator) report_fatal_error("Must use fast (default) register allocator for unoptimized regalloc."); addPass(createRegAllocPass(false)); + + // Allow targets to change the register assignments after + // fast register allocation. + addPostFastRegAllocRewrite(); return true; } diff --git a/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/llvm/lib/CodeGen/TargetRegisterInfo.cpp index 5fd7eef5808f..f4bb71535f7f 100644 --- a/llvm/lib/CodeGen/TargetRegisterInfo.cpp +++ b/llvm/lib/CodeGen/TargetRegisterInfo.cpp @@ -225,6 +225,23 @@ TargetRegisterInfo::getMinimalPhysRegClass(MCRegister reg, MVT VT) const { return BestRC; } +const TargetRegisterClass * +TargetRegisterInfo::getMinimalPhysRegClassLLT(MCRegister reg, LLT Ty) const { + assert(Register::isPhysicalRegister(reg) && + "reg must be a physical register"); + + // Pick the most sub register class of the right type that contains + // this physreg. + const TargetRegisterClass *BestRC = nullptr; + for (const TargetRegisterClass *RC : regclasses()) { + if ((!Ty.isValid() || isTypeLegalForClass(*RC, Ty)) && RC->contains(reg) && + (!BestRC || BestRC->hasSubClass(RC))) + BestRC = RC; + } + + return BestRC; +} + /// getAllocatableSetForRC - Toggle the bits that represent allocatable /// registers for the specific register class. static void getAllocatableSetForRC(const MachineFunction &MF, @@ -250,8 +267,9 @@ BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF, } // Mask out the reserved registers - BitVector Reserved = getReservedRegs(MF); - Allocatable &= Reserved.flip(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + const BitVector &Reserved = MRI.getReservedRegs(); + Allocatable.reset(Reserved); return Allocatable; } @@ -461,21 +479,13 @@ bool TargetRegisterInfo::canRealignStack(const MachineFunction &MF) const { return !MF.getFunction().hasFnAttribute("no-realign-stack"); } -bool TargetRegisterInfo::needsStackRealignment( - const MachineFunction &MF) const { +bool TargetRegisterInfo::shouldRealignStack(const MachineFunction &MF) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); const Function &F = MF.getFunction(); - Align StackAlign = TFI->getStackAlign(); - bool requiresRealignment = ((MFI.getMaxAlign() > StackAlign) || - F.hasFnAttribute(Attribute::StackAlignment)); - if (F.hasFnAttribute("stackrealign") || requiresRealignment) { - if (canRealignStack(MF)) - return true; - LLVM_DEBUG(dbgs() << "Can't realign function's stack: " << F.getName() - << "\n"); - } - return false; + return F.hasFnAttribute("stackrealign") || + (MFI.getMaxAlign() > TFI->getStackAlign()) || + F.hasFnAttribute(Attribute::StackAlignment); } bool TargetRegisterInfo::regmaskSubsetEqual(const uint32_t *mask0, @@ -510,6 +520,77 @@ TargetRegisterInfo::getRegSizeInBits(Register Reg, return getRegSizeInBits(*RC); } +bool TargetRegisterInfo::getCoveringSubRegIndexes( + const MachineRegisterInfo &MRI, const TargetRegisterClass *RC, + LaneBitmask LaneMask, SmallVectorImpl<unsigned> &NeededIndexes) const { + SmallVector<unsigned, 8> PossibleIndexes; + unsigned BestIdx = 0; + unsigned 
BestCover = 0; + + for (unsigned Idx = 1, E = getNumSubRegIndices(); Idx < E; ++Idx) { + // Is this index even compatible with the given class? + if (getSubClassWithSubReg(RC, Idx) != RC) + continue; + LaneBitmask SubRegMask = getSubRegIndexLaneMask(Idx); + // Early exit if we found a perfect match. + if (SubRegMask == LaneMask) { + BestIdx = Idx; + break; + } + + // The index must not cover any lanes outside \p LaneMask. + if ((SubRegMask & ~LaneMask).any()) + continue; + + unsigned PopCount = SubRegMask.getNumLanes(); + PossibleIndexes.push_back(Idx); + if (PopCount > BestCover) { + BestCover = PopCount; + BestIdx = Idx; + } + } + + // Abort if we cannot possibly implement the COPY with the given indexes. + if (BestIdx == 0) + return 0; + + NeededIndexes.push_back(BestIdx); + + // Greedy heuristic: Keep iterating, keeping the best covering subreg index + // each time. + LaneBitmask LanesLeft = LaneMask & ~getSubRegIndexLaneMask(BestIdx); + while (LanesLeft.any()) { + unsigned BestIdx = 0; + int BestCover = std::numeric_limits<int>::min(); + for (unsigned Idx : PossibleIndexes) { + LaneBitmask SubRegMask = getSubRegIndexLaneMask(Idx); + // Early exit if we found a perfect match. + if (SubRegMask == LanesLeft) { + BestIdx = Idx; + break; + } + + // Try to cover as many of the remaining lanes as possible but + // as few of the already covered lanes as possible. + int Cover = (SubRegMask & LanesLeft).getNumLanes() - + (SubRegMask & ~LanesLeft).getNumLanes(); + if (Cover > BestCover) { + BestCover = Cover; + BestIdx = Idx; + } + } + + if (BestIdx == 0) + return 0; // Impossible to handle + + NeededIndexes.push_back(BestIdx); + + LanesLeft &= ~getSubRegIndexLaneMask(BestIdx); + } + + return BestIdx; +} + Register TargetRegisterInfo::lookThruCopyLike(Register SrcReg, const MachineRegisterInfo *MRI) const { diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index ecee4aed7f88..1664b4dadfec 100644 --- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -527,6 +527,11 @@ bool TwoAddressInstructionPass::isProfitableToCommute(Register RegA, if (isRevCopyChain(RegB, RegA, MaxDataFlowEdge)) return false; + // Look for other target specific commute preference. + bool Commute; + if (TII->hasCommutePreference(*MI, Commute)) + return Commute; + // Since there are no intervening uses for both registers, then commute // if the def of RegC is closer. Its live interval is shorter. return LastDefB && LastDefC && LastDefC > LastDefB; @@ -801,8 +806,8 @@ bool TwoAddressInstructionPass::rescheduleMIBelowKill( MachineBasicBlock::iterator KillPos = KillMI; ++KillPos; for (MachineInstr &OtherMI : make_range(End, KillPos)) { - // Debug instructions cannot be counted against the limit. - if (OtherMI.isDebugInstr()) + // Debug or pseudo instructions cannot be counted against the limit. + if (OtherMI.isDebugOrPseudoInstr()) continue; if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost.
return false; @@ -1357,11 +1362,9 @@ void TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, TiedPairList &TiedPairs, unsigned &Dist) { - bool IsEarlyClobber = false; - for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) { - const MachineOperand &DstMO = MI->getOperand(TiedPairs[tpi].second); - IsEarlyClobber |= DstMO.isEarlyClobber(); - } + bool IsEarlyClobber = llvm::find_if(TiedPairs, [MI](auto const &TP) { + return MI->getOperand(TP.second).isEarlyClobber(); + }) != TiedPairs.end(); bool RemovedKillFlag = false; bool AllUsesCopied = true; @@ -1369,9 +1372,9 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, SlotIndex LastCopyIdx; Register RegB = 0; unsigned SubRegB = 0; - for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) { - unsigned SrcIdx = TiedPairs[tpi].first; - unsigned DstIdx = TiedPairs[tpi].second; + for (auto &TP : TiedPairs) { + unsigned SrcIdx = TP.first; + unsigned DstIdx = TP.second; const MachineOperand &DstMO = MI->getOperand(DstIdx); Register RegA = DstMO.getReg(); @@ -1549,9 +1552,8 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { .set(MachineFunctionProperties::Property::TiedOpsRewritten); TiedOperandMap TiedOperands; - for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); - MBBI != MBBE; ++MBBI) { - MBB = &*MBBI; + for (MachineBasicBlock &MBBI : *MF) { + MBB = &MBBI; unsigned Dist = 0; DistanceMap.clear(); SrcRegMap.clear(); diff --git a/llvm/lib/CodeGen/TypePromotion.cpp b/llvm/lib/CodeGen/TypePromotion.cpp index a42095d8718a..2ce6ea1d4212 100644 --- a/llvm/lib/CodeGen/TypePromotion.cpp +++ b/llvm/lib/CodeGen/TypePromotion.cpp @@ -30,9 +30,6 @@ #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/IR/IntrinsicsARM.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/IR/Verifier.h" @@ -923,9 +920,6 @@ bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth) { if (ToPromote < 2 || (Blocks.size() == 1 && (NonFreeArgs > SafeWrap.size()))) return false; - if (ToPromote < 2) - return false; - IRPromoter Promoter(*Ctx, cast<IntegerType>(OrigTy), PromotedWidth, CurrentVisited, Sources, Sinks, SafeWrap); Promoter.Mutate(); @@ -952,7 +946,8 @@ bool TypePromotion::runOnFunction(Function &F) { const TargetLowering *TLI = SubtargetInfo->getTargetLowering(); const TargetTransformInfo &TII = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); - RegisterBitWidth = TII.getRegisterBitWidth(false); + RegisterBitWidth = + TII.getRegisterBitWidth(TargetTransformInfo::RGK_Scalar).getFixedSize(); Ctx = &F.getParent()->getContext(); // Search up from icmps to try to promote their operands. diff --git a/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/llvm/lib/CodeGen/UnreachableBlockElim.cpp index f5dc589a98cb..c9a19948ff2f 100644 --- a/llvm/lib/CodeGen/UnreachableBlockElim.cpp +++ b/llvm/lib/CodeGen/UnreachableBlockElim.cpp @@ -114,25 +114,23 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { // Loop over all dead blocks, remembering them and deleting all instructions // in them. std::vector<MachineBasicBlock*> DeadBlocks; - for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) { - MachineBasicBlock *BB = &*I; - + for (MachineBasicBlock &BB : F) { // Test for deadness. 
diff --git a/llvm/lib/CodeGen/TypePromotion.cpp b/llvm/lib/CodeGen/TypePromotion.cpp
index a42095d8718a..2ce6ea1d4212 100644
--- a/llvm/lib/CodeGen/TypePromotion.cpp
+++ b/llvm/lib/CodeGen/TypePromotion.cpp
@@ -30,9 +30,6 @@
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/IntrinsicsARM.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Value.h"
 #include "llvm/IR/Verifier.h"
@@ -923,9 +920,6 @@ bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth) {
  if (ToPromote < 2 || (Blocks.size() == 1 && (NonFreeArgs > SafeWrap.size())))
    return false;

-  if (ToPromote < 2)
-    return false;
-
  IRPromoter Promoter(*Ctx, cast<IntegerType>(OrigTy), PromotedWidth,
                      CurrentVisited, Sources, Sinks, SafeWrap);
  Promoter.Mutate();
@@ -952,7 +946,8 @@ bool TypePromotion::runOnFunction(Function &F) {
  const TargetLowering *TLI = SubtargetInfo->getTargetLowering();
  const TargetTransformInfo &TII =
      getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
-  RegisterBitWidth = TII.getRegisterBitWidth(false);
+  RegisterBitWidth =
+      TII.getRegisterBitWidth(TargetTransformInfo::RGK_Scalar).getFixedSize();
  Ctx = &F.getParent()->getContext();

  // Search up from icmps to try to promote their operands.
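The runOnFunction() hunk above reflects the TTI API change: getRegisterBitWidth() now takes a register-kind enum and returns a TypeSize instead of an unsigned, so scalable widths have to be handled (or, as here, asserted away via getFixedSize). A small sketch of the new call shape, assuming a scalar query where taking the fixed size is safe:

// Sketch of the new register-width query used above.
#include "llvm/Analysis/TargetTransformInfo.h"
using namespace llvm;

unsigned scalarRegBits(const TargetTransformInfo &TTI) {
  TypeSize Width = TTI.getRegisterBitWidth(TargetTransformInfo::RGK_Scalar);
  // Scalar registers are never scalable, so the fixed size is well-defined;
  // for RGK_ScalableVector this call would assert.
  return Width.getFixedSize();
}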
diff --git a/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/llvm/lib/CodeGen/UnreachableBlockElim.cpp
index f5dc589a98cb..c9a19948ff2f 100644
--- a/llvm/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/llvm/lib/CodeGen/UnreachableBlockElim.cpp
@@ -114,25 +114,23 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
  // Loop over all dead blocks, remembering them and deleting all instructions
  // in them.
  std::vector<MachineBasicBlock*> DeadBlocks;
-  for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
-    MachineBasicBlock *BB = &*I;
-
+  for (MachineBasicBlock &BB : F) {
    // Test for deadness.
-    if (!Reachable.count(BB)) {
-      DeadBlocks.push_back(BB);
+    if (!Reachable.count(&BB)) {
+      DeadBlocks.push_back(&BB);

      // Update dominator and loop info.
-      if (MLI) MLI->removeBlock(BB);
-      if (MDT && MDT->getNode(BB)) MDT->eraseNode(BB);
+      if (MLI) MLI->removeBlock(&BB);
+      if (MDT && MDT->getNode(&BB)) MDT->eraseNode(&BB);

-      while (BB->succ_begin() != BB->succ_end()) {
-        MachineBasicBlock* succ = *BB->succ_begin();
+      while (BB.succ_begin() != BB.succ_end()) {
+        MachineBasicBlock* succ = *BB.succ_begin();

        MachineBasicBlock::iterator start = succ->begin();
        while (start != succ->end() && start->isPHI()) {
          for (unsigned i = start->getNumOperands() - 1; i >= 2; i-=2)
            if (start->getOperand(i).isMBB() &&
-                start->getOperand(i).getMBB() == BB) {
+                start->getOperand(i).getMBB() == &BB) {
              start->RemoveOperand(i);
              start->RemoveOperand(i-1);
            }
@@ -140,7 +138,7 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
          start++;
        }

-        BB->removeSuccessor(BB->succ_begin());
+        BB.removeSuccessor(BB.succ_begin());
      }
    }
  }
diff --git a/llvm/lib/CodeGen/ValueTypes.cpp b/llvm/lib/CodeGen/ValueTypes.cpp
index 978357d8f539..9daebfd9e63d 100644
--- a/llvm/lib/CodeGen/ValueTypes.cpp
+++ b/llvm/lib/CodeGen/ValueTypes.cpp
@@ -15,18 +15,20 @@
 using namespace llvm;

 EVT EVT::changeExtendedTypeToInteger() const {
+  assert(isExtended() && "Type is not extended!");
  LLVMContext &Context = LLVMTy->getContext();
  return getIntegerVT(Context, getSizeInBits());
 }

 EVT EVT::changeExtendedVectorElementTypeToInteger() const {
+  assert(isExtended() && "Type is not extended!");
  LLVMContext &Context = LLVMTy->getContext();
  EVT IntTy = getIntegerVT(Context, getScalarSizeInBits());
-  return getVectorVT(Context, IntTy, getVectorNumElements(),
-                     isScalableVector());
+  return getVectorVT(Context, IntTy, getVectorElementCount());
 }

 EVT EVT::changeExtendedVectorElementType(EVT EltVT) const {
+  assert(isExtended() && "Type is not extended!");
  LLVMContext &Context = LLVMTy->getContext();
  return getVectorVT(Context, EltVT, getVectorElementCount());
 }
@@ -196,6 +198,10 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
  case MVT::ppcf128:  return Type::getPPC_FP128Ty(Context);
  case MVT::x86mmx:   return Type::getX86_MMXTy(Context);
  case MVT::x86amx:   return Type::getX86_AMXTy(Context);
+  case MVT::externref:
+    return PointerType::get(StructType::create(Context), 10);
+  case MVT::funcref:
+    return PointerType::get(StructType::create(Context), 20);
  case MVT::v1i1:
    return FixedVectorType::get(Type::getInt1Ty(Context), 1);
  case MVT::v2i1:
@@ -236,6 +242,10 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
    return FixedVectorType::get(Type::getInt8Ty(Context), 128);
  case MVT::v256i8:
    return FixedVectorType::get(Type::getInt8Ty(Context), 256);
+  case MVT::v512i8:
+    return FixedVectorType::get(Type::getInt8Ty(Context), 512);
+  case MVT::v1024i8:
+    return FixedVectorType::get(Type::getInt8Ty(Context), 1024);
  case MVT::v1i16:
    return FixedVectorType::get(Type::getInt16Ty(Context), 1);
  case MVT::v2i16:
@@ -254,6 +264,10 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
    return FixedVectorType::get(Type::getInt16Ty(Context), 64);
  case MVT::v128i16:
    return FixedVectorType::get(Type::getInt16Ty(Context), 128);
+  case MVT::v256i16:
+    return FixedVectorType::get(Type::getInt16Ty(Context), 256);
+  case MVT::v512i16:
+    return FixedVectorType::get(Type::getInt16Ty(Context), 512);
  case MVT::v1i32:
    return FixedVectorType::get(Type::getInt32Ty(Context), 1);
  case MVT::v2i32:
@@ -264,6 +278,10 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
    return FixedVectorType::get(Type::getInt32Ty(Context), 4);
  case MVT::v5i32:
    return FixedVectorType::get(Type::getInt32Ty(Context), 5);
+  case MVT::v6i32:
+    return FixedVectorType::get(Type::getInt32Ty(Context), 6);
+  case MVT::v7i32:
+    return FixedVectorType::get(Type::getInt32Ty(Context), 7);
  case MVT::v8i32:
    return FixedVectorType::get(Type::getInt32Ty(Context), 8);
  case MVT::v16i32:
@@ -286,6 +304,8 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
    return FixedVectorType::get(Type::getInt64Ty(Context), 1);
  case MVT::v2i64:
    return FixedVectorType::get(Type::getInt64Ty(Context), 2);
+  case MVT::v3i64:
+    return FixedVectorType::get(Type::getInt64Ty(Context), 3);
  case MVT::v4i64:
    return FixedVectorType::get(Type::getInt64Ty(Context), 4);
  case MVT::v8i64:
@@ -302,6 +322,8 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
    return FixedVectorType::get(Type::getInt64Ty(Context), 256);
  case MVT::v1i128:
    return FixedVectorType::get(Type::getInt128Ty(Context), 1);
+  case MVT::v1f16:
+    return FixedVectorType::get(Type::getHalfTy(Context), 1);
  case MVT::v2f16:
    return FixedVectorType::get(Type::getHalfTy(Context), 2);
  case MVT::v3f16:
@@ -318,6 +340,10 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
    return FixedVectorType::get(Type::getHalfTy(Context), 64);
  case MVT::v128f16:
    return FixedVectorType::get(Type::getHalfTy(Context), 128);
+  case MVT::v256f16:
+    return FixedVectorType::get(Type::getHalfTy(Context), 256);
+  case MVT::v512f16:
+    return FixedVectorType::get(Type::getHalfTy(Context), 512);
  case MVT::v2bf16:
    return FixedVectorType::get(Type::getBFloatTy(Context), 2);
  case MVT::v3bf16:
@@ -344,6 +370,10 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
    return FixedVectorType::get(Type::getFloatTy(Context), 4);
  case MVT::v5f32:
    return FixedVectorType::get(Type::getFloatTy(Context), 5);
+  case MVT::v6f32:
+    return FixedVectorType::get(Type::getFloatTy(Context), 6);
+  case MVT::v7f32:
+    return FixedVectorType::get(Type::getFloatTy(Context), 7);
  case MVT::v8f32:
    return FixedVectorType::get(Type::getFloatTy(Context), 8);
  case MVT::v16f32:
@@ -366,6 +396,8 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
    return FixedVectorType::get(Type::getDoubleTy(Context), 1);
  case MVT::v2f64:
    return FixedVectorType::get(Type::getDoubleTy(Context), 2);
+  case MVT::v3f64:
+    return FixedVectorType::get(Type::getDoubleTy(Context), 3);
  case MVT::v4f64:
    return FixedVectorType::get(Type::getDoubleTy(Context), 4);
  case MVT::v8f64:
@@ -456,6 +488,8 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
    return ScalableVectorType::get(Type::getHalfTy(Context), 16);
  case MVT::nxv32f16:
    return ScalableVectorType::get(Type::getHalfTy(Context), 32);
+  case MVT::nxv1bf16:
+    return ScalableVectorType::get(Type::getBFloatTy(Context), 1);
  case MVT::nxv2bf16:
    return ScalableVectorType::get(Type::getBFloatTy(Context), 2);
  case MVT::nxv4bf16:
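The ValueTypes.cpp hunks above fold the old (NumElements, IsScalable) argument pair into a single ElementCount, so one code path can rebuild a vector type for fixed and scalable vectors alike. A short sketch of that shape; sameShapeAsInteger is an invented helper and assumes VT.isVector():

// Sketch: rebuild a vector EVT with integer elements of the same width,
// preserving the element count and the scalable bit in one call.
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

EVT sameShapeAsInteger(LLVMContext &Ctx, EVT VT) {
  // Works for v4i32 and nxv4f16 alike: ElementCount carries the scalable
  // bit along with the element count, so nothing can get out of sync.
  EVT IntElt = EVT::getIntegerVT(Ctx, VT.getScalarSizeInBits());
  return EVT::getVectorVT(Ctx, IntElt, VT.getVectorElementCount());
}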
diff --git a/llvm/lib/CodeGen/VirtRegMap.cpp b/llvm/lib/CodeGen/VirtRegMap.cpp
index 5e0ff9d9092c..0f164e2637a2 100644
--- a/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -30,6 +30,7 @@
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -94,12 +95,18 @@ void VirtRegMap::assignVirt2Phys(Register virtReg, MCPhysReg physReg) {
 unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) {
  unsigned Size = TRI->getSpillSize(*RC);
  Align Alignment = TRI->getSpillAlign(*RC);
+  // Set preferred alignment if we are still able to realign the stack
+  auto &ST = MF->getSubtarget();
+  Align CurrentAlign = ST.getFrameLowering()->getStackAlign();
+  if (Alignment > CurrentAlign && !ST.getRegisterInfo()->canRealignStack(*MF)) {
+    Alignment = CurrentAlign;
+  }
  int SS = MF->getFrameInfo().CreateSpillStackObject(Size, Alignment);
  ++NumSpillSlots;
  return SS;
 }

-bool VirtRegMap::hasPreferredPhys(Register VirtReg) {
+bool VirtRegMap::hasPreferredPhys(Register VirtReg) const {
  Register Hint = MRI->getSimpleHint(VirtReg);
  if (!Hint.isValid())
    return false;
@@ -108,7 +115,7 @@ bool VirtRegMap::hasPreferredPhys(Register VirtReg) {
  return Register(getPhys(VirtReg)) == Hint;
 }

-bool VirtRegMap::hasKnownPreference(Register VirtReg) {
+bool VirtRegMap::hasKnownPreference(Register VirtReg) const {
  std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(VirtReg);
  if (Register::isPhysicalRegister(Hint.second))
    return true;
@@ -181,27 +188,35 @@ class VirtRegRewriter : public MachineFunctionPass {
  SlotIndexes *Indexes;
  LiveIntervals *LIS;
  VirtRegMap *VRM;
+  LiveDebugVariables *DebugVars;
+  DenseSet<Register> RewriteRegs;
+  bool ClearVirtRegs;

  void rewrite();
  void addMBBLiveIns();
  bool readsUndefSubreg(const MachineOperand &MO) const;
-  void addLiveInsForSubRanges(const LiveInterval &LI, Register PhysReg) const;
-  void handleIdentityCopy(MachineInstr &MI) const;
+  void addLiveInsForSubRanges(const LiveInterval &LI, MCRegister PhysReg) const;
+  void handleIdentityCopy(MachineInstr &MI);
  void expandCopyBundle(MachineInstr &MI) const;
  bool subRegLiveThrough(const MachineInstr &MI, MCRegister SuperPhysReg) const;

 public:
  static char ID;
-
-  VirtRegRewriter() : MachineFunctionPass(ID) {}
+  VirtRegRewriter(bool ClearVirtRegs_ = true) :
+    MachineFunctionPass(ID),
+    ClearVirtRegs(ClearVirtRegs_) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override;
  bool runOnMachineFunction(MachineFunction&) override;

  MachineFunctionProperties getSetProperties() const override {
-    return MachineFunctionProperties().set(
+    if (ClearVirtRegs) {
+      return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
+    }
+
+    return MachineFunctionProperties();
  }
 };

@@ -224,12 +239,17 @@ INITIALIZE_PASS_END(VirtRegRewriter, "virtregrewriter",
 void VirtRegRewriter::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  AU.addRequired<LiveIntervals>();
+  AU.addPreserved<LiveIntervals>();
  AU.addRequired<SlotIndexes>();
  AU.addPreserved<SlotIndexes>();
  AU.addRequired<LiveDebugVariables>();
  AU.addRequired<LiveStacks>();
  AU.addPreserved<LiveStacks>();
  AU.addRequired<VirtRegMap>();
+
+  if (!ClearVirtRegs)
+    AU.addPreserved<LiveDebugVariables>();
+
  MachineFunctionPass::getAnalysisUsage(AU);
 }

@@ -241,6 +261,7 @@ bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) {
  Indexes = &getAnalysis<SlotIndexes>();
  LIS = &getAnalysis<LiveIntervals>();
  VRM = &getAnalysis<VirtRegMap>();
+  DebugVars = getAnalysisIfAvailable<LiveDebugVariables>();
  LLVM_DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n"
                    << "********** Function: " << MF->getName() << '\n');
  LLVM_DEBUG(VRM->dump());
@@ -254,18 +275,24 @@ bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) {
  // Rewrite virtual registers.
  rewrite();

-  // Write out new DBG_VALUE instructions.
-  getAnalysis<LiveDebugVariables>().emitDebugValues(VRM);
+  if (DebugVars && ClearVirtRegs) {
+    // Write out new DBG_VALUE instructions.
+
+    // We only do this if ClearVirtRegs is specified since this should be the
+    // final run of the pass and we don't want to emit them multiple times.
+    DebugVars->emitDebugValues(VRM);
+
+    // All machine operands and other references to virtual registers have been
+    // replaced. Remove the virtual registers and release all the transient data.
+    VRM->clearAllVirt();
+    MRI->clearVirtRegs();
+  }

-  // All machine operands and other references to virtual registers have been
-  // replaced. Remove the virtual registers and release all the transient data.
-  VRM->clearAllVirt();
-  MRI->clearVirtRegs();
  return true;
 }

 void VirtRegRewriter::addLiveInsForSubRanges(const LiveInterval &LI,
-                                             Register PhysReg) const {
+                                             MCRegister PhysReg) const {
  assert(!LI.empty());
  assert(LI.hasSubRanges());

@@ -321,7 +348,12 @@ void VirtRegRewriter::addMBBLiveIns() {
      // This is a virtual register that is live across basic blocks. Its
      // assigned PhysReg must be marked as live-in to those blocks.
      Register PhysReg = VRM->getPhys(VirtReg);
-      assert(PhysReg != VirtRegMap::NO_PHYS_REG && "Unmapped virtual register.");
+      if (PhysReg == VirtRegMap::NO_PHYS_REG) {
+        // There may be no physical register assigned if only some register
+        // classes were already allocated.
+        assert(!ClearVirtRegs && "Unmapped virtual register");
+        continue;
+      }

      if (LI.hasSubRanges()) {
        addLiveInsForSubRanges(LI, PhysReg);
@@ -372,12 +404,21 @@ bool VirtRegRewriter::readsUndefSubreg(const MachineOperand &MO) const {
  return true;
 }

-void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) const {
+void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) {
  if (!MI.isIdentityCopy())
    return;
  LLVM_DEBUG(dbgs() << "Identity copy: " << MI);
  ++NumIdCopies;

+  Register DstReg = MI.getOperand(0).getReg();
+
+  // We may have deferred allocation of the virtual register, and the rewrite
+  // regs code doesn't handle the liveness update.
+  if (DstReg.isVirtual())
+    return;
+
+  RewriteRegs.insert(DstReg);
+
  // Copies like:
  //    %r0 = COPY undef %r0
  //    %al = COPY %al, implicit-def %eax
@@ -517,8 +558,12 @@ void VirtRegRewriter::rewrite() {
          continue;
        Register VirtReg = MO.getReg();
        MCRegister PhysReg = VRM->getPhys(VirtReg);
-        assert(PhysReg != VirtRegMap::NO_PHYS_REG &&
-               "Instruction uses unmapped VirtReg");
+        if (PhysReg == VirtRegMap::NO_PHYS_REG)
+          continue;
+
+        assert(Register(PhysReg).isPhysical());
+
+        RewriteRegs.insert(PhysReg);
        assert(!MRI->isReserved(PhysReg) && "Reserved register assignment");

        // Preserve semantics of sub-register operands.
@@ -590,4 +635,21 @@ void VirtRegRewriter::rewrite() {
      handleIdentityCopy(*MI);
    }
  }
+
+  if (LIS) {
+    // Don't bother maintaining accurate LiveIntervals for registers which were
+    // already allocated.
+    for (Register PhysReg : RewriteRegs) {
+      for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid();
+           ++Units) {
+        LIS->removeRegUnit(*Units);
+      }
+    }
+  }
+
+  RewriteRegs.clear();
+}
+
+FunctionPass *llvm::createVirtRegRewriter(bool ClearVirtRegs) {
+  return new VirtRegRewriter(ClearVirtRegs);
 }
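The createSpillSlot() hunk near the top of this file's diff only keeps the preferred (larger) spill alignment when the target can still realign the stack. That decision in isolation, as a hedged sketch; clampSpillAlign and the CanRealignStack flag are illustrative stand-ins for the subtarget queries:

// Sketch of the spill-slot alignment clamp added above.
#include "llvm/Support/Alignment.h"
using namespace llvm;

Align clampSpillAlign(Align Preferred, Align StackAlign,
                      bool CanRealignStack) {
  // Requesting more alignment than the stack guarantees is only sound if
  // the frame can be dynamically realigned; otherwise fall back to the
  // base stack alignment.
  if (Preferred > StackAlign && !CanRealignStack)
    return StackAlign;
  return Preferred;
}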
diff --git a/llvm/lib/CodeGen/WasmEHPrepare.cpp b/llvm/lib/CodeGen/WasmEHPrepare.cpp
index 53424556682d..c4c84cd921fa 100644
--- a/llvm/lib/CodeGen/WasmEHPrepare.cpp
+++ b/llvm/lib/CodeGen/WasmEHPrepare.cpp
@@ -77,21 +77,12 @@
 //
 //===----------------------------------------------------------------------===//

-#include "llvm/ADT/BreadthFirstIterator.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/Triple.h"
-#include "llvm/Analysis/DomTreeUpdater.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/CodeGen/WasmEHFuncInfo.h"
-#include "llvm/IR/Dominators.h"
 #include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/IntrinsicsWebAssembly.h"
 #include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"

 using namespace llvm;
@@ -117,19 +108,14 @@ class WasmEHPrepare : public FunctionPass {
  FunctionCallee CallPersonalityF = nullptr; // _Unwind_CallPersonality() wrapper

-  bool prepareEHPads(Function &F);
  bool prepareThrows(Function &F);
-
-  bool IsEHPadFunctionsSetUp = false;
-  void setupEHPadFunctions(Function &F);
-  void prepareEHPad(BasicBlock *BB, bool NeedPersonality, bool NeedLSDA = false,
-                    unsigned Index = 0);
+  bool prepareEHPads(Function &F);
+  void prepareEHPad(BasicBlock *BB, bool NeedPersonality, unsigned Index = 0);

 public:
  static char ID; // Pass identification, replacement for typeid

  WasmEHPrepare() : FunctionPass(ID) {}
-  void getAnalysisUsage(AnalysisUsage &AU) const override;
  bool doInitialization(Module &M) override;
  bool runOnFunction(Function &F) override;

@@ -142,16 +128,11 @@ public:
 char WasmEHPrepare::ID = 0;
 INITIALIZE_PASS_BEGIN(WasmEHPrepare, DEBUG_TYPE,
                      "Prepare WebAssembly exceptions", false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_END(WasmEHPrepare, DEBUG_TYPE, "Prepare WebAssembly exceptions",
                    false, false)

 FunctionPass *llvm::createWasmEHPass() { return new WasmEHPrepare(); }

-void WasmEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.addRequired<DominatorTreeWrapperPass>();
-}
-
 bool WasmEHPrepare::doInitialization(Module &M) {
  IRBuilder<> IRB(M.getContext());
  LPadContextTy = StructType::get(IRB.getInt32Ty(),   // lpad_index
@@ -164,19 +145,18 @@ bool WasmEHPrepare::doInitialization(Module &M) {

 // Erase the specified BBs if the BB does not have any remaining predecessors,
 // and also all its dead children.
 template <typename Container>
-static void eraseDeadBBsAndChildren(const Container &BBs, DomTreeUpdater *DTU) {
+static void eraseDeadBBsAndChildren(const Container &BBs) {
  SmallVector<BasicBlock *, 8> WL(BBs.begin(), BBs.end());
  while (!WL.empty()) {
    auto *BB = WL.pop_back_val();
    if (!pred_empty(BB))
      continue;
    WL.append(succ_begin(BB), succ_end(BB));
-    DeleteDeadBlock(BB, DTU);
+    DeleteDeadBlock(BB);
  }
 }

 bool WasmEHPrepare::runOnFunction(Function &F) {
-  IsEHPadFunctionsSetUp = false;
  bool Changed = false;
  Changed |= prepareThrows(F);
  Changed |= prepareEHPads(F);
@@ -184,9 +164,6 @@ bool WasmEHPrepare::runOnFunction(Function &F) {
 }

 bool WasmEHPrepare::prepareThrows(Function &F) {
-  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-  DomTreeUpdater DTU(&DT, /*PostDominatorTree*/ nullptr,
-                     DomTreeUpdater::UpdateStrategy::Eager);
  Module &M = *F.getParent();
  IRBuilder<> IRB(F.getContext());
  bool Changed = false;
@@ -209,102 +186,30 @@ bool WasmEHPrepare::prepareThrows(Function &F) {
    InstList.erase(std::next(BasicBlock::iterator(ThrowI)), InstList.end());
    IRB.SetInsertPoint(BB);
    IRB.CreateUnreachable();
-    eraseDeadBBsAndChildren(Succs, &DTU);
+    eraseDeadBBsAndChildren(Succs);
  }

  return Changed;
 }

 bool WasmEHPrepare::prepareEHPads(Function &F) {
-  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-  bool Changed = false;
+  Module &M = *F.getParent();
+  IRBuilder<> IRB(F.getContext());

-  // There are two things to decide: whether we need a personality function call
-  // and whether we need a `wasm.lsda()` call and its store.
-  //
-  // For the personality function call, catchpads with `catch (...)` and
-  // cleanuppads don't need it, because exceptions are always caught. Others all
-  // need it.
-  //
-  // For `wasm.lsda()` and its store, in order to minimize the number of them,
-  // we need a way to figure out whether we have encountered `wasm.lsda()` call
-  // in any of EH pads that dominates the current EH pad. To figure that out, we
-  // now visit EH pads in BFS order in the dominator tree so that we visit
-  // parent BBs first before visiting its child BBs in the domtree.
-  //
-  // We keep a set named `ExecutedLSDA`, which basically means "Do we have
-  // `wasm.lsda() either in the current EH pad or any of its parent EH pads in
-  // the dominator tree?". This is to prevent scanning the domtree up to the
-  // root every time we examine an EH pad, in the worst case: each EH pad only
-  // needs to check its immediate parent EH pad.
-  //
-  // - If any of its parent EH pads in the domtree has `wasm.lsda`, this means
-  //   we don't need `wasm.lsda()` in the current EH pad. We also insert the
-  //   current EH pad in `ExecutedLSDA` set.
-  // - If none of its parent EH pad has `wasm.lsda()`,
-  //   - If the current EH pad is a `catch (...)` or a cleanuppad, done.
-  //   - If the current EH pad is neither a `catch (...)` nor a cleanuppad,
-  //     add `wasm.lsda()` and the store in the current EH pad, and add the
-  //     current EH pad to `ExecutedLSDA` set.
-  //
-  // TODO Can we not store LSDA address in user function but make libcxxabi
-  // compute it?
-  DenseSet<Value *> ExecutedLSDA;
-  unsigned Index = 0;
-  for (auto DomNode : breadth_first(&DT)) {
-    auto *BB = DomNode->getBlock();
-    auto *Pad = BB->getFirstNonPHI();
-    if (!Pad || (!isa<CatchPadInst>(Pad) && !isa<CleanupPadInst>(Pad)))
+  SmallVector<BasicBlock *, 16> CatchPads;
+  SmallVector<BasicBlock *, 16> CleanupPads;
+  for (BasicBlock &BB : F) {
+    if (!BB.isEHPad())
      continue;
-    Changed = true;
-
-    Value *ParentPad = nullptr;
-    if (CatchPadInst *CPI = dyn_cast<CatchPadInst>(Pad)) {
-      ParentPad = CPI->getCatchSwitch()->getParentPad();
-      if (ExecutedLSDA.count(ParentPad)) {
-        ExecutedLSDA.insert(CPI);
-        // We insert its associated catchswitch too, because
-        // FuncletPadInst::getParentPad() returns a CatchSwitchInst if the child
-        // FuncletPadInst is a CleanupPadInst.
-        ExecutedLSDA.insert(CPI->getCatchSwitch());
-      }
-    } else { // CleanupPadInst
-      ParentPad = cast<CleanupPadInst>(Pad)->getParentPad();
-      if (ExecutedLSDA.count(ParentPad))
-        ExecutedLSDA.insert(Pad);
-    }
-
-    if (CatchPadInst *CPI = dyn_cast<CatchPadInst>(Pad)) {
-      if (CPI->getNumArgOperands() == 1 &&
-          cast<Constant>(CPI->getArgOperand(0))->isNullValue())
-        // In case of a single catch (...), we need neither personality call nor
-        // wasm.lsda() call
-        prepareEHPad(BB, false);
-      else {
-        if (ExecutedLSDA.count(CPI))
-          // catch (type), but one of parents already has wasm.lsda() call
-          prepareEHPad(BB, true, false, Index++);
-        else {
-          // catch (type), and none of parents has wasm.lsda() call. We have to
-          // add the call in this EH pad, and record this EH pad in
-          // ExecutedLSDA.
-          ExecutedLSDA.insert(CPI);
-          ExecutedLSDA.insert(CPI->getCatchSwitch());
-          prepareEHPad(BB, true, true, Index++);
-        }
-      }
-    } else if (isa<CleanupPadInst>(Pad)) {
-      // Cleanup pads need neither personality call nor wasm.lsda() call
-      prepareEHPad(BB, false);
-    }
+    auto *Pad = BB.getFirstNonPHI();
+    if (isa<CatchPadInst>(Pad))
+      CatchPads.push_back(&BB);
+    else if (isa<CleanupPadInst>(Pad))
+      CleanupPads.push_back(&BB);
  }
+  if (CatchPads.empty() && CleanupPads.empty())
+    return false;

-  return Changed;
-}
-
-void WasmEHPrepare::setupEHPadFunctions(Function &F) {
-  Module &M = *F.getParent();
-  IRBuilder<> IRB(F.getContext());
  assert(F.hasPersonalityFn() && "Personality function not found");

  // __wasm_lpad_context global variable
@@ -336,16 +241,30 @@ void WasmEHPrepare::setupEHPadFunctions(Function &F) {
      "_Unwind_CallPersonality", IRB.getInt32Ty(), IRB.getInt8PtrTy());
  if (Function *F = dyn_cast<Function>(CallPersonalityF.getCallee()))
    F->setDoesNotThrow();
+
+  unsigned Index = 0;
+  for (auto *BB : CatchPads) {
+    auto *CPI = cast<CatchPadInst>(BB->getFirstNonPHI());
+    // In case of a single catch (...), we don't need to emit a personality
+    // function call
+    if (CPI->getNumArgOperands() == 1 &&
+        cast<Constant>(CPI->getArgOperand(0))->isNullValue())
+      prepareEHPad(BB, false);
+    else
+      prepareEHPad(BB, true, Index++);
+  }
+
+  // Cleanup pads don't need a personality function call.
+  for (auto *BB : CleanupPads)
+    prepareEHPad(BB, false);
+
+  return true;
 }

 // Prepare an EH pad for Wasm EH handling. If NeedPersonality is false, Index is
 // ignored.
 void WasmEHPrepare::prepareEHPad(BasicBlock *BB, bool NeedPersonality,
-                                 bool NeedLSDA, unsigned Index) {
-  if (!IsEHPadFunctionsSetUp) {
-    IsEHPadFunctionsSetUp = true;
-    setupEHPadFunctions(*BB->getParent());
-  }
+                                 unsigned Index) {
  assert(BB->isEHPad() && "BB is not an EHPad!");
  IRBuilder<> IRB(BB->getContext());
  IRB.SetInsertPoint(&*BB->getFirstInsertionPt());
@@ -361,8 +280,8 @@ void WasmEHPrepare::prepareEHPad(BasicBlock *BB, bool NeedPersonality,
    }
  }

-  // Cleanup pads w/o __clang_call_terminate call do not have any of
-  // wasm.get.exception() or wasm.get.ehselector() calls. We need to do nothing.
+  // Cleanup pads do not have any of wasm.get.exception() or
+  // wasm.get.ehselector() calls. We need to do nothing.
  if (!GetExnCI) {
    assert(!GetSelectorCI &&
           "wasm.get.ehselector() cannot exist w/o wasm.get.exception()");
@@ -399,9 +318,11 @@ void WasmEHPrepare::prepareEHPad(BasicBlock *BB, bool NeedPersonality,
  IRB.CreateStore(IRB.getInt32(Index), LPadIndexField);

  auto *CPI = cast<CatchPadInst>(FPI);
-  if (NeedLSDA)
-    // Pseudocode: __wasm_lpad_context.lsda = wasm.lsda();
-    IRB.CreateStore(IRB.CreateCall(LSDAF), LSDAField);
+  // TODO Sometimes storing the LSDA address every time is not necessary, in
+  // case it is already set in a dominating EH pad and there is no function call
+  // between that EH pad and here. Consider optimizing those cases.
+  // Pseudocode: __wasm_lpad_context.lsda = wasm.lsda();
+  IRB.CreateStore(IRB.CreateCall(LSDAF), LSDAField);

  // Pseudocode: _Unwind_CallPersonality(exn);
  CallInst *PersCI = IRB.CreateCall(CallPersonalityF, CatchCI,
@@ -436,9 +357,9 @@ void llvm::calculateWasmEHInfo(const Function *F, WasmEHFuncInfo &EHInfo) {
      const Instruction *UnwindPad = UnwindBB->getFirstNonPHI();
      if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(UnwindPad))
        // Currently there should be only one handler per a catchswitch.
-        EHInfo.setEHPadUnwindDest(&BB, *CatchSwitch->handlers().begin());
+        EHInfo.setUnwindDest(&BB, *CatchSwitch->handlers().begin());
      else // cleanuppad
-        EHInfo.setEHPadUnwindDest(&BB, UnwindBB);
+        EHInfo.setUnwindDest(&BB, UnwindBB);
    }
  }
 }
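doInitialization() above builds LPadContextTy starting with an i32 lpad_index field, and prepareEHPad() stores into LPadIndexField and LSDAField before calling the _Unwind_CallPersonality() wrapper. A guessed C++ mirror of the __wasm_lpad_context layout those stores assume; the field names and types are assumptions inferred from this diff, not a documented ABI definition:

// Hypothetical mirror of __wasm_lpad_context as the stores above suggest:
// the pass writes lpad_index (always) and lsda (now unconditionally), and
// the personality wrapper is expected to fill in the selector.
#include <cstdint>

struct WasmLPadContext {
  uint32_t lpad_index; // index of the landing pad being entered
  void *lsda;          // LSDA (exception table) address of the function
  uint32_t selector;   // out-param: clause selector chosen by personality
};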
diff --git a/llvm/lib/CodeGen/WinEHPrepare.cpp b/llvm/lib/CodeGen/WinEHPrepare.cpp
index 96d256ba57a3..4564aa1c1278 100644
--- a/llvm/lib/CodeGen/WinEHPrepare.cpp
+++ b/llvm/lib/CodeGen/WinEHPrepare.cpp
@@ -714,16 +714,14 @@ void WinEHPrepare::demotePHIsOnFunclets(Function &F,
                                        bool DemoteCatchSwitchPHIOnly) {
  // Strip PHI nodes off of EH pads.
  SmallVector<PHINode *, 16> PHINodes;
-  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE;) {
-    BasicBlock *BB = &*FI++;
-    if (!BB->isEHPad())
+  for (BasicBlock &BB : make_early_inc_range(F)) {
+    if (!BB.isEHPad())
      continue;
-    if (DemoteCatchSwitchPHIOnly && !isa<CatchSwitchInst>(BB->getFirstNonPHI()))
+    if (DemoteCatchSwitchPHIOnly && !isa<CatchSwitchInst>(BB.getFirstNonPHI()))
      continue;

-    for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
-      Instruction *I = &*BI++;
-      auto *PN = dyn_cast<PHINode>(I);
+    for (Instruction &I : make_early_inc_range(BB)) {
+      auto *PN = dyn_cast<PHINode>(&I);
      // Stop at the first non-PHI.
      if (!PN)
        break;
@@ -986,9 +984,9 @@ void WinEHPrepare::removeImplausibleInstructions(Function &F) {
          BasicBlock::iterator CallI =
              std::prev(BB->getTerminator()->getIterator());
          auto *CI = cast<CallInst>(&*CallI);
-          changeToUnreachable(CI, /*UseLLVMTrap=*/false);
+          changeToUnreachable(CI);
        } else {
-          changeToUnreachable(&I, /*UseLLVMTrap=*/false);
+          changeToUnreachable(&I);
        }

        // There are no more instructions in the block (except for unreachable),
@@ -1009,7 +1007,7 @@ void WinEHPrepare::removeImplausibleInstructions(Function &F) {
          IsUnreachableCleanupret = CRI->getCleanupPad() != CleanupPad;
        if (IsUnreachableRet || IsUnreachableCatchret ||
            IsUnreachableCleanupret) {
-          changeToUnreachable(TI, /*UseLLVMTrap=*/false);
+          changeToUnreachable(TI);
        } else if (isa<InvokeInst>(TI)) {
          if (Personality == EHPersonality::MSVC_CXX && CleanupPad) {
            // Invokes within a cleanuppad for the MSVC++ personality never
@@ -1025,11 +1023,10 @@ void WinEHPrepare::removeImplausibleInstructions(Function &F) {
 void WinEHPrepare::cleanupPreparedFunclets(Function &F) {
  // Clean-up some of the mess we made by removing useless PHI nodes, trivial
  // branches, etc.
-  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE;) {
-    BasicBlock *BB = &*FI++;
-    SimplifyInstructionsInBlock(BB);
-    ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true);
-    MergeBlockIntoPredecessor(BB);
+  for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
+    SimplifyInstructionsInBlock(&BB);
+    ConstantFoldTerminator(&BB, /*DeleteDeadConditions=*/true);
+    MergeBlockIntoPredecessor(&BB);
  }

  // We might have some unreachable blocks after cleaning up some impossible
@@ -1109,9 +1106,7 @@ AllocaInst *WinEHPrepare::insertPHILoads(PHINode *PN, Function &F) {
  // Otherwise, we have a PHI on a terminator EHPad, and we give up and insert
  // loads of the slot before every use.
  DenseMap<BasicBlock *, Value *> Loads;
-  for (Value::use_iterator UI = PN->use_begin(), UE = PN->use_end();
-       UI != UE;) {
-    Use &U = *UI++;
+  for (Use &U : llvm::make_early_inc_range(PN->uses())) {
    auto *UsingInst = cast<Instruction>(U.getUser());
    if (isa<PHINode>(UsingInst) && UsingInst->getParent()->isEHPad()) {
      // Use is on an EH pad phi. Leave it alone; we'll insert loads and