Diffstat (limited to 'lib/CodeGen')
159 files changed, 15273 insertions, 17695 deletions
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 25842a7876a2..822a564441ac 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -148,7 +148,7 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { assert(State == NULL); State = new AggressiveAntiDepState(TRI->getNumRegs(), BB); - bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn()); + bool IsReturnBlock = (!BB->empty() && BB->back().isReturn()); std::vector<unsigned> &KillIndices = State->GetKillIndices(); std::vector<unsigned> &DefIndices = State->GetDefIndices(); @@ -157,7 +157,7 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { // In a return block, examine the function live-out regs. for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(), E = MRI.liveout_end(); I != E; ++I) { - for (const unsigned *Alias = TRI->getOverlaps(*I); + for (const uint16_t *Alias = TRI->getOverlaps(*I); unsigned Reg = *Alias; ++Alias) { State->UnionGroups(Reg, 0); KillIndices[Reg] = BB->size(); @@ -173,7 +173,7 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { SE = BB->succ_end(); SI != SE; ++SI) for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), E = (*SI)->livein_end(); I != E; ++I) { - for (const unsigned *Alias = TRI->getOverlaps(*I); + for (const uint16_t *Alias = TRI->getOverlaps(*I); unsigned Reg = *Alias; ++Alias) { State->UnionGroups(Reg, 0); KillIndices[Reg] = BB->size(); @@ -186,10 +186,10 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { // callee-saved register that is not saved in the prolog. const MachineFrameInfo *MFI = MF.getFrameInfo(); BitVector Pristine = MFI->getPristineRegs(BB); - for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) { + for (const uint16_t *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) { unsigned Reg = *I; if (!IsReturnBlock && !Pristine.test(Reg)) continue; - for (const unsigned *Alias = TRI->getOverlaps(Reg); + for (const uint16_t *Alias = TRI->getOverlaps(Reg); unsigned AliasReg = *Alias; ++Alias) { State->UnionGroups(AliasReg, 0); KillIndices[AliasReg] = BB->size(); @@ -265,7 +265,7 @@ void AggressiveAntiDepBreaker::GetPassthruRegs(MachineInstr *MI, IsImplicitDefUse(MI, MO)) { const unsigned Reg = MO.getReg(); PassthruRegs.insert(Reg); - for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); *Subreg; ++Subreg) { PassthruRegs.insert(*Subreg); } @@ -333,7 +333,7 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx, DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << tag); } // Repeat for subregisters. - for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); *Subreg; ++Subreg) { unsigned SubregReg = *Subreg; if (!State->IsLive(SubregReg)) { @@ -384,7 +384,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, // If MI's defs have a special allocation requirement, don't allow // any def registers to be changed. Also assume all registers // defined in a call must not be changed (ABI). 
- if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq() || + if (MI->isCall() || MI->hasExtraDefRegAllocReq() || TII->isPredicated(MI)) { DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)"); State->UnionGroups(Reg, 0); @@ -392,7 +392,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, // Any aliased that are live at this point are completely or // partially defined here, so group those aliases with Reg. - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { unsigned AliasReg = *Alias; if (State->IsLive(AliasReg)) { State->UnionGroups(Reg, AliasReg); @@ -423,7 +423,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, continue; // Update def for Reg and aliases. - for (const unsigned *Alias = TRI->getOverlaps(Reg); + for (const uint16_t *Alias = TRI->getOverlaps(Reg); unsigned AliasReg = *Alias; ++Alias) DefIndices[AliasReg] = Count; } @@ -451,8 +451,8 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI, // instruction which may not be executed. The second R6 def may or may not // re-define R6 so it's not safe to change it since the last R6 use cannot be // changed. - bool Special = MI->getDesc().isCall() || - MI->getDesc().hasExtraSrcRegAllocReq() || + bool Special = MI->isCall() || + MI->hasExtraSrcRegAllocReq() || TII->isPredicated(MI); // Scan the register uses for this instruction and update @@ -678,7 +678,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( goto next_super_reg; } else { bool found = false; - for (const unsigned *Alias = TRI->getAliasSet(NewReg); + for (const uint16_t *Alias = TRI->getAliasSet(NewReg); *Alias; ++Alias) { unsigned AliasReg = *Alias; if (State->IsLive(AliasReg) || @@ -780,6 +780,9 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( I != E; --Count) { MachineInstr *MI = --I; + if (MI->isDebugValue()) + continue; + DEBUG(dbgs() << "Anti: "); DEBUG(MI->dump()); diff --git a/lib/CodeGen/AllocationOrder.cpp b/lib/CodeGen/AllocationOrder.cpp index 1005f102bea6..87f64311a655 100644 --- a/lib/CodeGen/AllocationOrder.cpp +++ b/lib/CodeGen/AllocationOrder.cpp @@ -41,7 +41,7 @@ AllocationOrder::AllocationOrder(unsigned VirtReg, if (HintPair.first) { const TargetRegisterInfo &TRI = VRM.getTargetRegInfo(); // The remaining allocation order may depend on the hint. - ArrayRef<unsigned> Order = + ArrayRef<uint16_t> Order = TRI.getRawAllocationOrder(RC, HintPair.first, Hint, VRM.getMachineFunction()); if (Order.empty()) diff --git a/lib/CodeGen/AllocationOrder.h b/lib/CodeGen/AllocationOrder.h index d1e48a1f2e96..0ce7e0c3b5f6 100644 --- a/lib/CodeGen/AllocationOrder.h +++ b/lib/CodeGen/AllocationOrder.h @@ -34,8 +34,7 @@ public: /// AllocationOrder - Create a new AllocationOrder for VirtReg. /// @param VirtReg Virtual register to allocate for. /// @param VRM Virtual register map for function. - /// @param ReservedRegs Set of reserved registers as returned by - /// TargetRegisterInfo::getReservedRegs(). + /// @param RegClassInfo Information about reserved and allocatable registers. 
AllocationOrder(unsigned VirtReg, const VirtRegMap &VRM, const RegisterClassInfo &RegClassInfo); diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp index fafc01044d4f..00874d411378 100644 --- a/lib/CodeGen/Analysis.cpp +++ b/lib/CodeGen/Analysis.cpp @@ -1,4 +1,4 @@ -//===-- Analysis.cpp - CodeGen LLVM IR Analysis Utilities --*- C++ ------*-===// +//===-- Analysis.cpp - CodeGen LLVM IR Analysis Utilities -----------------===// // // The LLVM Compiler Infrastructure // @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/Analysis.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/Instructions.h" @@ -149,33 +150,37 @@ llvm::hasInlineAsmMemConstraint(InlineAsm::ConstraintInfoVector &CInfos, /// consideration of global floating-point math flags. /// ISD::CondCode llvm::getFCmpCondCode(FCmpInst::Predicate Pred) { - ISD::CondCode FPC, FOC; switch (Pred) { - case FCmpInst::FCMP_FALSE: FOC = FPC = ISD::SETFALSE; break; - case FCmpInst::FCMP_OEQ: FOC = ISD::SETEQ; FPC = ISD::SETOEQ; break; - case FCmpInst::FCMP_OGT: FOC = ISD::SETGT; FPC = ISD::SETOGT; break; - case FCmpInst::FCMP_OGE: FOC = ISD::SETGE; FPC = ISD::SETOGE; break; - case FCmpInst::FCMP_OLT: FOC = ISD::SETLT; FPC = ISD::SETOLT; break; - case FCmpInst::FCMP_OLE: FOC = ISD::SETLE; FPC = ISD::SETOLE; break; - case FCmpInst::FCMP_ONE: FOC = ISD::SETNE; FPC = ISD::SETONE; break; - case FCmpInst::FCMP_ORD: FOC = FPC = ISD::SETO; break; - case FCmpInst::FCMP_UNO: FOC = FPC = ISD::SETUO; break; - case FCmpInst::FCMP_UEQ: FOC = ISD::SETEQ; FPC = ISD::SETUEQ; break; - case FCmpInst::FCMP_UGT: FOC = ISD::SETGT; FPC = ISD::SETUGT; break; - case FCmpInst::FCMP_UGE: FOC = ISD::SETGE; FPC = ISD::SETUGE; break; - case FCmpInst::FCMP_ULT: FOC = ISD::SETLT; FPC = ISD::SETULT; break; - case FCmpInst::FCMP_ULE: FOC = ISD::SETLE; FPC = ISD::SETULE; break; - case FCmpInst::FCMP_UNE: FOC = ISD::SETNE; FPC = ISD::SETUNE; break; - case FCmpInst::FCMP_TRUE: FOC = FPC = ISD::SETTRUE; break; - default: - llvm_unreachable("Invalid FCmp predicate opcode!"); - FOC = FPC = ISD::SETFALSE; - break; + case FCmpInst::FCMP_FALSE: return ISD::SETFALSE; + case FCmpInst::FCMP_OEQ: return ISD::SETOEQ; + case FCmpInst::FCMP_OGT: return ISD::SETOGT; + case FCmpInst::FCMP_OGE: return ISD::SETOGE; + case FCmpInst::FCMP_OLT: return ISD::SETOLT; + case FCmpInst::FCMP_OLE: return ISD::SETOLE; + case FCmpInst::FCMP_ONE: return ISD::SETONE; + case FCmpInst::FCMP_ORD: return ISD::SETO; + case FCmpInst::FCMP_UNO: return ISD::SETUO; + case FCmpInst::FCMP_UEQ: return ISD::SETUEQ; + case FCmpInst::FCMP_UGT: return ISD::SETUGT; + case FCmpInst::FCMP_UGE: return ISD::SETUGE; + case FCmpInst::FCMP_ULT: return ISD::SETULT; + case FCmpInst::FCMP_ULE: return ISD::SETULE; + case FCmpInst::FCMP_UNE: return ISD::SETUNE; + case FCmpInst::FCMP_TRUE: return ISD::SETTRUE; + default: llvm_unreachable("Invalid FCmp predicate opcode!"); + } +} + +ISD::CondCode llvm::getFCmpCodeWithoutNaN(ISD::CondCode CC) { + switch (CC) { + case ISD::SETOEQ: case ISD::SETUEQ: return ISD::SETEQ; + case ISD::SETONE: case ISD::SETUNE: return ISD::SETNE; + case ISD::SETOLT: case ISD::SETULT: return ISD::SETLT; + case ISD::SETOLE: case ISD::SETULE: return ISD::SETLE; + case ISD::SETOGT: case ISD::SETUGT: return ISD::SETGT; + case ISD::SETOGE: case ISD::SETUGE: return ISD::SETGE; + default: return CC; } - if (NoNaNsFPMath) - return FOC; - else - return FPC; } /// getICmpCondCode 
- Return the ISD condition code corresponding to @@ -195,7 +200,6 @@ ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) { case ICmpInst::ICMP_UGT: return ISD::SETUGT; default: llvm_unreachable("Invalid ICmp predicate opcode!"); - return ISD::SETNE; } } @@ -221,12 +225,13 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr, // longjmp on x86), it can end up causing miscompilation that has not // been fully understood. if (!Ret && - (!GuaranteedTailCallOpt || !isa<UnreachableInst>(Term))) return false; + (!TLI.getTargetMachine().Options.GuaranteedTailCallOpt || + !isa<UnreachableInst>(Term))) return false; // If I will have a chain, make sure no other instruction that will have a // chain interposes between I and the return. if (I->mayHaveSideEffects() || I->mayReadFromMemory() || - !I->isSafeToSpeculativelyExecute()) + !isSafeToSpeculativelyExecute(I)) for (BasicBlock::const_iterator BBI = prior(prior(ExitBB->end())); ; --BBI) { if (&*BBI == I) @@ -235,7 +240,7 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr, if (isa<DbgInfoIntrinsic>(BBI)) continue; if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() || - !BBI->isSafeToSpeculativelyExecute()) + !isSafeToSpeculativelyExecute(BBI)) return false; } @@ -250,7 +255,7 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr, // Conservatively require the attributes of the call to match those of // the return. Ignore noalias because it doesn't affect the call sequence. const Function *F = ExitBB->getParent(); - unsigned CallerRetAttr = F->getAttributes().getRetAttributes(); + Attributes CallerRetAttr = F->getAttributes().getRetAttributes(); if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias) return false; @@ -285,12 +290,12 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr, } bool llvm::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, - const TargetLowering &TLI) { + SDValue &Chain, const TargetLowering &TLI) { const Function *F = DAG.getMachineFunction().getFunction(); // Conservatively require the attributes of the call to match those of // the return. Ignore noalias because it doesn't affect the call sequence. - unsigned CallerRetAttr = F->getAttributes().getRetAttributes(); + Attributes CallerRetAttr = F->getAttributes().getRetAttributes(); if (CallerRetAttr & ~Attribute::NoAlias) return false; @@ -299,5 +304,5 @@ bool llvm::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, return false; // Check if the only use is a function return node. 
- return TLI.isUsedByReturnOnly(Node); + return TLI.isUsedByReturnOnly(Node, Chain); } diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp index 3f2387325360..b60fda86a6ba 100644 --- a/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -29,6 +29,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/FormattedStream.h" #include "llvm/ADT/SmallString.h" @@ -36,6 +37,12 @@ #include "llvm/ADT/Twine.h" using namespace llvm; +cl::opt<bool> +EnableARMEHABIDescriptors("arm-enable-ehabi-descriptors", cl::Hidden, + cl::desc("Generate ARM EHABI tables with unwinding descriptors"), + cl::init(false)); + + ARMException::ARMException(AsmPrinter *A) : DwarfException(A), shouldEmitTable(false), shouldEmitMoves(false), shouldEmitTableModule(false) @@ -72,13 +79,15 @@ void ARMException::EndFunction() { Asm->OutStreamer.EmitPersonality(PerSym); } - // Map all labels and get rid of any dead landing pads. - MMI->TidyLandingPads(); + if (EnableARMEHABIDescriptors) { + // Map all labels and get rid of any dead landing pads. + MMI->TidyLandingPads(); - Asm->OutStreamer.EmitHandlerData(); + Asm->OutStreamer.EmitHandlerData(); - // Emit actual exception table - EmitExceptionTable(); + // Emit actual exception table + EmitExceptionTable(); + } } Asm->OutStreamer.EmitFnEnd(); diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 1999f3608788..b0b2ff4882af 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -100,6 +100,7 @@ AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer) OutStreamer(Streamer), LastMI(0), LastFn(0), Counter(~0U), SetCounter(0) { DD = 0; DE = 0; MMI = 0; LI = 0; + CurrentFnSym = CurrentFnSymForSize = 0; GCMetadataPrinters = 0; VerboseAsm = Streamer.isVerboseAsm(); } @@ -613,6 +614,10 @@ bool AsmPrinter::needsSEHMoves() { MF->getFunction()->needsUnwindTableEntry(); } +bool AsmPrinter::needsRelocationsForDwarfStringPool() const { + return MAI->doesDwarfUseRelocationsForStringPool(); +} + void AsmPrinter::emitPrologLabel(const MachineInstr &MI) { MCSymbol *Label = MI.getOperand(0).getMCSymbol(); @@ -732,6 +737,18 @@ void AsmPrinter::EmitFunctionBody() { OutStreamer.EmitRawText(StringRef("\tnop\n")); } + const Function *F = MF->getFunction(); + for (Function::const_iterator i = F->begin(), e = F->end(); i != e; ++i) { + const BasicBlock *BB = i; + if (!BB->hasAddressTaken()) + continue; + MCSymbol *Sym = GetBlockAddressSymbol(BB); + if (Sym->isDefined()) + continue; + OutStreamer.AddComment("Address of block that was removed by CodeGen"); + OutStreamer.EmitLabel(Sym); + } + // Emit target-specific gunk after the function body. 
EmitFunctionBodyEnd(); @@ -745,7 +762,8 @@ void AsmPrinter::EmitFunctionBody() { const MCExpr *SizeExp = MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(FnEndLabel, OutContext), - MCSymbolRefExpr::Create(CurrentFnSym, OutContext), + MCSymbolRefExpr::Create(CurrentFnSymForSize, + OutContext), OutContext); OutStreamer.EmitELFSize(CurrentFnSym, SizeExp); } @@ -780,7 +798,7 @@ void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const { const TargetRegisterInfo *TRI = TM.getRegisterInfo(); int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false); - for (const unsigned *SR = TRI->getSuperRegisters(MLoc.getReg()); + for (const uint16_t *SR = TRI->getSuperRegisters(MLoc.getReg()); *SR && Reg < 0; ++SR) { Reg = TRI->getDwarfRegNum(*SR, false); // FIXME: Get the bit range this register uses of the superregister @@ -841,6 +859,12 @@ bool AsmPrinter::doFinalization(Module &M) { EmitVisibility(Name, V, false); } + // Emit module flags. + SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags; + M.getModuleFlagsMetadata(ModuleFlags); + if (!ModuleFlags.empty()) + getObjFileLowering().emitModuleFlags(OutStreamer, ModuleFlags, Mang, TM); + // Finalize debug and EH information. if (DE) { { @@ -929,6 +953,7 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { this->MF = &MF; // Get the function symbol. CurrentFnSym = Mang->getSymbol(MF.getFunction()); + CurrentFnSymForSize = CurrentFnSym; if (isVerbose()) LI = &getAnalysis<MachineLoopInfo>(); @@ -1120,7 +1145,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, const MCExpr *Value = 0; switch (MJTI->getEntryKind()) { case MachineJumpTableInfo::EK_Inline: - llvm_unreachable("Cannot emit EK_Inline jump table entry"); break; + llvm_unreachable("Cannot emit EK_Inline jump table entry"); case MachineJumpTableInfo::EK_Custom32: Value = TM.getTargetLowering()->LowerCustomJumpTableEntry(MJTI, MBB, UID, OutContext); @@ -1139,6 +1164,15 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, return; } + case MachineJumpTableInfo::EK_GPRel64BlockAddress: { + // EK_GPRel64BlockAddress - Each entry is an address of block, encoded + // with a relocation as gp-relative, e.g.: + // .gpdword LBB123 + MCSymbol *MBBSym = MBB->getSymbol(); + OutStreamer.EmitGPRel64Value(MCSymbolRefExpr::Create(MBBSym, OutContext)); + return; + } + case MachineJumpTableInfo::EK_LabelDifference32: { // EK_LabelDifference32 - Each entry is the address of the block minus // the address of the jump table. 
This is used for PIC jump tables where @@ -1191,12 +1225,8 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { assert(GV->hasInitializer() && "Not a special LLVM global!"); - const TargetData *TD = TM.getTargetData(); - unsigned Align = Log2_32(TD->getPointerPrefAlignment()); if (GV->getName() == "llvm.global_ctors") { - OutStreamer.SwitchSection(getObjFileLowering().getStaticCtorSection()); - EmitAlignment(Align); - EmitXXStructorList(GV->getInitializer()); + EmitXXStructorList(GV->getInitializer(), /* isCtor */ true); if (TM.getRelocationModel() == Reloc::Static && MAI->hasStaticCtorDtorReferenceInStaticMode()) { @@ -1208,9 +1238,7 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { } if (GV->getName() == "llvm.global_dtors") { - OutStreamer.SwitchSection(getObjFileLowering().getStaticDtorSection()); - EmitAlignment(Align); - EmitXXStructorList(GV->getInitializer()); + EmitXXStructorList(GV->getInitializer(), /* isCtor */ false); if (TM.getRelocationModel() == Reloc::Static && MAI->hasStaticCtorDtorReferenceInStaticMode()) { @@ -1240,7 +1268,7 @@ void AsmPrinter::EmitLLVMUsedList(const Constant *List) { } } -typedef std::pair<int, Constant*> Structor; +typedef std::pair<unsigned, Constant*> Structor; static bool priority_order(const Structor& lhs, const Structor& rhs) { return lhs.first < rhs.first; @@ -1248,7 +1276,7 @@ static bool priority_order(const Structor& lhs, const Structor& rhs) { /// EmitXXStructorList - Emit the ctor or dtor list taking into account the init /// priority. -void AsmPrinter::EmitXXStructorList(const Constant *List) { +void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) { // Should be an array of '{ int, void ()* }' structs. The first value is the // init priority. if (!isa<ConstantArray>(List)) return; @@ -1274,19 +1302,20 @@ void AsmPrinter::EmitXXStructorList(const Constant *List) { CS->getOperand(1))); } - // Emit the function pointers in reverse priority order. - switch (MAI->getStructorOutputOrder()) { - case Structors::None: - break; - case Structors::PriorityOrder: - std::sort(Structors.begin(), Structors.end(), priority_order); - break; - case Structors::ReversePriorityOrder: - std::sort(Structors.rbegin(), Structors.rend(), priority_order); - break; + // Emit the function pointers in the target-specific order + const TargetData *TD = TM.getTargetData(); + unsigned Align = Log2_32(TD->getPointerPrefAlignment()); + std::stable_sort(Structors.begin(), Structors.end(), priority_order); + for (unsigned i = 0, e = Structors.size(); i != e; ++i) { + const MCSection *OutputSection = + (isCtor ? + getObjFileLowering().getStaticCtorSection(Structors[i].first) : + getObjFileLowering().getStaticDtorSection(Structors[i].first)); + OutStreamer.SwitchSection(OutputSection); + if (OutStreamer.getCurrentSection() != OutStreamer.getPreviousSection()) + EmitAlignment(Align); + EmitXXStructor(Structors[i].second); } - for (unsigned i = 0, e = Structors.size(); i != e; ++i) - EmitGlobalConstant(Structors[i].second); } //===--------------------------------------------------------------------===// @@ -1423,7 +1452,6 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV); if (CE == 0) { llvm_unreachable("Unknown constant value to lower!"); - return MCConstantExpr::Create(0, Ctx); } switch (CE->getOpcode()) { @@ -1445,7 +1473,6 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { !AP.MF ? 
0 : AP.MF->getFunction()->getParent()); report_fatal_error(OS.str()); } - return MCConstantExpr::Create(0, Ctx); case Instruction::GetElementPtr: { const TargetData &TD = *AP.TM.getTargetData(); // Generate a symbolic expression for the byte address @@ -1543,6 +1570,19 @@ static void EmitGlobalConstantImpl(const Constant *C, unsigned AddrSpace, /// isRepeatedByteSequence - Determine whether the given value is /// composed of a repeated sequence of identical bytes and return the /// byte value. If it is not a repeated sequence, return -1. +static int isRepeatedByteSequence(const ConstantDataSequential *V) { + StringRef Data = V->getRawDataValues(); + assert(!Data.empty() && "Empty aggregates should be CAZ node"); + char C = Data[0]; + for (unsigned i = 1, e = Data.size(); i != e; ++i) + if (Data[i] != C) return -1; + return static_cast<uint8_t>(C); // Ensure 255 is not returned as -1. +} + + +/// isRepeatedByteSequence - Determine whether the given value is +/// composed of a repeated sequence of identical bytes and return the +/// byte value. If it is not a repeated sequence, return -1. static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) { if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { @@ -1568,8 +1608,7 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) { if (const ConstantArray *CA = dyn_cast<ConstantArray>(V)) { // Make sure all array elements are sequences of the same repeated // byte. - if (CA->getNumOperands() == 0) return -1; - + assert(CA->getNumOperands() != 0 && "Should be a CAZ"); int Byte = isRepeatedByteSequence(CA->getOperand(0), TM); if (Byte == -1) return -1; @@ -1580,37 +1619,92 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) { } return Byte; } + + if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(V)) + return isRepeatedByteSequence(CDS); return -1; } -static void EmitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace, - AsmPrinter &AP) { - if (AddrSpace != 0 || !CA->isString()) { - // Not a string. Print the values in successive locations. - - // See if we can aggregate some values. Make sure it can be - // represented as a series of bytes of the constant value. - int Value = isRepeatedByteSequence(CA, AP.TM); - - if (Value != -1) { - uint64_t Bytes = AP.TM.getTargetData()->getTypeAllocSize(CA->getType()); - AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace); +static void EmitGlobalConstantDataSequential(const ConstantDataSequential *CDS, + unsigned AddrSpace,AsmPrinter &AP){ + + // See if we can aggregate this into a .fill, if so, emit it as such. + int Value = isRepeatedByteSequence(CDS, AP.TM); + if (Value != -1) { + uint64_t Bytes = AP.TM.getTargetData()->getTypeAllocSize(CDS->getType()); + // Don't emit a 1-byte object as a .fill. + if (Bytes > 1) + return AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace); + } + + // If this can be emitted with .ascii/.asciz, emit it as such. + if (CDS->isString()) + return AP.OutStreamer.EmitBytes(CDS->getAsString(), AddrSpace); + + // Otherwise, emit the values in successive locations. 
+ unsigned ElementByteSize = CDS->getElementByteSize(); + if (isa<IntegerType>(CDS->getElementType())) { + for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { + if (AP.isVerbose()) + AP.OutStreamer.GetCommentOS() << format("0x%" PRIx64 "\n", + CDS->getElementAsInteger(i)); + AP.OutStreamer.EmitIntValue(CDS->getElementAsInteger(i), + ElementByteSize, AddrSpace); + } + } else if (ElementByteSize == 4) { + // FP Constants are printed as integer constants to avoid losing + // precision. + assert(CDS->getElementType()->isFloatTy()); + for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { + union { + float F; + uint32_t I; + }; + + F = CDS->getElementAsFloat(i); + if (AP.isVerbose()) + AP.OutStreamer.GetCommentOS() << "float " << F << '\n'; + AP.OutStreamer.EmitIntValue(I, 4, AddrSpace); } - else { - for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) - EmitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP); + } else { + assert(CDS->getElementType()->isDoubleTy()); + for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { + union { + double F; + uint64_t I; + }; + + F = CDS->getElementAsDouble(i); + if (AP.isVerbose()) + AP.OutStreamer.GetCommentOS() << "double " << F << '\n'; + AP.OutStreamer.EmitIntValue(I, 8, AddrSpace); } - return; } - // Otherwise, it can be emitted as .ascii. - SmallVector<char, 128> TmpVec; - TmpVec.reserve(CA->getNumOperands()); - for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) - TmpVec.push_back(cast<ConstantInt>(CA->getOperand(i))->getZExtValue()); + const TargetData &TD = *AP.TM.getTargetData(); + unsigned Size = TD.getTypeAllocSize(CDS->getType()); + unsigned EmittedSize = TD.getTypeAllocSize(CDS->getType()->getElementType()) * + CDS->getNumElements(); + if (unsigned Padding = Size - EmittedSize) + AP.OutStreamer.EmitZeros(Padding, AddrSpace); - AP.OutStreamer.EmitBytes(StringRef(TmpVec.data(), TmpVec.size()), AddrSpace); +} + +static void EmitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace, + AsmPrinter &AP) { + // See if we can aggregate some values. Make sure it can be + // represented as a series of bytes of the constant value. + int Value = isRepeatedByteSequence(CA, AP.TM); + + if (Value != -1) { + uint64_t Bytes = AP.TM.getTargetData()->getTypeAllocSize(CA->getType()); + AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace); + } + else { + for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) + EmitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP); + } } static void EmitGlobalConstantVector(const ConstantVector *CV, @@ -1656,29 +1750,44 @@ static void EmitGlobalConstantStruct(const ConstantStruct *CS, static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, AsmPrinter &AP) { - // FP Constants are printed as integer constants to avoid losing - // precision. 
- if (CFP->getType()->isDoubleTy()) { + if (CFP->getType()->isHalfTy()) { if (AP.isVerbose()) { - double Val = CFP->getValueAPF().convertToDouble(); - AP.OutStreamer.GetCommentOS() << "double " << Val << '\n'; + SmallString<10> Str; + CFP->getValueAPF().toString(Str); + AP.OutStreamer.GetCommentOS() << "half " << Str << '\n'; } - uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); - AP.OutStreamer.EmitIntValue(Val, 8, AddrSpace); + AP.OutStreamer.EmitIntValue(Val, 2, AddrSpace); return; } if (CFP->getType()->isFloatTy()) { if (AP.isVerbose()) { float Val = CFP->getValueAPF().convertToFloat(); - AP.OutStreamer.GetCommentOS() << "float " << Val << '\n'; + uint64_t IntVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); + AP.OutStreamer.GetCommentOS() << "float " << Val << '\n' + << " (" << format("0x%x", IntVal) << ")\n"; } uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); AP.OutStreamer.EmitIntValue(Val, 4, AddrSpace); return; } + // FP Constants are printed as integer constants to avoid losing + // precision. + if (CFP->getType()->isDoubleTy()) { + if (AP.isVerbose()) { + double Val = CFP->getValueAPF().convertToDouble(); + uint64_t IntVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); + AP.OutStreamer.GetCommentOS() << "double " << Val << '\n' + << " (" << format("0x%lx", IntVal) << ")\n"; + } + + uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); + AP.OutStreamer.EmitIntValue(Val, 8, AddrSpace); + return; + } + if (CFP->getType()->isX86_FP80Ty()) { // all long double variants are printed as hex // API needed to prevent premature destruction @@ -1742,20 +1851,20 @@ static void EmitGlobalConstantLargeInt(const ConstantInt *CI, static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, AsmPrinter &AP) { - if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV)) { - uint64_t Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType()); + const TargetData *TD = AP.TM.getTargetData(); + uint64_t Size = TD->getTypeAllocSize(CV->getType()); + if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV)) return AP.OutStreamer.EmitZeros(Size, AddrSpace); - } if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { - unsigned Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType()); switch (Size) { case 1: case 2: case 4: case 8: if (AP.isVerbose()) - AP.OutStreamer.GetCommentOS() << format("0x%llx\n", CI->getZExtValue()); + AP.OutStreamer.GetCommentOS() << format("0x%" PRIx64 "\n", + CI->getZExtValue()); AP.OutStreamer.EmitIntValue(CI->getZExtValue(), Size, AddrSpace); return; default: @@ -1764,29 +1873,45 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, } } - if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) - return EmitGlobalConstantArray(CVA, AddrSpace, AP); - - if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) - return EmitGlobalConstantStruct(CVS, AddrSpace, AP); - if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) return EmitGlobalConstantFP(CFP, AddrSpace, AP); if (isa<ConstantPointerNull>(CV)) { - unsigned Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType()); AP.OutStreamer.EmitIntValue(0, Size, AddrSpace); return; } + if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(CV)) + return EmitGlobalConstantDataSequential(CDS, AddrSpace, AP); + + if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) + return EmitGlobalConstantArray(CVA, AddrSpace, AP); + + if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) + return 
EmitGlobalConstantStruct(CVS, AddrSpace, AP); + + if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) { + // Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of + // vectors). + if (CE->getOpcode() == Instruction::BitCast) + return EmitGlobalConstantImpl(CE->getOperand(0), AddrSpace, AP); + + if (Size > 8) { + // If the constant expression's size is greater than 64-bits, then we have + // to emit the value in chunks. Try to constant fold the value and emit it + // that way. + Constant *New = ConstantFoldConstantExpression(CE, TD); + if (New && New != CE) + return EmitGlobalConstantImpl(New, AddrSpace, AP); + } + } + if (const ConstantVector *V = dyn_cast<ConstantVector>(CV)) return EmitGlobalConstantVector(V, AddrSpace, AP); - + // Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it // thread the streamer with EmitValue. - AP.OutStreamer.EmitValue(LowerConstant(CV, AP), - AP.TM.getTargetData()->getTypeAllocSize(CV->getType()), - AddrSpace); + AP.OutStreamer.EmitValue(LowerConstant(CV, AP), Size, AddrSpace); } /// EmitGlobalConstant - Print a general LLVM constant to the .s file. @@ -1953,7 +2078,7 @@ static void EmitBasicBlockLoopComments(const MachineBasicBlock &MBB, void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const { // Emit an alignment directive for this block, if needed. if (unsigned Align = MBB->getAlignment()) - EmitAlignment(Log2_32(Align)); + EmitAlignment(Align); // If the block has its address taken, emit any labels that were used to // reference the block. It is possible that there is more than one label @@ -1970,27 +2095,22 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const { OutStreamer.EmitLabel(Syms[i]); } + // Print some verbose block comments. + if (isVerbose()) { + if (const BasicBlock *BB = MBB->getBasicBlock()) + if (BB->hasName()) + OutStreamer.AddComment("%" + BB->getName()); + EmitBasicBlockLoopComments(*MBB, LI, *this); + } + // Print the main label for the block. if (MBB->pred_empty() || isBlockOnlyReachableByFallthrough(MBB)) { if (isVerbose() && OutStreamer.hasRawTextSupport()) { - if (const BasicBlock *BB = MBB->getBasicBlock()) - if (BB->hasName()) - OutStreamer.AddComment("%" + BB->getName()); - - EmitBasicBlockLoopComments(*MBB, LI, *this); - // NOTE: Want this comment at start of line, don't emit with AddComment. OutStreamer.EmitRawText(Twine(MAI->getCommentString()) + " BB#" + Twine(MBB->getNumber()) + ":"); } } else { - if (isVerbose()) { - if (const BasicBlock *BB = MBB->getBasicBlock()) - if (BB->hasName()) - OutStreamer.AddComment("%" + BB->getName()); - EmitBasicBlockLoopComments(*MBB, LI, *this); - } - OutStreamer.EmitLabel(MBB->getSymbol()); } } @@ -2048,7 +2168,7 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { MachineInstr &MI = *II; // If it is not a simple branch, we are in a table somewhere. 
- if (!MI.getDesc().isBranch() || MI.getDesc().isIndirectBranch()) + if (!MI.isBranch() || MI.isIndirectBranch()) return false; // If we are the operands of one of the branches, this is not @@ -2090,6 +2210,4 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) { } report_fatal_error("no GCMetadataPrinter registered for GC: " + Twine(Name)); - return 0; } - diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index 4d6c28118427..90d511cbab0a 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -25,6 +25,7 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/Dwarf.h" +#include "llvm/Support/ErrorHandling.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -35,23 +36,8 @@ using namespace llvm; void AsmPrinter::EmitSLEB128(int Value, const char *Desc) const { if (isVerbose() && Desc) OutStreamer.AddComment(Desc); - - if (MAI->hasLEB128()) { - OutStreamer.EmitSLEB128IntValue(Value); - return; - } - // If we don't have .sleb128, emit as .bytes. - int Sign = Value >> (8 * sizeof(Value) - 1); - bool IsMore; - - do { - unsigned char Byte = static_cast<unsigned char>(Value & 0x7f); - Value >>= 7; - IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0; - if (IsMore) Byte |= 0x80; - OutStreamer.EmitIntValue(Byte, 1, /*addrspace*/0); - } while (IsMore); + OutStreamer.EmitSLEB128IntValue(Value); } /// EmitULEB128 - emit the specified signed leb128 value. @@ -60,25 +46,7 @@ void AsmPrinter::EmitULEB128(unsigned Value, const char *Desc, if (isVerbose() && Desc) OutStreamer.AddComment(Desc); - // FIXME: Should we add a PadTo option to the streamer? - if (MAI->hasLEB128() && PadTo == 0) { - OutStreamer.EmitULEB128IntValue(Value); - return; - } - - // If we don't have .uleb128 or we want to emit padding, emit as .bytes. - do { - unsigned char Byte = static_cast<unsigned char>(Value & 0x7f); - Value >>= 7; - if (Value || PadTo != 0) Byte |= 0x80; - OutStreamer.EmitIntValue(Byte, 1, /*addrspace*/0); - } while (Value); - - if (PadTo) { - if (PadTo > 1) - OutStreamer.EmitFill(PadTo - 1, 0x80/*fillval*/, 0/*addrspace*/); - OutStreamer.EmitFill(1, 0/*fillval*/, 0/*addrspace*/); - } + OutStreamer.EmitULEB128IntValue(Value, 0/*addrspace*/, PadTo); } /// EmitCFAByte - Emit a .byte 42 directive for a DW_CFA_xxx value. @@ -143,7 +111,7 @@ unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const { return 0; switch (Encoding & 0x07) { - default: assert(0 && "Invalid encoded value."); + default: llvm_unreachable("Invalid encoded value."); case dwarf::DW_EH_PE_absptr: return TM.getTargetData()->getPointerSize(); case dwarf::DW_EH_PE_udata2: return 2; case dwarf::DW_EH_PE_udata4: return 4; @@ -177,9 +145,8 @@ void AsmPrinter::EmitReference(const GlobalValue *GV, unsigned Encoding)const{ void AsmPrinter::EmitSectionOffset(const MCSymbol *Label, const MCSymbol *SectionLabel) const { // On COFF targets, we have to emit the special .secrel32 directive. - if (const char *SecOffDir = MAI->getDwarfSectionOffsetDirective()) { - // FIXME: MCize. 
- OutStreamer.EmitRawText(SecOffDir + Twine(Label->getName())); + if (MAI->getDwarfSectionOffsetDirective()) { + OutStreamer.EmitCOFFSecRel32(Label); return; } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 8eda889155a2..d60585465be0 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -326,7 +326,11 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1; } - if (OpNo >= MI->getNumOperands()) { + // We may have a location metadata attached to the end of the + // instruction, and at no point should see metadata at any + // other point while processing. It's an error if so. + if (OpNo >= MI->getNumOperands() || + MI->getOperand(OpNo).isMetadata()) { Error = true; } else { unsigned OpFlags = MI->getOperand(OpNo).getImm(); diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt index 67d927348b54..58fe2ed9d357 100644 --- a/lib/CodeGen/AsmPrinter/CMakeLists.txt +++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt @@ -4,6 +4,7 @@ add_llvm_library(LLVMAsmPrinter AsmPrinterDwarf.cpp AsmPrinterInlineAsm.cpp DIE.cpp + DwarfAccelTable.cpp DwarfCFIException.cpp DwarfCompileUnit.cpp DwarfDebug.cpp @@ -11,13 +12,3 @@ add_llvm_library(LLVMAsmPrinter OcamlGCPrinter.cpp Win64Exception.cpp ) - -add_llvm_library_dependencies(LLVMAsmPrinter - LLVMAnalysis - LLVMCodeGen - LLVMCore - LLVMMC - LLVMMCParser - LLVMSupport - LLVMTarget - ) diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index 9c1ce761b0c5..3776848e3f47 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -112,15 +112,6 @@ DIE::~DIE() { delete Children[i]; } -/// addSiblingOffset - Add a sibling offset field to the front of the DIE. -/// -DIEValue *DIE::addSiblingOffset(BumpPtrAllocator &A) { - DIEInteger *DI = new (A) DIEInteger(0); - Values.insert(Values.begin(), DI); - Abbrev.AddFirstAttribute(dwarf::DW_AT_sibling, dwarf::DW_FORM_ref4); - return DI; -} - #ifndef NDEBUG void DIE::print(raw_ostream &O, unsigned IncIndent) { IndentCount += IncIndent; @@ -174,6 +165,7 @@ void DIE::dump() { } #endif +void DIEValue::anchor() { } #ifndef NDEBUG void DIEValue::dump() { @@ -223,33 +215,14 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const { case dwarf::DW_FORM_udata: return MCAsmInfo::getULEB128Size(Integer); case dwarf::DW_FORM_sdata: return MCAsmInfo::getSLEB128Size(Integer); case dwarf::DW_FORM_addr: return AP->getTargetData().getPointerSize(); - default: llvm_unreachable("DIE Value form not supported yet"); break; + default: llvm_unreachable("DIE Value form not supported yet"); } - return 0; } #ifndef NDEBUG void DIEInteger::print(raw_ostream &O) { - O << "Int: " << (int64_t)Integer - << format(" 0x%llx", (unsigned long long)Integer); -} -#endif - -//===----------------------------------------------------------------------===// -// DIEString Implementation -//===----------------------------------------------------------------------===// - -/// EmitValue - Emit string value. -/// -void DIEString::EmitValue(AsmPrinter *AP, unsigned Form) const { - AP->OutStreamer.EmitBytes(Str, /*addrspace*/0); - // Emit nul terminator. 
- AP->OutStreamer.EmitIntValue(0, 1, /*addrspace*/0); -} - -#ifndef NDEBUG -void DIEString::print(raw_ostream &O) { - O << "Str: \"" << Str << "\""; + O << "Int: " << (int64_t)Integer << " 0x"; + O.write_hex(Integer); } #endif @@ -267,6 +240,7 @@ void DIELabel::EmitValue(AsmPrinter *AP, unsigned Form) const { /// unsigned DIELabel::SizeOf(AsmPrinter *AP, unsigned Form) const { if (Form == dwarf::DW_FORM_data4) return 4; + if (Form == dwarf::DW_FORM_strp) return 4; return AP->getTargetData().getPointerSize(); } @@ -290,6 +264,7 @@ void DIEDelta::EmitValue(AsmPrinter *AP, unsigned Form) const { /// unsigned DIEDelta::SizeOf(AsmPrinter *AP, unsigned Form) const { if (Form == dwarf::DW_FORM_data4) return 4; + if (Form == dwarf::DW_FORM_strp) return 4; return AP->getTargetData().getPointerSize(); } @@ -335,7 +310,7 @@ unsigned DIEBlock::ComputeSize(AsmPrinter *AP) { /// void DIEBlock::EmitValue(AsmPrinter *Asm, unsigned Form) const { switch (Form) { - default: assert(0 && "Improper form for block"); break; + default: llvm_unreachable("Improper form for block"); case dwarf::DW_FORM_block1: Asm->EmitInt8(Size); break; case dwarf::DW_FORM_block2: Asm->EmitInt16(Size); break; case dwarf::DW_FORM_block4: Asm->EmitInt32(Size); break; @@ -355,9 +330,8 @@ unsigned DIEBlock::SizeOf(AsmPrinter *AP, unsigned Form) const { case dwarf::DW_FORM_block2: return Size + sizeof(int16_t); case dwarf::DW_FORM_block4: return Size + sizeof(int32_t); case dwarf::DW_FORM_block: return Size + MCAsmInfo::getULEB128Size(Size); - default: llvm_unreachable("Improper form for block"); break; + default: llvm_unreachable("Improper form for block"); } - return 0; } #ifndef NDEBUG diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h index 7d61f1edff4a..f93ea1b045b2 100644 --- a/lib/CodeGen/AsmPrinter/DIE.h +++ b/lib/CodeGen/AsmPrinter/DIE.h @@ -31,17 +31,17 @@ namespace llvm { class DIEAbbrevData { /// Attribute - Dwarf attribute code. /// - unsigned Attribute; + uint16_t Attribute; /// Form - Dwarf form code. /// - unsigned Form; + uint16_t Form; public: - DIEAbbrevData(unsigned A, unsigned F) : Attribute(A), Form(F) {} + DIEAbbrevData(uint16_t A, uint16_t F) : Attribute(A), Form(F) {} // Accessors. - unsigned getAttribute() const { return Attribute; } - unsigned getForm() const { return Form; } + uint16_t getAttribute() const { return Attribute; } + uint16_t getForm() const { return Form; } /// Profile - Used to gather unique data for the abbreviation folding set. /// @@ -54,41 +54,41 @@ namespace llvm { class DIEAbbrev : public FoldingSetNode { /// Tag - Dwarf tag code. /// - unsigned Tag; + uint16_t Tag; - /// Unique number for node. + /// ChildrenFlag - Dwarf children flag. /// - unsigned Number; + uint16_t ChildrenFlag; - /// ChildrenFlag - Dwarf children flag. + /// Unique number for node. /// - unsigned ChildrenFlag; + unsigned Number; /// Data - Raw data bytes for abbreviation. /// SmallVector<DIEAbbrevData, 8> Data; public: - DIEAbbrev(unsigned T, unsigned C) : Tag(T), ChildrenFlag(C), Data() {} + DIEAbbrev(uint16_t T, uint16_t C) : Tag(T), ChildrenFlag(C), Data() {} // Accessors. 
- unsigned getTag() const { return Tag; } + uint16_t getTag() const { return Tag; } unsigned getNumber() const { return Number; } - unsigned getChildrenFlag() const { return ChildrenFlag; } + uint16_t getChildrenFlag() const { return ChildrenFlag; } const SmallVector<DIEAbbrevData, 8> &getData() const { return Data; } - void setTag(unsigned T) { Tag = T; } - void setChildrenFlag(unsigned CF) { ChildrenFlag = CF; } + void setTag(uint16_t T) { Tag = T; } + void setChildrenFlag(uint16_t CF) { ChildrenFlag = CF; } void setNumber(unsigned N) { Number = N; } /// AddAttribute - Adds another set of attribute information to the /// abbreviation. - void AddAttribute(unsigned Attribute, unsigned Form) { + void AddAttribute(uint16_t Attribute, uint16_t Form) { Data.push_back(DIEAbbrevData(Attribute, Form)); } /// AddFirstAttribute - Adds a set of attribute information to the front /// of the abbreviation. - void AddFirstAttribute(unsigned Attribute, unsigned Form) { + void AddFirstAttribute(uint16_t Attribute, uint16_t Form) { Data.insert(Data.begin(), DIEAbbrevData(Attribute, Form)); } @@ -113,10 +113,6 @@ namespace llvm { class DIE { protected: - /// Abbrev - Buffer for constructing abbreviation. - /// - DIEAbbrev Abbrev; - /// Offset - Offset in debug info section. /// unsigned Offset; @@ -125,6 +121,10 @@ namespace llvm { /// unsigned Size; + /// Abbrev - Buffer for constructing abbreviation. + /// + DIEAbbrev Abbrev; + /// Children DIEs. /// std::vector<DIE *> Children; @@ -139,8 +139,8 @@ namespace llvm { mutable unsigned IndentCount; public: explicit DIE(unsigned Tag) - : Abbrev(Tag, dwarf::DW_CHILDREN_no), Offset(0), - Size(0), Parent (0), IndentCount(0) {} + : Offset(0), Size(0), Abbrev(Tag, dwarf::DW_CHILDREN_no), Parent(0), + IndentCount(0) {} virtual ~DIE(); // Accessors. @@ -163,16 +163,6 @@ namespace llvm { Values.push_back(Value); } - /// SiblingOffset - Return the offset of the debug information entry's - /// sibling. - unsigned getSiblingOffset() const { return Offset + Size; } - - /// addSiblingOffset - Add a sibling offset field to the front of the DIE. - /// The caller is responsible for deleting the return value at or after the - /// same time it destroys this DIE. - /// - DIEValue *addSiblingOffset(BumpPtrAllocator &A); - /// addChild - Add a child to the DIE. /// void addChild(DIE *Child) { @@ -195,12 +185,12 @@ namespace llvm { /// DIEValue - A debug information entry value. /// class DIEValue { + virtual void anchor(); public: enum { isInteger, isString, isLabel, - isSectionOffset, isDelta, isEntry, isBlock @@ -276,33 +266,6 @@ namespace llvm { }; //===--------------------------------------------------------------------===// - /// DIEString - A string value DIE. This DIE keeps string reference only. - /// - class DIEString : public DIEValue { - const StringRef Str; - public: - explicit DIEString(const StringRef S) : DIEValue(isString), Str(S) {} - - /// EmitValue - Emit string value. - /// - virtual void EmitValue(AsmPrinter *AP, unsigned Form) const; - - /// SizeOf - Determine size of string value in bytes. - /// - virtual unsigned SizeOf(AsmPrinter *AP, unsigned /*Form*/) const { - return Str.size() + sizeof(char); // sizeof('\0'); - } - - // Implement isa/cast/dyncast. 
- static bool classof(const DIEString *) { return true; } - static bool classof(const DIEValue *S) { return S->getType() == isString; } - -#ifndef NDEBUG - virtual void print(raw_ostream &O); -#endif - }; - - //===--------------------------------------------------------------------===// /// DIELabel - A label expression DIE. // class DIELabel : public DIEValue { @@ -359,7 +322,7 @@ namespace llvm { }; //===--------------------------------------------------------------------===// - /// DIEntry - A pointer to another debug information entry. An instance of + /// DIEEntry - A pointer to another debug information entry. An instance of /// this class can also be used as a proxy for a debug information entry not /// yet defined (ie. types.) class DIEEntry : public DIEValue { diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp new file mode 100644 index 000000000000..660684d1bea5 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp @@ -0,0 +1,287 @@ +//=-- llvm/CodeGen/DwarfAccelTable.cpp - Dwarf Accelerator Tables -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf accelerator tables. +// +//===----------------------------------------------------------------------===// + +#include "DwarfAccelTable.h" +#include "DwarfDebug.h" +#include "DIE.h" +#include "llvm/ADT/Twine.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +const char *DwarfAccelTable::Atom::AtomTypeString(enum AtomType AT) { + switch (AT) { + case eAtomTypeNULL: return "eAtomTypeNULL"; + case eAtomTypeDIEOffset: return "eAtomTypeDIEOffset"; + case eAtomTypeCUOffset: return "eAtomTypeCUOffset"; + case eAtomTypeTag: return "eAtomTypeTag"; + case eAtomTypeNameFlags: return "eAtomTypeNameFlags"; + case eAtomTypeTypeFlags: return "eAtomTypeTypeFlags"; + } + llvm_unreachable("invalid AtomType!"); +} + +// The general case would need to have a less hard coded size for the +// length of the HeaderData, however, if we're constructing based on a +// single Atom then we know it will always be: 4 + 4 + 2 + 2. +DwarfAccelTable::DwarfAccelTable(DwarfAccelTable::Atom atom) : + Header(12), + HeaderData(atom) { +} + +// The length of the header data is always going to be 4 + 4 + 4*NumAtoms. +DwarfAccelTable::DwarfAccelTable(std::vector<DwarfAccelTable::Atom> &atomList) : + Header(8 + (atomList.size() * 4)), + HeaderData(atomList) { +} + +DwarfAccelTable::~DwarfAccelTable() { + for (size_t i = 0, e = Data.size(); i < e; ++i) + delete Data[i]; + for (StringMap<DataArray>::iterator + EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) + for (DataArray::iterator DI = EI->second.begin(), + DE = EI->second.end(); DI != DE; ++DI) + delete (*DI); +} + +void DwarfAccelTable::AddName(StringRef Name, DIE* die, char Flags) { + // If the string is in the list already then add this die to the list + // otherwise add a new one. + DataArray &DIEs = Entries[Name]; + DIEs.push_back(new HashDataContents(die, Flags)); +} + +void DwarfAccelTable::ComputeBucketCount(void) { + // First get the number of unique hashes. 
+ std::vector<uint32_t> uniques(Data.size()); + for (size_t i = 0, e = Data.size(); i < e; ++i) + uniques[i] = Data[i]->HashValue; + array_pod_sort(uniques.begin(), uniques.end()); + std::vector<uint32_t>::iterator p = + std::unique(uniques.begin(), uniques.end()); + uint32_t num = std::distance(uniques.begin(), p); + + // Then compute the bucket size, minimum of 1 bucket. + if (num > 1024) Header.bucket_count = num/4; + if (num > 16) Header.bucket_count = num/2; + else Header.bucket_count = num > 0 ? num : 1; + + Header.hashes_count = num; +} + +namespace { + // DIESorter - comparison predicate that sorts DIEs by their offset. + struct DIESorter { + bool operator()(const struct DwarfAccelTable::HashDataContents *A, + const struct DwarfAccelTable::HashDataContents *B) const { + return A->Die->getOffset() < B->Die->getOffset(); + } + }; +} + +void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, const char *Prefix) { + // Create the individual hash data outputs. + for (StringMap<DataArray>::iterator + EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) { + struct HashData *Entry = new HashData((*EI).getKeyData()); + + // Unique the entries. + std::stable_sort(EI->second.begin(), EI->second.end(), DIESorter()); + EI->second.erase(std::unique(EI->second.begin(), EI->second.end()), + EI->second.end()); + + for (DataArray::const_iterator DI = EI->second.begin(), + DE = EI->second.end(); + DI != DE; ++DI) + Entry->addData((*DI)); + Data.push_back(Entry); + } + + // Figure out how many buckets we need, then compute the bucket + // contents and the final ordering. We'll emit the hashes and offsets + // by doing a walk during the emission phase. We add temporary + // symbols to the data so that we can reference them during the offset + // later, we'll emit them when we emit the data. + ComputeBucketCount(); + + // Compute bucket contents and final ordering. + Buckets.resize(Header.bucket_count); + for (size_t i = 0, e = Data.size(); i < e; ++i) { + uint32_t bucket = Data[i]->HashValue % Header.bucket_count; + Buckets[bucket].push_back(Data[i]); + Data[i]->Sym = Asm->GetTempSymbol(Prefix, i); + } +} + +// Emits the header for the table via the AsmPrinter. +void DwarfAccelTable::EmitHeader(AsmPrinter *Asm) { + Asm->OutStreamer.AddComment("Header Magic"); + Asm->EmitInt32(Header.magic); + Asm->OutStreamer.AddComment("Header Version"); + Asm->EmitInt16(Header.version); + Asm->OutStreamer.AddComment("Header Hash Function"); + Asm->EmitInt16(Header.hash_function); + Asm->OutStreamer.AddComment("Header Bucket Count"); + Asm->EmitInt32(Header.bucket_count); + Asm->OutStreamer.AddComment("Header Hash Count"); + Asm->EmitInt32(Header.hashes_count); + Asm->OutStreamer.AddComment("Header Data Length"); + Asm->EmitInt32(Header.header_data_len); + Asm->OutStreamer.AddComment("HeaderData Die Offset Base"); + Asm->EmitInt32(HeaderData.die_offset_base); + Asm->OutStreamer.AddComment("HeaderData Atom Count"); + Asm->EmitInt32(HeaderData.Atoms.size()); + for (size_t i = 0; i < HeaderData.Atoms.size(); i++) { + Atom A = HeaderData.Atoms[i]; + Asm->OutStreamer.AddComment(Atom::AtomTypeString(A.type)); + Asm->EmitInt16(A.type); + Asm->OutStreamer.AddComment(dwarf::FormEncodingString(A.form)); + Asm->EmitInt16(A.form); + } +} + +// Walk through and emit the buckets for the table. This will look +// like a list of numbers of how many elements are in each bucket. 
+void DwarfAccelTable::EmitBuckets(AsmPrinter *Asm) { + unsigned index = 0; + for (size_t i = 0, e = Buckets.size(); i < e; ++i) { + Asm->OutStreamer.AddComment("Bucket " + Twine(i)); + if (Buckets[i].size() != 0) + Asm->EmitInt32(index); + else + Asm->EmitInt32(UINT32_MAX); + index += Buckets[i].size(); + } +} + +// Walk through the buckets and emit the individual hashes for each +// bucket. +void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) { + for (size_t i = 0, e = Buckets.size(); i < e; ++i) { + for (HashList::const_iterator HI = Buckets[i].begin(), + HE = Buckets[i].end(); HI != HE; ++HI) { + Asm->OutStreamer.AddComment("Hash in Bucket " + Twine(i)); + Asm->EmitInt32((*HI)->HashValue); + } + } +} + +// Walk through the buckets and emit the individual offsets for each +// element in each bucket. This is done via a symbol subtraction from the +// beginning of the section. The non-section symbol will be output later +// when we emit the actual data. +void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) { + for (size_t i = 0, e = Buckets.size(); i < e; ++i) { + for (HashList::const_iterator HI = Buckets[i].begin(), + HE = Buckets[i].end(); HI != HE; ++HI) { + Asm->OutStreamer.AddComment("Offset in Bucket " + Twine(i)); + MCContext &Context = Asm->OutStreamer.getContext(); + const MCExpr *Sub = + MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create((*HI)->Sym, Context), + MCSymbolRefExpr::Create(SecBegin, Context), + Context); + Asm->OutStreamer.EmitValue(Sub, sizeof(uint32_t), 0); + } + } +} + +// Walk through the buckets and emit the full data for each element in +// the bucket. For the string case emit the dies and the various offsets. +// Terminate each HashData bucket with 0. +void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D) { + uint64_t PrevHash = UINT64_MAX; + for (size_t i = 0, e = Buckets.size(); i < e; ++i) { + for (HashList::const_iterator HI = Buckets[i].begin(), + HE = Buckets[i].end(); HI != HE; ++HI) { + // Remember to emit the label for our offset. + Asm->OutStreamer.EmitLabel((*HI)->Sym); + Asm->OutStreamer.AddComment((*HI)->Str); + Asm->EmitSectionOffset(D->getStringPoolEntry((*HI)->Str), + D->getStringPool()); + Asm->OutStreamer.AddComment("Num DIEs"); + Asm->EmitInt32((*HI)->Data.size()); + for (std::vector<struct HashDataContents*>::const_iterator + DI = (*HI)->Data.begin(), DE = (*HI)->Data.end(); + DI != DE; ++DI) { + // Emit the DIE offset + Asm->EmitInt32((*DI)->Die->getOffset()); + // If we have multiple Atoms emit that info too. + // FIXME: A bit of a hack, we either emit only one atom or all info. + if (HeaderData.Atoms.size() > 1) { + Asm->EmitInt16((*DI)->Die->getTag()); + Asm->EmitInt8((*DI)->Flags); + } + } + // Emit a 0 to terminate the data unless we have a hash collision. + if (PrevHash != (*HI)->HashValue) + Asm->EmitInt32(0); + PrevHash = (*HI)->HashValue; + } + } +} + +// Emit the entire data structure to the output file. +void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, + DwarfDebug *D) { + // Emit the header. + EmitHeader(Asm); + + // Emit the buckets. + EmitBuckets(Asm); + + // Emit the hashes. + EmitHashes(Asm); + + // Emit the offsets. + EmitOffsets(Asm, SecBegin); + + // Emit the hash data. 
+ EmitData(Asm, D); +} + +#ifndef NDEBUG +void DwarfAccelTable::print(raw_ostream &O) { + + Header.print(O); + HeaderData.print(O); + + O << "Entries: \n"; + for (StringMap<DataArray>::const_iterator + EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) { + O << "Name: " << EI->getKeyData() << "\n"; + for (DataArray::const_iterator DI = EI->second.begin(), + DE = EI->second.end(); + DI != DE; ++DI) + (*DI)->print(O); + } + + O << "Buckets and Hashes: \n"; + for (size_t i = 0, e = Buckets.size(); i < e; ++i) + for (HashList::const_iterator HI = Buckets[i].begin(), + HE = Buckets[i].end(); HI != HE; ++HI) + (*HI)->print(O); + + O << "Data: \n"; + for (std::vector<HashData*>::const_iterator + DI = Data.begin(), DE = Data.end(); DI != DE; ++DI) + (*DI)->print(O); + + +} +#endif diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h new file mode 100644 index 000000000000..2278d4c784f4 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h @@ -0,0 +1,290 @@ +//==-- llvm/CodeGen/DwarfAccelTable.h - Dwarf Accelerator Tables -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf accelerator tables. +// +//===----------------------------------------------------------------------===// + +#ifndef CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__ +#define CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__ + +#include "llvm/ADT/StringMap.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/FormattedStream.h" +#include "DIE.h" +#include <vector> +#include <map> + +// The dwarf accelerator tables are an indirect hash table optimized +// for null lookup rather than access to known data. They are output into +// an on-disk format that looks like this: +// +// .-------------. +// | HEADER | +// |-------------| +// | BUCKETS | +// |-------------| +// | HASHES | +// |-------------| +// | OFFSETS | +// |-------------| +// | DATA | +// `-------------' +// +// where the header contains a magic number, version, type of hash function, +// the number of buckets, total number of hashes, and room for a special +// struct of data and the length of that struct. +// +// The buckets contain an index (e.g. 6) into the hashes array. The hashes +// section contains all of the 32-bit hash values in contiguous memory, and +// the offsets contain the offset into the data area for the particular +// hash. +// +// For a lookup example, we could hash a function name and take it modulo the +// number of buckets giving us our bucket. From there we take the bucket value +// as an index into the hashes table and look at each successive hash as long +// as the hash value is still the same modulo result (bucket value) as earlier. +// If we have a match we look at that same entry in the offsets table and +// grab the offset in the data for our final match. 
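[Editorial illustration, not part of the patch.] The comment above describes the consumer-side lookup over the emitted table. Below is a minimal sketch of that lookup in C++, assuming the bucket, hash and offset arrays have already been read out of the section; the names Lookup, Buckets, Hashes and Offsets are hypothetical. A real reader would additionally compare the string stored at the returned data offset, since 32-bit DJB hashes can collide.

#include <cstdint>
#include <string>
#include <vector>

// Same DJB hash the table is built with.
static uint32_t HashDJB(const std::string &Str) {
  uint32_t h = 5381;
  for (char c : Str)
    h = ((h << 5) + h) + static_cast<unsigned char>(c);
  return h;
}

// Returns the data-area offset for Name, or UINT32_MAX if absent.
uint32_t Lookup(const std::string &Name,
                const std::vector<uint32_t> &Buckets,
                const std::vector<uint32_t> &Hashes,
                const std::vector<uint32_t> &Offsets) {
  uint32_t Hash = HashDJB(Name);
  uint32_t BucketCount = Buckets.size();
  uint32_t Bucket = Hash % BucketCount;

  uint32_t Index = Buckets[Bucket];
  if (Index == UINT32_MAX)   // Empty bucket, as emitted by EmitBuckets.
    return UINT32_MAX;

  // Hashes belonging to one bucket are contiguous; stop scanning once the
  // modulo result no longer matches this bucket.
  for (uint32_t i = Index;
       i < Hashes.size() && Hashes[i] % BucketCount == Bucket; ++i)
    if (Hashes[i] == Hash)
      return Offsets[i];     // Offset of the HashData entry for this name.

  return UINT32_MAX;
}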
+ +namespace llvm { + +class AsmPrinter; +class DIE; +class DwarfDebug; + +class DwarfAccelTable { + + enum HashFunctionType { + eHashFunctionDJB = 0u + }; + + static uint32_t HashDJB (StringRef Str) { + uint32_t h = 5381; + for (unsigned i = 0, e = Str.size(); i != e; ++i) + h = ((h << 5) + h) + Str[i]; + return h; + } + + // Helper function to compute the number of buckets needed based on + // the number of unique hashes. + void ComputeBucketCount (void); + + struct TableHeader { + uint32_t magic; // 'HASH' magic value to allow endian detection + uint16_t version; // Version number. + uint16_t hash_function; // The hash function enumeration that was used. + uint32_t bucket_count; // The number of buckets in this hash table. + uint32_t hashes_count; // The total number of unique hash values + // and hash data offsets in this table. + uint32_t header_data_len; // The bytes to skip to get to the hash + // indexes (buckets) for correct alignment. + // Also written to disk is the implementation specific header data. + + static const uint32_t MagicHash = 0x48415348; + + TableHeader (uint32_t data_len) : + magic (MagicHash), version (1), hash_function (eHashFunctionDJB), + bucket_count (0), hashes_count (0), header_data_len (data_len) + {} + +#ifndef NDEBUG + void print(raw_ostream &O) { + O << "Magic: " << format("0x%x", magic) << "\n" + << "Version: " << version << "\n" + << "Hash Function: " << hash_function << "\n" + << "Bucket Count: " << bucket_count << "\n" + << "Header Data Length: " << header_data_len << "\n"; + } + void dump() { print(dbgs()); } +#endif + }; + +public: + // The HeaderData describes the form of each set of data. In general this + // is as a list of atoms (atom_count) where each atom contains a type + // (AtomType type) of data, and an encoding form (form). In the case of + // data that is referenced via DW_FORM_ref_* the die_offset_base is + // used to describe the offset for all forms in the list of atoms. + // This also serves as a public interface of sorts. + // When written to disk this will have the form: + // + // uint32_t die_offset_base + // uint32_t atom_count + // atom_count Atoms + enum AtomType { + eAtomTypeNULL = 0u, + eAtomTypeDIEOffset = 1u, // DIE offset, check form for encoding + eAtomTypeCUOffset = 2u, // DIE offset of the compiler unit header that + // contains the item in question + eAtomTypeTag = 3u, // DW_TAG_xxx value, should be encoded as + // DW_FORM_data1 (if no tags exceed 255) or + // DW_FORM_data2. + eAtomTypeNameFlags = 4u, // Flags from enum NameFlags + eAtomTypeTypeFlags = 5u // Flags from enum TypeFlags + }; + + enum TypeFlags { + eTypeFlagClassMask = 0x0000000fu, + + // Always set for C++, only set for ObjC if this is the + // @implementation for a class. + eTypeFlagClassIsImplementation = ( 1u << 1 ) + }; + + // Make these public so that they can be used as a general interface to + // the class. 
+ struct Atom { + AtomType type; // enum AtomType + uint16_t form; // DWARF DW_FORM_ defines + + Atom(AtomType type, uint16_t form) : type(type), form(form) {} + static const char * AtomTypeString(enum AtomType); +#ifndef NDEBUG + void print(raw_ostream &O) { + O << "Type: " << AtomTypeString(type) << "\n" + << "Form: " << dwarf::FormEncodingString(form) << "\n"; + } + void dump() { + print(dbgs()); + } +#endif + }; + + private: + struct TableHeaderData { + + uint32_t die_offset_base; + std::vector<Atom> Atoms; + + TableHeaderData(std::vector<DwarfAccelTable::Atom> &AtomList, + uint32_t offset = 0) : + die_offset_base(offset) { + for (size_t i = 0, e = AtomList.size(); i != e; ++i) + Atoms.push_back(AtomList[i]); + } + + TableHeaderData(DwarfAccelTable::Atom Atom, uint32_t offset = 0) + : die_offset_base(offset) { + Atoms.push_back(Atom); + } + +#ifndef NDEBUG + void print (raw_ostream &O) { + O << "die_offset_base: " << die_offset_base << "\n"; + for (size_t i = 0; i < Atoms.size(); i++) + Atoms[i].print(O); + } + void dump() { + print(dbgs()); + } +#endif + }; + + // The data itself consists of a str_offset, a count of the DIEs in the + // hash and the offsets to the DIEs themselves. + // On disk each data section is ended with a 0 KeyType as the end of the + // hash chain. + // On output this looks like: + // uint32_t str_offset + // uint32_t hash_data_count + // HashData[hash_data_count] +public: + struct HashDataContents { + DIE *Die; // Offsets + char Flags; // Specific flags to output + + HashDataContents(DIE *D, char Flags) : + Die(D), + Flags(Flags) { } + #ifndef NDEBUG + void print(raw_ostream &O) const { + O << " Offset: " << Die->getOffset() << "\n"; + O << " Tag: " << dwarf::TagString(Die->getTag()) << "\n"; + O << " Flags: " << Flags << "\n"; + } + #endif + }; +private: + struct HashData { + StringRef Str; + uint32_t HashValue; + MCSymbol *Sym; + std::vector<struct HashDataContents*> Data; // offsets + HashData(StringRef S) : Str(S) { + HashValue = DwarfAccelTable::HashDJB(S); + } + void addData(struct HashDataContents *Datum) { Data.push_back(Datum); } + #ifndef NDEBUG + void print(raw_ostream &O) { + O << "Name: " << Str << "\n"; + O << " Hash Value: " << format("0x%x", HashValue) << "\n"; + O << " Symbol: " ; + if (Sym) Sym->print(O); + else O << "<none>"; + O << "\n"; + for (size_t i = 0; i < Data.size(); i++) { + O << " Offset: " << Data[i]->Die->getOffset() << "\n"; + O << " Tag: " << dwarf::TagString(Data[i]->Die->getTag()) << "\n"; + O << " Flags: " << Data[i]->Flags << "\n"; + } + } + void dump() { + print(dbgs()); + } + #endif + }; + + DwarfAccelTable(const DwarfAccelTable&); // DO NOT IMPLEMENT + void operator=(const DwarfAccelTable&); // DO NOT IMPLEMENT + + // Internal Functions + void EmitHeader(AsmPrinter *); + void EmitBuckets(AsmPrinter *); + void EmitHashes(AsmPrinter *); + void EmitOffsets(AsmPrinter *, MCSymbol *); + void EmitData(AsmPrinter *, DwarfDebug *D); + + // Output Variables + TableHeader Header; + TableHeaderData HeaderData; + std::vector<HashData*> Data; + + // String Data + typedef std::vector<struct HashDataContents*> DataArray; + typedef StringMap<DataArray> StringEntries; + StringEntries Entries; + + // Buckets/Hashes/Offsets + typedef std::vector<HashData*> HashList; + typedef std::vector<HashList> BucketList; + BucketList Buckets; + HashList Hashes; + + // Public Implementation + public: + DwarfAccelTable(DwarfAccelTable::Atom); + DwarfAccelTable(std::vector<DwarfAccelTable::Atom> &); + ~DwarfAccelTable(); + void AddName(StringRef, DIE*, 
char = 0); + void FinalizeTable(AsmPrinter *, const char *); + void Emit(AsmPrinter *, MCSymbol *, DwarfDebug *); +#ifndef NDEBUG + void print(raw_ostream &O); + void dump() { print(dbgs()); } +#endif +}; + +} +#endif diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index 8ed4f4c43a7c..d975f1f97bea 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -142,12 +142,14 @@ void DwarfCFIException::EndFunction() { Asm->OutStreamer.EmitCFIEndProc(); + if (!shouldEmitPersonality) + return; + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber())); // Map all labels and get rid of any dead landing pads. MMI->TidyLandingPads(); - if (shouldEmitPersonality) - EmitExceptionTable(); + EmitExceptionTable(); } diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 6fe476d02ef7..69dc454ae1d7 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -13,12 +13,14 @@ #define DEBUG_TYPE "dwarfdebug" +#include "DwarfAccelTable.h" #include "DwarfCompileUnit.h" #include "DwarfDebug.h" #include "llvm/Constants.h" #include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" #include "llvm/Analysis/DIBuilder.h" +#include "llvm/Support/Debug.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameLowering.h" @@ -30,8 +32,9 @@ using namespace llvm; /// CompileUnit - Compile unit constructor. -CompileUnit::CompileUnit(unsigned I, DIE *D, AsmPrinter *A, DwarfDebug *DW) - : ID(I), CUDie(D), Asm(A), DD(DW), IndexTyDie(0) { +CompileUnit::CompileUnit(unsigned I, unsigned L, DIE *D, AsmPrinter *A, + DwarfDebug *DW) + : ID(I), Language(L), CUDie(D), Asm(A), DD(DW), IndexTyDie(0) { DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1); } @@ -67,12 +70,19 @@ void CompileUnit::addSInt(DIE *Die, unsigned Attribute, Die->addValue(Attribute, Form, Value); } -/// addString - Add a string attribute data and value. DIEString only -/// keeps string reference. -void CompileUnit::addString(DIE *Die, unsigned Attribute, unsigned Form, - StringRef String) { - DIEValue *Value = new (DIEValueAllocator) DIEString(String); - Die->addValue(Attribute, Form, Value); +/// addString - Add a string attribute data and value. We always emit a +/// reference to the string pool instead of immediate strings so that DIEs have +/// more predictable sizes. +void CompileUnit::addString(DIE *Die, unsigned Attribute, StringRef String) { + MCSymbol *Symb = DD->getStringPoolEntry(String); + DIEValue *Value; + if (Asm->needsRelocationsForDwarfStringPool()) + Value = new (DIEValueAllocator) DIELabel(Symb); + else { + MCSymbol *StringPool = DD->getStringPool(); + Value = new (DIEValueAllocator) DIEDelta(Symb, StringPool); + } + Die->addValue(Attribute, dwarf::DW_FORM_strp, Value); } /// addLabel - Add a Dwarf label attribute data and value. @@ -98,7 +108,6 @@ void CompileUnit::addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, Die->addValue(Attribute, Form, createDIEEntry(Entry)); } - /// addBlock - Add block data. 
/// void CompileUnit::addBlock(DIE *Die, unsigned Attribute, unsigned Form, @@ -135,8 +144,7 @@ void CompileUnit::addSourceLine(DIE *Die, DIGlobalVariable G) { unsigned Line = G.getLineNumber(); if (Line == 0) return; - unsigned FileID = DD->GetOrCreateSourceID(G.getFilename(), - G.getDirectory()); + unsigned FileID = DD->GetOrCreateSourceID(G.getFilename(), G.getDirectory()); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -148,14 +156,14 @@ void CompileUnit::addSourceLine(DIE *Die, DISubprogram SP) { // Verify subprogram. if (!SP.Verify()) return; - // If the line number is 0, don't add it. - if (SP.getLineNumber() == 0) - return; + // If the line number is 0, don't add it. unsigned Line = SP.getLineNumber(); - if (!SP.getContext().Verify()) + if (Line == 0) return; - unsigned FileID = DD->GetOrCreateSourceID(SP.getFilename(), SP.getDirectory()); + + unsigned FileID = DD->GetOrCreateSourceID(SP.getFilename(), + SP.getDirectory()); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -169,9 +177,28 @@ void CompileUnit::addSourceLine(DIE *Die, DIType Ty) { return; unsigned Line = Ty.getLineNumber(); - if (Line == 0 || !Ty.getContext().Verify()) + if (Line == 0) return; - unsigned FileID = DD->GetOrCreateSourceID(Ty.getFilename(), Ty.getDirectory()); + unsigned FileID = DD->GetOrCreateSourceID(Ty.getFilename(), + Ty.getDirectory()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. +void CompileUnit::addSourceLine(DIE *Die, DIObjCProperty Ty) { + // Verify type. + if (!Ty.Verify()) + return; + + unsigned Line = Ty.getLineNumber(); + if (Line == 0) + return; + DIFile File = Ty.getFile(); + unsigned FileID = DD->GetOrCreateSourceID(File.getFilename(), + File.getDirectory()); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -458,7 +485,7 @@ static bool isTypeSigned(DIType Ty, int *SizeInBits) { /// addConstantValue - Add constant value entry in variable DIE. bool CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty) { - assert (MO.isImm() && "Invalid machine operand!"); + assert(MO.isImm() && "Invalid machine operand!"); DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); int SizeInBits = -1; bool SignedConstant = isTypeSigned(Ty, &SizeInBits); @@ -558,8 +585,8 @@ void CompileUnit::addTemplateParams(DIE &Buffer, DIArray TParams) { Buffer.addChild(getOrCreateTemplateValueParameterDIE( DITemplateValueParameter(Element))); } - } + /// addToContextOwner - Add Die into the list of its context owner's children. void CompileUnit::addToContextOwner(DIE *Die, DIDescriptor Context) { if (Context.isType()) { @@ -598,13 +625,29 @@ DIE *CompileUnit::getOrCreateTypeDIE(const MDNode *TyNode) { assert(Ty.isDerivedType() && "Unknown kind of DIType"); constructTypeDIE(*TyDIE, DIDerivedType(Ty)); } - + // If this is a named finished type then include it in the list of types + // for the accelerator tables. 
+ if (!Ty.getName().empty() && !Ty.isForwardDecl()) { + bool IsImplementation = 0; + if (Ty.isCompositeType()) { + DICompositeType CT(Ty); + // A runtime language of 0 actually means C/C++ and that any + // non-negative value is some version of Objective-C/C++. + IsImplementation = (CT.getRunTimeLang() == 0) || + CT.isObjcClassComplete(); + } + unsigned Flags = IsImplementation ? + DwarfAccelTable::eTypeFlagClassIsImplementation : 0; + addAccelType(Ty.getName(), std::make_pair(TyDIE, Flags)); + } + addToContextOwner(TyDIE, Ty.getContext()); return TyDIE; } /// addType - Add a new type attribute to the specified entity. -void CompileUnit::addType(DIE *Entity, DIType Ty) { +void CompileUnit::addType(DIE *Entity, DIType Ty, + unsigned Attribute) { if (!Ty.Verify()) return; @@ -612,7 +655,7 @@ void CompileUnit::addType(DIE *Entity, DIType Ty) { DIEEntry *Entry = getDIEEntry(Ty); // If it exists then use the existing value. if (Entry) { - Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry); + Entity->addValue(Attribute, dwarf::DW_FORM_ref4, Entry); return; } @@ -622,7 +665,7 @@ void CompileUnit::addType(DIE *Entity, DIType Ty) { // Set up proxy. Entry = createDIEEntry(Buffer); insertDIEEntry(Ty, Entry); - Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry); + Entity->addValue(Attribute, dwarf::DW_FORM_ref4, Entry); // If this is a complete composite type then include it in the // list of global types. @@ -662,7 +705,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) { StringRef Name = BTy.getName(); // Add name if not anonymous or intermediate type. if (!Name.empty()) - addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + addString(&Buffer, dwarf::DW_AT_name, Name); if (BTy.getTag() == dwarf::DW_TAG_unspecified_type) { Buffer.setTag(dwarf::DW_TAG_unspecified_type); @@ -671,8 +714,8 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) { } Buffer.setTag(dwarf::DW_TAG_base_type); - addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, - BTy.getEncoding()); + addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, + BTy.getEncoding()); uint64_t Size = BTy.getSizeInBits() >> 3; addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); @@ -696,10 +739,10 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { // Add name if not anonymous or intermediate type. if (!Name.empty()) - addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + addString(&Buffer, dwarf::DW_AT_name, Name); // Add size if non-zero (derived types might be zero-sized.) - if (Size) + if (Size && Tag != dwarf::DW_TAG_pointer_type) addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); // Add source line info if available and TyDesc is not a forward declaration. @@ -755,8 +798,12 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { Buffer.addChild(Arg); } } - // Add prototype flag. - if (isPrototyped) + // Add prototype flag if we're dealing with a C language and the + // function has been prototyped. 
+ if (isPrototyped && + (Language == dwarf::DW_LANG_C89 || + Language == dwarf::DW_LANG_C99 || + Language == dwarf::DW_LANG_ObjC)) addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); } break; @@ -779,13 +826,13 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { DISubprogram SP(Element); ElemDie = getOrCreateSubprogramDIE(DISubprogram(Element)); if (SP.isProtected()) - addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_protected); else if (SP.isPrivate()) - addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_private); else - addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_public); if (SP.isExplicit()) addUInt(ElemDie, dwarf::DW_AT_explicit, dwarf::DW_FORM_flag, 1); @@ -793,15 +840,54 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { else if (Element.isVariable()) { DIVariable DV(Element); ElemDie = new DIE(dwarf::DW_TAG_variable); - addString(ElemDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, - DV.getName()); + addString(ElemDie, dwarf::DW_AT_name, DV.getName()); addType(ElemDie, DV.getType()); addUInt(ElemDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); addUInt(ElemDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); addSourceLine(ElemDie, DV); - } else if (Element.isDerivedType()) - ElemDie = createMemberDIE(DIDerivedType(Element)); - else + } else if (Element.isDerivedType()) { + DIDerivedType DDTy(Element); + if (DDTy.getTag() == dwarf::DW_TAG_friend) { + ElemDie = new DIE(dwarf::DW_TAG_friend); + addType(ElemDie, DDTy.getTypeDerivedFrom(), dwarf::DW_AT_friend); + } else + ElemDie = createMemberDIE(DIDerivedType(Element)); + } else if (Element.isObjCProperty()) { + DIObjCProperty Property(Element); + ElemDie = new DIE(Property.getTag()); + StringRef PropertyName = Property.getObjCPropertyName(); + addString(ElemDie, dwarf::DW_AT_APPLE_property_name, PropertyName); + addType(ElemDie, Property.getType()); + addSourceLine(ElemDie, Property); + StringRef GetterName = Property.getObjCPropertyGetterName(); + if (!GetterName.empty()) + addString(ElemDie, dwarf::DW_AT_APPLE_property_getter, GetterName); + StringRef SetterName = Property.getObjCPropertySetterName(); + if (!SetterName.empty()) + addString(ElemDie, dwarf::DW_AT_APPLE_property_setter, SetterName); + unsigned PropertyAttributes = 0; + if (Property.isReadOnlyObjCProperty()) + PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readonly; + if (Property.isReadWriteObjCProperty()) + PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readwrite; + if (Property.isAssignObjCProperty()) + PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_assign; + if (Property.isRetainObjCProperty()) + PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_retain; + if (Property.isCopyObjCProperty()) + PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_copy; + if (Property.isNonAtomicObjCProperty()) + PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_nonatomic; + if (PropertyAttributes) + addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, 0, + PropertyAttributes); + + DIEEntry *Entry = getDIEEntry(Element); + if (!Entry) { + Entry = createDIEEntry(ElemDie); + insertDIEEntry(Element, Entry); + } + } else continue; Buffer.addChild(ElemDie); } @@ -809,11 +895,6 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, 
DICompositeType CTy) { if (CTy.isAppleBlockExtension()) addUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1); - unsigned RLang = CTy.getRunTimeLang(); - if (RLang) - addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class, - dwarf::DW_FORM_data1, RLang); - DICompositeType ContainingType = CTy.getContainingType(); if (DIDescriptor(ContainingType).isCompositeType()) addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, @@ -827,7 +908,11 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { addUInt(&Buffer, dwarf::DW_AT_APPLE_objc_complete_type, dwarf::DW_FORM_flag, 1); - if (Tag == dwarf::DW_TAG_class_type) + // Add template parameters to a class, structure or union types. + // FIXME: The support isn't in the metadata for this yet. + if (Tag == dwarf::DW_TAG_class_type || + Tag == dwarf::DW_TAG_structure_type || + Tag == dwarf::DW_TAG_union_type) addTemplateParams(Buffer, CTy.getTemplateParams()); break; @@ -838,11 +923,11 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { // Add name if not anonymous or intermediate type. if (!Name.empty()) - addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + addString(&Buffer, dwarf::DW_AT_name, Name); if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type || Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) - { + { // Add size if non-zero (derived types might be zero-sized.) if (Size) addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); @@ -857,6 +942,12 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { // Add source line info if available. if (!CTy.isForwardDecl()) addSourceLine(&Buffer, CTy); + + // No harm in adding the runtime language to the declaration. + unsigned RLang = CTy.getRunTimeLang(); + if (RLang) + addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class, + dwarf::DW_FORM_data1, RLang); } } @@ -870,7 +961,7 @@ CompileUnit::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) { ParamDIE = new DIE(dwarf::DW_TAG_template_type_parameter); addType(ParamDIE, TP.getType()); - addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TP.getName()); + addString(ParamDIE, dwarf::DW_AT_name, TP.getName()); return ParamDIE; } @@ -885,7 +976,7 @@ CompileUnit::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV) ParamDIE = new DIE(dwarf::DW_TAG_template_value_parameter); addType(ParamDIE, TPV.getType()); if (!TPV.getName().empty()) - addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TPV.getName()); + addString(ParamDIE, dwarf::DW_AT_name, TPV.getName()); addUInt(ParamDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata, TPV.getValue()); return ParamDIE; @@ -898,8 +989,11 @@ DIE *CompileUnit::getOrCreateNameSpace(DINameSpace NS) { return NDie; NDie = new DIE(dwarf::DW_TAG_namespace); insertDIE(NS, NDie); - if (!NS.getName().empty()) - addString(NDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, NS.getName()); + if (!NS.getName().empty()) { + addString(NDie, dwarf::DW_AT_name, NS.getName()); + addAccelNamespace(NS.getName(), NDie); + } else + addAccelNamespace("(anonymous namespace)", NDie); addSourceLine(NDie, NS); addToContextOwner(NDie, NS.getContext()); return NDie; @@ -921,6 +1015,12 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { if (SPDie) return SPDie; + DISubprogram SPDecl = SP.getFunctionDeclaration(); + DIE *DeclDie = NULL; + if (SPDecl.isSubprogram()) { + DeclDie = getOrCreateSubprogramDIE(SPDecl); + } + SPDie = new DIE(dwarf::DW_TAG_subprogram); 
// DW_TAG_inlined_subroutine may refer to this DIE. @@ -932,25 +1032,36 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { // Add function template parameters. addTemplateParams(*SPDie, SP.getTemplateParams()); + // Unfortunately this code needs to stay here to work around + // a bug in older gdbs that requires the linkage name to resolve + // multiple template functions. StringRef LinkageName = SP.getLinkageName(); if (!LinkageName.empty()) - addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, - dwarf::DW_FORM_string, - getRealLinkageName(LinkageName)); + addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, + getRealLinkageName(LinkageName)); // If this DIE is going to refer declaration info using AT_specification // then there is no need to add other attributes. - if (SP.getFunctionDeclaration().isSubprogram()) + if (DeclDie) { + // Refer function declaration directly. + addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, + DeclDie); + return SPDie; + } // Constructors and operators for anonymous aggregates do not have names. if (!SP.getName().empty()) - addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, - SP.getName()); + addString(SPDie, dwarf::DW_AT_name, SP.getName()); addSourceLine(SPDie, SP); - if (SP.isPrototyped()) + // Add the prototype if we have a prototype and we have a C like + // language. + if (SP.isPrototyped() && + (Language == dwarf::DW_LANG_C89 || + Language == dwarf::DW_LANG_C99 || + Language == dwarf::DW_LANG_ObjC)) addUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); // Add Return Type. @@ -965,7 +1076,7 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { unsigned VK = SP.getVirtuality(); if (VK) { - addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, VK); + addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, VK); DIEBlock *Block = getDIEBlock(); addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); addUInt(Block, 0, dwarf::DW_FORM_udata, SP.getVirtualIndex()); @@ -1052,31 +1163,30 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { insertDIE(N, VariableDIE); // Add name. - addString(VariableDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, - GV.getDisplayName()); + addString(VariableDIE, dwarf::DW_AT_name, GV.getDisplayName()); StringRef LinkageName = GV.getLinkageName(); bool isGlobalVariable = GV.getGlobal() != NULL; if (!LinkageName.empty() && isGlobalVariable) - addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name, - dwarf::DW_FORM_string, - getRealLinkageName(LinkageName)); + addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name, + getRealLinkageName(LinkageName)); // Add type. DIType GTy = GV.getType(); addType(VariableDIE, GTy); // Add scoping info. - if (!GV.isLocalToUnit()) { + if (!GV.isLocalToUnit()) addUInt(VariableDIE, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); - // Expose as global. - addGlobal(GV.getName(), VariableDIE); - } + // Add line number info. addSourceLine(VariableDIE, GV); // Add to context owner. DIDescriptor GVContext = GV.getContext(); addToContextOwner(VariableDIE, GVContext); // Add location. 
+ bool addToAccelTable = false; + DIE *VariableSpecDIE = NULL; if (isGlobalVariable) { + addToAccelTable = true; DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); addLabel(Block, 0, dwarf::DW_FORM_udata, @@ -1086,7 +1196,7 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { if (GVContext && GV.isDefinition() && !GVContext.isCompileUnit() && !GVContext.isFile() && !isSubprogramContext(GVContext)) { // Create specification DIE. - DIE *VariableSpecDIE = new DIE(dwarf::DW_TAG_variable); + VariableSpecDIE = new DIE(dwarf::DW_TAG_variable); addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, VariableDIE); addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block); @@ -1095,11 +1205,12 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { addDie(VariableSpecDIE); } else { addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); - } + } } else if (const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(GV.getConstant())) addConstantValue(VariableDIE, CI, GTy.isUnsignedDIType()); else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) { + addToAccelTable = true; // GV is a merged global. DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); Value *Ptr = CE->getOperand(0); @@ -1114,6 +1225,16 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); } + if (addToAccelTable) { + DIE *AddrDIE = VariableSpecDIE ? VariableSpecDIE : VariableDIE; + addAccelName(GV.getName(), AddrDIE); + + // If the linkage name is different than the name, go ahead and output + // that as well into the name table. + if (GV.getLinkageName() != "" && GV.getName() != GV.getLinkageName()) + addAccelName(GV.getLinkageName(), AddrDIE); + } + return; } @@ -1121,8 +1242,8 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){ DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type); addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy); - int64_t L = SR.getLo(); - int64_t H = SR.getHi(); + uint64_t L = SR.getLo(); + uint64_t H = SR.getHi(); // The L value defines the lower bounds which is typically zero for C/C++. The // H value is the upper bounds. Values are 64 bit. 
H - L + 1 is the size @@ -1135,8 +1256,8 @@ void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy) return; } if (L) - addSInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L); - addSInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H); + addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L); + addUInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H); Buffer.addChild(DW_Subrange); } @@ -1175,7 +1296,7 @@ void CompileUnit::constructArrayTypeDIE(DIE &Buffer, DIE *CompileUnit::constructEnumTypeDIE(DIEnumerator ETy) { DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator); StringRef Name = ETy.getName(); - addString(Enumerator, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + addString(Enumerator, dwarf::DW_AT_name, Name); int64_t Value = ETy.getEnumValue(); addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value); return Enumerator; @@ -1212,8 +1333,7 @@ DIE *CompileUnit::constructVariableDIE(DbgVariable *DV, bool isScopeAbstract) { addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin, dwarf::DW_FORM_ref4, AbsDIE); else { - addString(VariableDie, dwarf::DW_AT_name, - dwarf::DW_FORM_string, Name); + addString(VariableDie, dwarf::DW_AT_name, Name); addSourceLine(VariableDie, DV->getVariable()); addType(VariableDie, DV->getType()); } @@ -1308,7 +1428,7 @@ DIE *CompileUnit::createMemberDIE(DIDerivedType DT) { DIE *MemberDie = new DIE(DT.getTag()); StringRef Name = DT.getName(); if (!Name.empty()) - addString(MemberDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + addString(MemberDie, dwarf::DW_AT_name, Name); addType(MemberDie, DT.getTypeDerivedFrom()); @@ -1366,32 +1486,35 @@ DIE *CompileUnit::createMemberDIE(DIDerivedType DT) { addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie); if (DT.isProtected()) - addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_protected); else if (DT.isPrivate()) - addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_private); // Otherwise C++ member and base classes are considered public. else - addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_public); if (DT.isVirtual()) - addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, + addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, dwarf::DW_VIRTUALITY_virtual); // Objective-C properties. + if (MDNode *PNode = DT.getObjCProperty()) + if (DIEEntry *PropertyDie = getDIEEntry(PNode)) + MemberDie->addValue(dwarf::DW_AT_APPLE_property, dwarf::DW_FORM_ref4, + PropertyDie); + + // This is only for backward compatibility. 
StringRef PropertyName = DT.getObjCPropertyName(); if (!PropertyName.empty()) { - addString(MemberDie, dwarf::DW_AT_APPLE_property_name, dwarf::DW_FORM_string, - PropertyName); + addString(MemberDie, dwarf::DW_AT_APPLE_property_name, PropertyName); StringRef GetterName = DT.getObjCPropertyGetterName(); if (!GetterName.empty()) - addString(MemberDie, dwarf::DW_AT_APPLE_property_getter, - dwarf::DW_FORM_string, GetterName); + addString(MemberDie, dwarf::DW_AT_APPLE_property_getter, GetterName); StringRef SetterName = DT.getObjCPropertySetterName(); if (!SetterName.empty()) - addString(MemberDie, dwarf::DW_AT_APPLE_property_setter, - dwarf::DW_FORM_string, SetterName); + addString(MemberDie, dwarf::DW_AT_APPLE_property_setter, SetterName); unsigned PropertyAttributes = 0; if (DT.isReadOnlyObjCProperty()) PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readonly; diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 785926579fa4..45e407e27ffa 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -29,13 +29,17 @@ class ConstantInt; class DbgVariable; //===----------------------------------------------------------------------===// -/// CompileUnit - This dwarf writer support class manages information associate +/// CompileUnit - This dwarf writer support class manages information associated /// with a source file. class CompileUnit { /// ID - File identifier for source. /// unsigned ID; + /// Language - The DW_AT_language of the compile unit + /// + unsigned Language; + /// Die - Compile unit debug information entry. /// const OwningPtr<DIE> CUDie; @@ -56,14 +60,17 @@ class CompileUnit { /// descriptors to debug information entries using a DIEEntry proxy. DenseMap<const MDNode *, DIEEntry *> MDNodeToDIEEntryMap; - /// Globals - A map of globally visible named entities for this unit. - /// - StringMap<DIE*> Globals; - /// GlobalTypes - A map of globally visible types for this unit. /// StringMap<DIE*> GlobalTypes; + /// AccelNames - A map of names for the name accelerator table. + /// + StringMap<std::vector<DIE*> > AccelNames; + StringMap<std::vector<DIE*> > AccelObjC; + StringMap<std::vector<DIE*> > AccelNamespace; + StringMap<std::vector<std::pair<DIE*, unsigned> > > AccelTypes; + /// DIEBlocks - A list of all the DIEBlocks in use. std::vector<DIEBlock *> DIEBlocks; @@ -73,27 +80,56 @@ class CompileUnit { DenseMap<DIE *, const MDNode *> ContainingTypeMap; public: - CompileUnit(unsigned I, DIE *D, AsmPrinter *A, DwarfDebug *DW); + CompileUnit(unsigned I, unsigned L, DIE *D, AsmPrinter *A, DwarfDebug *DW); ~CompileUnit(); // Accessors. unsigned getID() const { return ID; } + unsigned getLanguage() const { return Language; } DIE* getCUDie() const { return CUDie.get(); } - const StringMap<DIE*> &getGlobals() const { return Globals; } const StringMap<DIE*> &getGlobalTypes() const { return GlobalTypes; } + const StringMap<std::vector<DIE*> > &getAccelNames() const { + return AccelNames; + } + const StringMap<std::vector<DIE*> > &getAccelObjC() const { + return AccelObjC; + } + const StringMap<std::vector<DIE*> > &getAccelNamespace() const { + return AccelNamespace; + } + const StringMap<std::vector<std::pair<DIE*, unsigned > > > + &getAccelTypes() const { + return AccelTypes; + } + /// hasContent - Return true if this compile unit has something to write out. /// bool hasContent() const { return !CUDie->getChildren().empty(); } - /// addGlobal - Add a new global entity to the compile unit. 
- /// - void addGlobal(StringRef Name, DIE *Die) { Globals[Name] = Die; } - /// addGlobalType - Add a new global type to the compile unit. /// void addGlobalType(DIType Ty); + + /// addAccelName - Add a new name to the name accelerator table. + void addAccelName(StringRef Name, DIE *Die) { + std::vector<DIE*> &DIEs = AccelNames[Name]; + DIEs.push_back(Die); + } + void addAccelObjC(StringRef Name, DIE *Die) { + std::vector<DIE*> &DIEs = AccelObjC[Name]; + DIEs.push_back(Die); + } + void addAccelNamespace(StringRef Name, DIE *Die) { + std::vector<DIE*> &DIEs = AccelNamespace[Name]; + DIEs.push_back(Die); + } + void addAccelType(StringRef Name, std::pair<DIE *, unsigned> Die) { + std::vector<std::pair<DIE*, unsigned > > &DIEs = AccelTypes[Name]; + DIEs.push_back(Die); + } + /// getDIE - Returns the debug information entry map slot for the /// specified debug variable. DIE *getDIE(const MDNode *N) { return MDNodeToDieMap.lookup(N); } @@ -150,8 +186,7 @@ public: /// addString - Add a string attribute data and value. /// - void addString(DIE *Die, unsigned Attribute, unsigned Form, - const StringRef Str); + void addString(DIE *Die, unsigned Attribute, const StringRef Str); /// addLabel - Add a Dwarf label attribute data and value. /// @@ -178,6 +213,7 @@ public: void addSourceLine(DIE *Die, DISubprogram SP); void addSourceLine(DIE *Die, DIType Ty); void addSourceLine(DIE *Die, DINameSpace NS); + void addSourceLine(DIE *Die, DIObjCProperty Ty); /// addAddress - Add an address attribute to a die based on the location /// provided. @@ -225,8 +261,10 @@ public: /// addToContextOwner - Add Die into the list of its context owner's children. void addToContextOwner(DIE *Die, DIDescriptor Context); - /// addType - Add a new type attribute to the specified entity. - void addType(DIE *Entity, DIType Ty); + /// addType - Add a new type attribute to the specified entity. This takes + /// and attribute parameter because DW_AT_friend attributes are also + /// type references. + void addType(DIE *Entity, DIType Ty, unsigned Attribute = dwarf::DW_AT_type); /// getOrCreateNameSpace - Create a DIE for DINameSpace. DIE *getOrCreateNameSpace(DINameSpace NS); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 1b7e370fca09..cb7887890cda 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -14,10 +14,12 @@ #define DEBUG_TYPE "dwarfdebug" #include "DwarfDebug.h" #include "DIE.h" +#include "DwarfAccelTable.h" #include "DwarfCompileUnit.h" #include "llvm/Constants.h" #include "llvm/Module.h" #include "llvm/Instructions.h" +#include "llvm/ADT/Triple.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/MC/MCAsmInfo.h" @@ -52,6 +54,10 @@ static cl::opt<bool> UnknownLocations("use-unknown-locations", cl::Hidden, cl::desc("Make an absence of debug location information explicit."), cl::init(false)); +static cl::opt<bool> DwarfAccelTables("dwarf-accel-tables", cl::Hidden, + cl::desc("Output prototype dwarf accelerator tables."), + cl::init(false)); + namespace { const char *DWARFGroupName = "DWARF Emission"; const char *DbgTimerName = "DWARF Debug Writer"; @@ -128,6 +134,11 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) DwarfStrSectionSym = TextSectionSym = 0; DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = 0; FunctionBeginSym = FunctionEndSym = 0; + + // Turn on accelerator tables for Darwin. 
+ if (Triple(M->getTargetTriple()).isOSDarwin()) + DwarfAccelTables = true; + { NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); beginModule(M); @@ -136,6 +147,22 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) DwarfDebug::~DwarfDebug() { } +/// EmitSectionSym - Switch to the specified MCSection and emit an assembler +/// temporary label to it if SymbolStem is specified. +static MCSymbol *EmitSectionSym(AsmPrinter *Asm, const MCSection *Section, + const char *SymbolStem = 0) { + Asm->OutStreamer.SwitchSection(Section); + if (!SymbolStem) return 0; + + MCSymbol *TmpSym = Asm->GetTempSymbol(SymbolStem); + Asm->OutStreamer.EmitLabel(TmpSym); + return TmpSym; +} + +MCSymbol *DwarfDebug::getStringPool() { + return Asm->GetTempSymbol("section_str"); +} + MCSymbol *DwarfDebug::getStringPoolEntry(StringRef Str) { std::pair<MCSymbol*, unsigned> &Entry = StringPool[Str]; if (Entry.first) return Entry.first; @@ -144,7 +171,6 @@ MCSymbol *DwarfDebug::getStringPoolEntry(StringRef Str) { return Entry.first = Asm->GetTempSymbol("string", Entry.second); } - /// assignAbbrevNumber - Define a unique number for the abbreviation. /// void DwarfDebug::assignAbbrevNumber(DIEAbbrev &Abbrev) { @@ -178,6 +204,63 @@ static StringRef getRealLinkageName(StringRef LinkageName) { return LinkageName; } +static bool isObjCClass(StringRef Name) { + return Name.startswith("+") || Name.startswith("-"); +} + +static bool hasObjCCategory(StringRef Name) { + if (!isObjCClass(Name)) return false; + + size_t pos = Name.find(')'); + if (pos != std::string::npos) { + if (Name[pos+1] != ' ') return false; + return true; + } + return false; +} + +static void getObjCClassCategory(StringRef In, StringRef &Class, + StringRef &Category) { + if (!hasObjCCategory(In)) { + Class = In.slice(In.find('[') + 1, In.find(' ')); + Category = ""; + return; + } + + Class = In.slice(In.find('[') + 1, In.find('(')); + Category = In.slice(In.find('[') + 1, In.find(' ')); + return; +} + +static StringRef getObjCMethodName(StringRef In) { + return In.slice(In.find(' ') + 1, In.find(']')); +} + +// Add the various names to the Dwarf accelerator table names. +static void addSubprogramNames(CompileUnit *TheCU, DISubprogram SP, + DIE* Die) { + if (!SP.isDefinition()) return; + + TheCU->addAccelName(SP.getName(), Die); + + // If the linkage name is different than the name, go ahead and output + // that as well into the name table. + if (SP.getLinkageName() != "" && SP.getName() != SP.getLinkageName()) + TheCU->addAccelName(SP.getLinkageName(), Die); + + // If this is an Objective-C selector name add it to the ObjC accelerator + // too. + if (isObjCClass(SP.getName())) { + StringRef Class, Category; + getObjCClassCategory(SP.getName(), Class, Category); + TheCU->addAccelObjC(Class, Die); + if (Category != "") + TheCU->addAccelObjC(Category, Die); + // Also add the base method name to the name table. + TheCU->addAccelName(getObjCMethodName(SP.getName()), Die); + } +} + /// updateSubprogramScopeDIE - Find DIE for the given subprogram and /// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes. /// If there are global variables in this scope then create and insert @@ -190,11 +273,7 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, DISubprogram SP(SPNode); DISubprogram SPDecl = SP.getFunctionDeclaration(); - if (SPDecl.isSubprogram()) - // Refer function declaration directly. 
- SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, - SPCU->getOrCreateSubprogramDIE(SPDecl)); - else { + if (!SPDecl.isSubprogram()) { // There is not any need to generate specification DIE for a function // defined at compile unit level. If a function is defined inside another // function then gdb prefers the definition at top level and but does not @@ -203,7 +282,7 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, if (SP.isDefinition() && !SP.getContext().isCompileUnit() && !SP.getContext().isFile() && !isSubprogramContext(SP.getContext())) { - SPCU-> addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + SPCU->addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); // Add arguments. DICompositeType SPTy = SP.getType(); @@ -241,6 +320,10 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, MachineLocation Location(RI->getFrameRegister(*Asm->MF)); SPCU->addAddress(SPDie, dwarf::DW_AT_frame_base, Location); + // Add name to the name table, we do this here because we're guaranteed + // to have concrete versions of our DW_TAG_subprogram nodes. + addSubprogramNames(SPCU, SP, SPDie); + return SPDie; } @@ -248,7 +331,6 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, /// for this scope and attach DW_AT_low_pc/DW_AT_high_pc labels. DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { - DIE *ScopeDIE = new DIE(dwarf::DW_TAG_lexical_block); if (Scope->isAbstractScope()) return ScopeDIE; @@ -294,10 +376,9 @@ DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU, /// of the function. DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { - const SmallVector<InsnRange, 4> &Ranges = Scope->getRanges(); - assert (Ranges.empty() == false - && "LexicalScope does not have instruction markers!"); + assert(Ranges.empty() == false && + "LexicalScope does not have instruction markers!"); if (!Scope->getScopeNode()) return NULL; @@ -314,8 +395,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, const MCSymbol *EndLabel = getLabelAfterInsn(RI->second); if (StartLabel == 0 || EndLabel == 0) { - assert (0 && "Unexpected Start and End labels for a inlined scope!"); - return 0; + llvm_unreachable("Unexpected Start and End labels for a inlined scope!"); } assert(StartLabel->isDefined() && "Invalid starting label for an inlined scope!"); @@ -358,16 +438,20 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, I = InlineInfo.find(InlinedSP); if (I == InlineInfo.end()) { - InlineInfo[InlinedSP].push_back(std::make_pair(StartLabel, - ScopeDIE)); + InlineInfo[InlinedSP].push_back(std::make_pair(StartLabel, ScopeDIE)); InlinedSPNodes.push_back(InlinedSP); } else I->second.push_back(std::make_pair(StartLabel, ScopeDIE)); DILocation DL(Scope->getInlinedAt()); - TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, TheCU->getID()); + TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, + GetOrCreateSourceID(DL.getFilename(), DL.getDirectory())); TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber()); + // Add name to the name table, we do this here because we're guaranteed + // to have concrete versions of our DW_TAG_inlined_subprogram nodes. 
+ addSubprogramNames(TheCU, InlinedSP, ScopeDIE); + return ScopeDIE; } @@ -376,7 +460,7 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { if (!Scope || !Scope->getScopeNode()) return NULL; - SmallVector <DIE *, 8> Children; + SmallVector<DIE *, 8> Children; // Collect arguments for current function. if (LScopes.isCurrentFunctionScope(Scope)) @@ -426,39 +510,39 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { ScopeDIE->addChild(*I); if (DS.isSubprogram()) - TheCU->addPubTypes(DISubprogram(DS)); + TheCU->addPubTypes(DISubprogram(DS)); - return ScopeDIE; + return ScopeDIE; } /// GetOrCreateSourceID - Look up the source id with the given directory and /// source file names. If none currently exists, create a new id and insert it /// in the SourceIds map. This can update DirectoryNames and SourceFileNames /// maps as well. - unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName, StringRef DirName) { // If FE did not provide a file name, then assume stdin. if (FileName.empty()) return GetOrCreateSourceID("<stdin>", StringRef()); - // MCStream expects full path name as filename. - if (!DirName.empty() && !sys::path::is_absolute(FileName)) { - SmallString<128> FullPathName = DirName; - sys::path::append(FullPathName, FileName); - // Here FullPathName will be copied into StringMap by GetOrCreateSourceID. - return GetOrCreateSourceID(StringRef(FullPathName), StringRef()); - } + // TODO: this might not belong here. See if we can factor this better. + if (DirName == CompilationDir) + DirName = ""; - StringMapEntry<unsigned> &Entry = SourceIdMap.GetOrCreateValue(FileName); - if (Entry.getValue()) - return Entry.getValue(); + unsigned SrcId = SourceIdMap.size()+1; - unsigned SrcId = SourceIdMap.size(); - Entry.setValue(SrcId); + // We look up the file/dir pair by concatenating them with a zero byte. + SmallString<128> NamePair; + NamePair += DirName; + NamePair += '\0'; // Zero bytes are not allowed in paths. + NamePair += FileName; + + StringMapEntry<unsigned> &Ent = SourceIdMap.GetOrCreateValue(NamePair, SrcId); + if (Ent.getValue() != SrcId) + return Ent.getValue(); // Print out a .file directive to specify files for .loc directives. - Asm->OutStreamer.EmitDwarfFileDirective(SrcId, Entry.getKey()); + Asm->OutStreamer.EmitDwarfFileDirective(SrcId, DirName, FileName); return SrcId; } @@ -468,39 +552,36 @@ unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName, CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { DICompileUnit DIUnit(N); StringRef FN = DIUnit.getFilename(); - StringRef Dir = DIUnit.getDirectory(); - unsigned ID = GetOrCreateSourceID(FN, Dir); + CompilationDir = DIUnit.getDirectory(); + unsigned ID = GetOrCreateSourceID(FN, CompilationDir); DIE *Die = new DIE(dwarf::DW_TAG_compile_unit); - CompileUnit *NewCU = new CompileUnit(ID, Die, Asm, this); - NewCU->addString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string, - DIUnit.getProducer()); + CompileUnit *NewCU = new CompileUnit(ID, DIUnit.getLanguage(), Die, Asm, this); + NewCU->addString(Die, dwarf::DW_AT_producer, DIUnit.getProducer()); NewCU->addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2, DIUnit.getLanguage()); - NewCU->addString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN); - // Use DW_AT_entry_pc instead of DW_AT_low_pc/DW_AT_high_pc pair. This - // simplifies debug range entries. 
- NewCU->addUInt(Die, dwarf::DW_AT_entry_pc, dwarf::DW_FORM_addr, 0); + NewCU->addString(Die, dwarf::DW_AT_name, FN); + // 2.17.1 requires that we use DW_AT_low_pc for a single entry point + // into an entity. + NewCU->addUInt(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0); // DW_AT_stmt_list is a offset of line number information for this // compile unit in debug_line section. - if(Asm->MAI->doesDwarfRequireRelocationForSectionOffset()) + if (Asm->MAI->doesDwarfRequireRelocationForSectionOffset()) NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, Asm->GetTempSymbol("section_line")); else NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0); - if (!Dir.empty()) - NewCU->addString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir); + if (!CompilationDir.empty()) + NewCU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir); if (DIUnit.isOptimized()) NewCU->addUInt(Die, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1); StringRef Flags = DIUnit.getFlags(); if (!Flags.empty()) - NewCU->addString(Die, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string, - Flags); + NewCU->addString(Die, dwarf::DW_AT_APPLE_flags, Flags); - unsigned RVer = DIUnit.getRunTimeVersion(); - if (RVer) + if (unsigned RVer = DIUnit.getRunTimeVersion()) NewCU->addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers, dwarf::DW_FORM_data1, RVer); @@ -513,6 +594,11 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { /// construct SubprogramDIE - Construct subprogram DIE. void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU, const MDNode *N) { + CompileUnit *&CURef = SPMap[N]; + if (CURef) + return; + CURef = TheCU; + DISubprogram SP(N); if (!SP.isDefinition()) // This is a method declaration which will be handled while constructing @@ -527,10 +613,6 @@ void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU, // Add to context owner. TheCU->addToContextOwner(SubprogramDie, SP.getContext()); - // Expose as global. - TheCU->addGlobal(SP.getName(), SubprogramDie); - - SPMap[N] = TheCU; return; } @@ -676,7 +758,7 @@ void DwarfDebug::endModule() { // Construct subprogram DIE and add variables DIEs. CompileUnit *SPCU = CUMap.lookup(TheCU); - assert (SPCU && "Unable to find Compile Unit!"); + assert(SPCU && "Unable to find Compile Unit!"); constructSubprogramDIE(SPCU, SP); DIE *ScopeDIE = SPCU->getDIE(SP); for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) { @@ -697,6 +779,13 @@ void DwarfDebug::endModule() { DIE *ISP = *AI; FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined); } + for (DenseMap<const MDNode *, DIE *>::iterator AI = AbstractSPDies.begin(), + AE = AbstractSPDies.end(); AI != AE; ++AI) { + DIE *ISP = AI->second; + if (InlinedSubprogramDIEs.count(ISP)) + continue; + FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined); + } // Emit DW_AT_containing_type attribute to connect types with their // vtable holding type. @@ -727,9 +816,14 @@ void DwarfDebug::endModule() { // Corresponding abbreviations into a abbrev section. emitAbbreviations(); - // Emit info into a debug pubnames section. - emitDebugPubNames(); - + // Emit info into a dwarf accelerator table sections. + if (DwarfAccelTables) { + emitAccelNames(); + emitAccelObjC(); + emitAccelNamespaces(); + emitAccelTypes(); + } + // Emit info into a debug pubtypes section. 
emitDebugPubTypes(); @@ -837,7 +931,7 @@ DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction *MF, /// isDbgValueInDefinedReg - Return true if debug value, encoded by /// DBG_VALUE instruction, is in a defined reg. static bool isDbgValueInDefinedReg(const MachineInstr *MI) { - assert (MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!"); + assert(MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!"); return MI->getNumOperands() == 3 && MI->getOperand(0).isReg() && MI->getOperand(0).getReg() && MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0; @@ -867,8 +961,7 @@ static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm, if (MI->getOperand(0).isCImm()) return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getCImm()); - assert (0 && "Unexpected 3 operand DBG_VALUE instruction!"); - return DotDebugLocEntry(); + llvm_unreachable("Unexpected 3 operand DBG_VALUE instruction!"); } /// collectVariableInfo - Find variables for each lexical scope. @@ -964,7 +1057,8 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, } // The value is valid until the next DBG_VALUE or clobber. - DotDebugLocEntries.push_back(getDebugLocEntry(Asm, FLabel, SLabel, Begin)); + DotDebugLocEntries.push_back(getDebugLocEntry(Asm, FLabel, SLabel, + Begin)); } DotDebugLocEntries.push_back(DotDebugLocEntry()); } @@ -999,12 +1093,15 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { if (!MI->isDebugValue()) { DebugLoc DL = MI->getDebugLoc(); if (DL != PrevInstLoc && (!DL.isUnknown() || UnknownLocations)) { - unsigned Flags = DWARF2_FLAG_IS_STMT; + unsigned Flags = 0; PrevInstLoc = DL; if (DL == PrologEndLoc) { Flags |= DWARF2_FLAG_PROLOGUE_END; PrologEndLoc = DebugLoc(); } + if (PrologEndLoc.isUnknown()) + Flags |= DWARF2_FLAG_IS_STMT; + if (!DL.isUnknown()) { const MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext()); recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags); @@ -1099,12 +1196,19 @@ static MDNode *getScopeNode(DebugLoc DL, const LLVMContext &Ctx) { } /// getFnDebugLoc - Walk up the scope chain of given debug loc and find -/// line number info for the function. +/// line number info for the function. static DebugLoc getFnDebugLoc(DebugLoc DL, const LLVMContext &Ctx) { const MDNode *Scope = getScopeNode(DL, Ctx); DISubprogram SP = getDISubprogram(Scope); - if (SP.Verify()) - return DebugLoc::get(SP.getLineNumber(), 0, SP); + if (SP.Verify()) { + // Check for number of operands since the compatibility is + // cheap here. + if (SP->getNumOperands() > 19) + return DebugLoc::get(SP.getScopeLineNumber(), 0, SP); + else + return DebugLoc::get(SP.getLineNumber(), 0, SP); + } + return DebugLoc(); } @@ -1135,7 +1239,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { const MachineInstr *MI = II; if (MI->isDebugValue()) { - assert (MI->getNumOperands() > 1 && "Invalid machine instruction!"); + assert(MI->getNumOperands() > 1 && "Invalid machine instruction!"); // Keep track of user variables. 
const MDNode *Var = @@ -1206,7 +1310,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { MOE = MI->operands_end(); MOI != MOE; ++MOI) { if (!MOI->isReg() || !MOI->isDef() || !MOI->getReg()) continue; - for (const unsigned *AI = TRI->getOverlaps(MOI->getReg()); + for (const uint16_t *AI = TRI->getOverlaps(MOI->getReg()); unsigned Reg = *AI; ++AI) { const MDNode *Var = LiveUserVar[Reg]; if (!Var) @@ -1277,7 +1381,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { MF->getFunction()->getContext()); recordSourceLine(FnStartDL.getLine(), FnStartDL.getCol(), FnStartDL.getScope(MF->getFunction()->getContext()), - DWARF2_FLAG_IS_STMT); + 0); } } @@ -1303,7 +1407,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); CompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode()); - assert (TheCU && "Unable to find compile unit!"); + assert(TheCU && "Unable to find compile unit!"); // Construct abstract scopes. ArrayRef<LexicalScope *> AList = LScopes.getAbstractScopesList(); @@ -1327,7 +1431,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { DIE *CurFnDIE = constructScopeDIE(TheCU, FnScope); - if (!DisableFramePointerElim(*MF)) + if (!MF->getTarget().Options.DisableFramePointerElim(*MF)) TheCU->addUInt(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr, dwarf::DW_FORM_flag, 1); @@ -1380,7 +1484,7 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S, Fn = DB.getFilename(); Dir = DB.getDirectory(); } else - assert(0 && "Unexpected scope info"); + llvm_unreachable("Unexpected scope info"); Src = GetOrCreateSourceID(Fn, Dir); } @@ -1398,10 +1502,6 @@ DwarfDebug::computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last) { // Get the children. const std::vector<DIE *> &Children = Die->getChildren(); - // If not last sibling and has children then add sibling offset attribute. - if (!Last && !Children.empty()) - Die->addSiblingOffset(DIEValueAllocator); - // Record the abbreviation. assignAbbrevNumber(Die->getAbbrev()); @@ -1454,18 +1554,6 @@ void DwarfDebug::computeSizeAndOffsets() { } } -/// EmitSectionSym - Switch to the specified MCSection and emit an assembler -/// temporary label to it if SymbolStem is specified. -static MCSymbol *EmitSectionSym(AsmPrinter *Asm, const MCSection *Section, - const char *SymbolStem = 0) { - Asm->OutStreamer.SwitchSection(Section); - if (!SymbolStem) return 0; - - MCSymbol *TmpSym = Asm->GetTempSymbol(SymbolStem); - Asm->OutStreamer.EmitLabel(TmpSym); - return TmpSym; -} - /// EmitSectionLabels - Emit initial Dwarf sections with a label at /// the start of each one. void DwarfDebug::EmitSectionLabels() { @@ -1483,7 +1571,6 @@ void DwarfDebug::EmitSectionLabels() { EmitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line"); EmitSectionSym(Asm, TLOF.getDwarfLocSection()); - EmitSectionSym(Asm, TLOF.getDwarfPubNamesSection()); EmitSectionSym(Asm, TLOF.getDwarfPubTypesSection()); DwarfStrSectionSym = EmitSectionSym(Asm, TLOF.getDwarfStrSection(), "section_str"); @@ -1525,9 +1612,6 @@ void DwarfDebug::emitDIE(DIE *Die) { Asm->OutStreamer.AddComment(dwarf::AttributeString(Attr)); switch (Attr) { - case dwarf::DW_AT_sibling: - Asm->EmitInt32(Die->getSiblingOffset()); - break; case dwarf::DW_AT_abstract_origin: { DIEEntry *E = cast<DIEEntry>(Values[i]); DIE *Origin = E->getEntry(); @@ -1539,7 +1623,7 @@ void DwarfDebug::emitDIE(DIE *Die) { // DW_AT_range Value encodes offset in debug_range section. 
DIEInteger *V = cast<DIEInteger>(Values[i]); - if (Asm->MAI->doesDwarfUsesLabelOffsetForRanges()) { + if (Asm->MAI->doesDwarfUseLabelOffsetForRanges()) { Asm->EmitLabelPlusOffset(DwarfDebugRangeSectionSym, V->getValue(), 4); @@ -1678,62 +1762,133 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) { Asm->EmitInt8(1); } -/// emitDebugPubNames - Emit visible names into a debug pubnames section. -/// -void DwarfDebug::emitDebugPubNames() { +/// emitAccelNames - Emit visible names into a hashed accelerator table +/// section. +void DwarfDebug::emitAccelNames() { + DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset, + dwarf::DW_FORM_data4)); for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) { CompileUnit *TheCU = I->second; - // Start the dwarf pubnames section. - Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfPubNamesSection()); + const StringMap<std::vector<DIE*> > &Names = TheCU->getAccelNames(); + for (StringMap<std::vector<DIE*> >::const_iterator + GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) { + const char *Name = GI->getKeyData(); + const std::vector<DIE *> &Entities = GI->second; + for (std::vector<DIE *>::const_iterator DI = Entities.begin(), + DE = Entities.end(); DI != DE; ++DI) + AT.AddName(Name, (*DI)); + } + } - Asm->OutStreamer.AddComment("Length of Public Names Info"); - Asm->EmitLabelDifference( - Asm->GetTempSymbol("pubnames_end", TheCU->getID()), - Asm->GetTempSymbol("pubnames_begin", TheCU->getID()), 4); + AT.FinalizeTable(Asm, "Names"); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfAccelNamesSection()); + MCSymbol *SectionBegin = Asm->GetTempSymbol("names_begin"); + Asm->OutStreamer.EmitLabel(SectionBegin); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_begin", - TheCU->getID())); + // Emit the full data. + AT.Emit(Asm, SectionBegin, this); +} - Asm->OutStreamer.AddComment("DWARF Version"); - Asm->EmitInt16(dwarf::DWARF_VERSION); +/// emitAccelObjC - Emit objective C classes and categories into a hashed +/// accelerator table section. +void DwarfDebug::emitAccelObjC() { + DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset, + dwarf::DW_FORM_data4)); + for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), + E = CUMap.end(); I != E; ++I) { + CompileUnit *TheCU = I->second; + const StringMap<std::vector<DIE*> > &Names = TheCU->getAccelObjC(); + for (StringMap<std::vector<DIE*> >::const_iterator + GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) { + const char *Name = GI->getKeyData(); + const std::vector<DIE *> &Entities = GI->second; + for (std::vector<DIE *>::const_iterator DI = Entities.begin(), + DE = Entities.end(); DI != DE; ++DI) + AT.AddName(Name, (*DI)); + } + } - Asm->OutStreamer.AddComment("Offset of Compilation Unit Info"); - Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", TheCU->getID()), - DwarfInfoSectionSym); + AT.FinalizeTable(Asm, "ObjC"); + Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering() + .getDwarfAccelObjCSection()); + MCSymbol *SectionBegin = Asm->GetTempSymbol("objc_begin"); + Asm->OutStreamer.EmitLabel(SectionBegin); - Asm->OutStreamer.AddComment("Compilation Unit Length"); - Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end", TheCU->getID()), - Asm->GetTempSymbol("info_begin", TheCU->getID()), - 4); + // Emit the full data. 
+ AT.Emit(Asm, SectionBegin, this); +} - const StringMap<DIE*> &Globals = TheCU->getGlobals(); - for (StringMap<DIE*>::const_iterator - GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) { +/// emitAccelNamespace - Emit namespace dies into a hashed accelerator +/// table. +void DwarfDebug::emitAccelNamespaces() { + DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset, + dwarf::DW_FORM_data4)); + for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), + E = CUMap.end(); I != E; ++I) { + CompileUnit *TheCU = I->second; + const StringMap<std::vector<DIE*> > &Names = TheCU->getAccelNamespace(); + for (StringMap<std::vector<DIE*> >::const_iterator + GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) { const char *Name = GI->getKeyData(); - DIE *Entity = GI->second; + const std::vector<DIE *> &Entities = GI->second; + for (std::vector<DIE *>::const_iterator DI = Entities.begin(), + DE = Entities.end(); DI != DE; ++DI) + AT.AddName(Name, (*DI)); + } + } - Asm->OutStreamer.AddComment("DIE offset"); - Asm->EmitInt32(Entity->getOffset()); + AT.FinalizeTable(Asm, "namespac"); + Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering() + .getDwarfAccelNamespaceSection()); + MCSymbol *SectionBegin = Asm->GetTempSymbol("namespac_begin"); + Asm->OutStreamer.EmitLabel(SectionBegin); - if (Asm->isVerbose()) - Asm->OutStreamer.AddComment("External Name"); - Asm->OutStreamer.EmitBytes(StringRef(Name, strlen(Name)+1), 0); - } + // Emit the full data. + AT.Emit(Asm, SectionBegin, this); +} - Asm->OutStreamer.AddComment("End Mark"); - Asm->EmitInt32(0); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_end", - TheCU->getID())); +/// emitAccelTypes() - Emit type dies into a hashed accelerator table. +void DwarfDebug::emitAccelTypes() { + std::vector<DwarfAccelTable::Atom> Atoms; + Atoms.push_back(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset, + dwarf::DW_FORM_data4)); + Atoms.push_back(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeTag, + dwarf::DW_FORM_data2)); + Atoms.push_back(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeTypeFlags, + dwarf::DW_FORM_data1)); + DwarfAccelTable AT(Atoms); + for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), + E = CUMap.end(); I != E; ++I) { + CompileUnit *TheCU = I->second; + const StringMap<std::vector<std::pair<DIE*, unsigned > > > &Names + = TheCU->getAccelTypes(); + for (StringMap<std::vector<std::pair<DIE*, unsigned> > >::const_iterator + GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) { + const char *Name = GI->getKeyData(); + const std::vector<std::pair<DIE *, unsigned> > &Entities = GI->second; + for (std::vector<std::pair<DIE *, unsigned> >::const_iterator DI + = Entities.begin(), DE = Entities.end(); DI !=DE; ++DI) + AT.AddName(Name, (*DI).first, (*DI).second); + } } + + AT.FinalizeTable(Asm, "types"); + Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering() + .getDwarfAccelTypesSection()); + MCSymbol *SectionBegin = Asm->GetTempSymbol("types_begin"); + Asm->OutStreamer.EmitLabel(SectionBegin); + + // Emit the full data. + AT.Emit(Asm, SectionBegin, this); } void DwarfDebug::emitDebugPubTypes() { for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) { CompileUnit *TheCU = I->second; - // Start the dwarf pubnames section. + // Start the dwarf pubtypes section. 
Asm->OutStreamer.SwitchSection( Asm->getObjFileLowering().getDwarfPubTypesSection()); Asm->OutStreamer.AddComment("Length of Public Types Info"); @@ -1766,6 +1921,7 @@ void DwarfDebug::emitDebugPubTypes() { Asm->EmitInt32(Entity->getOffset()); if (Asm->isVerbose()) Asm->OutStreamer.AddComment("External Name"); + // Emit the name with a terminating null byte. Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1), 0); } @@ -1801,8 +1957,10 @@ void DwarfDebug::emitDebugStr() { // Emit a label for reference from debug information entries. Asm->OutStreamer.EmitLabel(Entries[i].second->getValue().first); - // Emit the string itself. - Asm->OutStreamer.EmitBytes(Entries[i].second->getKey(), 0/*addrspace*/); + // Emit the string itself with a terminating null byte. + Asm->OutStreamer.EmitBytes(StringRef(Entries[i].second->getKeyData(), + Entries[i].second->getKeyLength()+1), + 0/*addrspace*/); } } @@ -1958,7 +2116,7 @@ void DwarfDebug::emitDebugMacInfo() { /// __debug_info section, and the low_pc is the starting address for the /// inlining instance. void DwarfDebug::emitDebugInlineInfo() { - if (!Asm->MAI->doesDwarfUsesInlineInfoSection()) + if (!Asm->MAI->doesDwarfUseInlineInfoSection()) return; if (!FirstCU) @@ -1990,10 +2148,9 @@ void DwarfDebug::emitDebugInlineInfo() { StringRef Name = SP.getName(); Asm->OutStreamer.AddComment("MIPS linkage name"); - if (LName.empty()) { - Asm->OutStreamer.EmitBytes(Name, 0); - Asm->OutStreamer.EmitIntValue(0, 1, 0); // nul terminator. - } else + if (LName.empty()) + Asm->EmitSectionOffset(getStringPoolEntry(Name), DwarfStrSectionSym); + else Asm->EmitSectionOffset(getStringPoolEntry(getRealLinkageName(LName)), DwarfStrSectionSym); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 35653be5c897..83f30f5b446f 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -30,7 +30,8 @@ namespace llvm { class CompileUnit; -class DbgConcreteScope; +class ConstantInt; +class ConstantFP; class DbgVariable; class MachineFrameInfo; class MachineModuleInfo; @@ -207,8 +208,8 @@ class DwarfDebug { /// std::vector<DIEAbbrev *> Abbreviations; - /// SourceIdMap - Source id map, i.e. pair of directory id and source file - /// id mapped to a unique id. + /// SourceIdMap - Source id map, i.e. pair of source filename and directory, + /// separated by a zero byte, mapped to a unique id. StringMap<unsigned> SourceIdMap; /// StringPool - A String->Symbol mapping of strings used by indirect @@ -216,8 +217,6 @@ class DwarfDebug { StringMap<std::pair<MCSymbol*, unsigned> > StringPool; unsigned NextStringPoolNumber; - MCSymbol *getStringPoolEntry(StringRef Str); - /// SectionMap - Provides a unique id per text section. /// UniqueVector<const MCSection*> SectionMap; @@ -239,12 +238,12 @@ class DwarfDebug { /// DotDebugLocEntries - Collection of DotDebugLocEntry. SmallVector<DotDebugLocEntry, 4> DotDebugLocEntries; - /// InliendSubprogramDIEs - Collection of subprgram DIEs that are marked + /// InlinedSubprogramDIEs - Collection of subprogram DIEs that are marked /// (at the end of the module) as DW_AT_inline. SmallPtrSet<DIE *, 4> InlinedSubprogramDIEs; /// InlineInfo - Keep track of inlined functions and their location. This - /// information is used to populate debug_inlined section. + /// information is used to populate the debug_inlined section. 
typedef std::pair<const MCSymbol *, DIE *> InlineInfoLabels; DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> > InlineInfo; SmallVector<const MDNode *, 4> InlinedSPNodes; @@ -304,6 +303,10 @@ class DwarfDebug { MCSymbol *DwarfDebugLocSectionSym; MCSymbol *FunctionBeginSym, *FunctionEndSym; + // As an optimization, there is no need to emit an entry in the directory + // table for the same directory as DW_at_comp_dir. + StringRef CompilationDir; + private: /// assignAbbrevNumber - Define a unique number for the abbreviation. @@ -340,7 +343,7 @@ private: /// the start of each one. void EmitSectionLabels(); - /// emitDIE - Recusively Emits a debug information entry. + /// emitDIE - Recursively Emits a debug information entry. /// void emitDIE(DIE *Die); @@ -365,10 +368,22 @@ private: /// void emitEndOfLineMatrix(unsigned SectionEnd); - /// emitDebugPubNames - Emit visible names into a debug pubnames section. - /// - void emitDebugPubNames(); + /// emitAccelNames - Emit visible names into a hashed accelerator table + /// section. + void emitAccelNames(); + + /// emitAccelObjC - Emit objective C classes and categories into a hashed + /// accelerator table section. + void emitAccelObjC(); + + /// emitAccelNamespace - Emit namespace dies into a hashed accelerator + /// table. + void emitAccelNamespaces(); + /// emitAccelTypes() - Emit type dies into a hashed accelerator table. + /// + void emitAccelTypes(); + /// emitDebugPubTypes - Emit visible types into a debug pubtypes section. /// void emitDebugPubTypes(); @@ -407,10 +422,10 @@ private: /// 3. an unsigned LEB128 number indicating the number of distinct inlining /// instances for the function. /// - /// The rest of the entry consists of a {die_offset, low_pc} pair for each + /// The rest of the entry consists of a {die_offset, low_pc} pair for each /// inlined instance; the die_offset points to the inlined_subroutine die in - /// the __debug_info section, and the low_pc is the starting address for the - /// inlining instance. + /// the __debug_info section, and the low_pc is the starting address for the + /// inlining instance. void emitDebugInlineInfo(); /// constructCompileUnit - Create new CompileUnit for the given @@ -426,8 +441,8 @@ private: void recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope, unsigned Flags); - /// identifyScopeMarkers() - Indentify instructions that are marking - /// beginning of or end of a scope. + /// identifyScopeMarkers() - Identify instructions that are marking the + /// beginning or end of a scope. void identifyScopeMarkers(); /// addCurrentFnArgument - If Var is an current function argument that add @@ -472,7 +487,7 @@ public: void collectInfoFromNamedMDNodes(Module *M); /// collectLegacyDebugInfo - Collect debug info using DebugInfoFinder. - /// FIXME - Remove this when dragon-egg and llvm-gcc switch to DIBuilder. + /// FIXME - Remove this when DragonEgg switches to DIBuilder. bool collectLegacyDebugInfo(Module *M); /// beginModule - Emit all Dwarf sections that should come prior to the @@ -504,6 +519,13 @@ public: /// createSubprogramDIE - Create new DIE using SP. DIE *createSubprogramDIE(DISubprogram SP); + + /// getStringPool - returns the entry into the start of the pool. + MCSymbol *getStringPool(); + + /// getStringPoolEntry - returns an entry into the string pool with the given + /// string text. 
+ MCSymbol *getStringPoolEntry(StringRef Str); }; } // End of namespace llvm diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp index 18b726b173dc..70cc2e56b3e1 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp @@ -31,6 +31,7 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/Dwarf.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" @@ -184,7 +185,7 @@ ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads, /// CallToNoUnwindFunction - Return `true' if this is a call to a function /// marked `nounwind'. Return `false' otherwise. bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) { - assert(MI->getDesc().isCall() && "This should be a call instruction!"); + assert(MI->isCall() && "This should be a call instruction!"); bool MarkedNoUnwind = false; bool SawFunc = false; @@ -243,7 +244,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end(); MI != E; ++MI) { if (!MI->isLabel()) { - if (MI->getDesc().isCall()) + if (MI->isCall()) SawPotentiallyThrowing |= !CallToNoUnwindFunction(MI); continue; } @@ -529,10 +530,8 @@ void DwarfException::EmitExceptionTable() { // Offset of the landing pad, counted in 16-byte bundles relative to the // @LPStart address. if (VerboseAsm) { - Asm->OutStreamer.AddComment(Twine(">> Call Site ") + - llvm::utostr(idx) + " <<"); - Asm->OutStreamer.AddComment(Twine(" On exception at call site ") + - llvm::utostr(idx)); + Asm->OutStreamer.AddComment(">> Call Site " + Twine(idx) + " <<"); + Asm->OutStreamer.AddComment(" On exception at call site "+Twine(idx)); } Asm->EmitULEB128(idx); @@ -543,8 +542,8 @@ void DwarfException::EmitExceptionTable() { if (S.Action == 0) Asm->OutStreamer.AddComment(" Action: cleanup"); else - Asm->OutStreamer.AddComment(Twine(" Action: ") + - llvm::utostr((S.Action - 1) / 2 + 1)); + Asm->OutStreamer.AddComment(" Action: " + + Twine((S.Action - 1) / 2 + 1)); } Asm->EmitULEB128(S.Action); } @@ -596,8 +595,7 @@ void DwarfException::EmitExceptionTable() { // number of 16-byte bundles. The first call site is counted relative to // the start of the procedure fragment. if (VerboseAsm) - Asm->OutStreamer.AddComment(Twine(">> Call Site ") + - llvm::utostr(++Entry) + " <<"); + Asm->OutStreamer.AddComment(">> Call Site " + Twine(++Entry) + " <<"); Asm->EmitLabelDifference(BeginLabel, EHFuncBeginSym, 4); if (VerboseAsm) Asm->OutStreamer.AddComment(Twine(" Call between ") + @@ -625,8 +623,8 @@ void DwarfException::EmitExceptionTable() { if (S.Action == 0) Asm->OutStreamer.AddComment(" On action: cleanup"); else - Asm->OutStreamer.AddComment(Twine(" On action: ") + - llvm::utostr((S.Action - 1) / 2 + 1)); + Asm->OutStreamer.AddComment(" On action: " + + Twine((S.Action - 1) / 2 + 1)); } Asm->EmitULEB128(S.Action); } @@ -640,8 +638,7 @@ void DwarfException::EmitExceptionTable() { if (VerboseAsm) { // Emit comments that decode the action table. - Asm->OutStreamer.AddComment(Twine(">> Action Record ") + - llvm::utostr(++Entry) + " <<"); + Asm->OutStreamer.AddComment(">> Action Record " + Twine(++Entry) + " <<"); } // Type Filter @@ -650,11 +647,11 @@ void DwarfException::EmitExceptionTable() { // type of the catch clauses or the types in the exception specification. 
if (VerboseAsm) { if (Action.ValueForTypeID > 0) - Asm->OutStreamer.AddComment(Twine(" Catch TypeInfo ") + - llvm::itostr(Action.ValueForTypeID)); + Asm->OutStreamer.AddComment(" Catch TypeInfo " + + Twine(Action.ValueForTypeID)); else if (Action.ValueForTypeID < 0) - Asm->OutStreamer.AddComment(Twine(" Filter TypeInfo ") + - llvm::itostr(Action.ValueForTypeID)); + Asm->OutStreamer.AddComment(" Filter TypeInfo " + + Twine(Action.ValueForTypeID)); else Asm->OutStreamer.AddComment(" Cleanup"); } @@ -669,8 +666,7 @@ void DwarfException::EmitExceptionTable() { Asm->OutStreamer.AddComment(" No further actions"); } else { unsigned NextAction = Entry + (Action.NextAction + 1) / 2; - Asm->OutStreamer.AddComment(Twine(" Continue to action ") + - llvm::utostr(NextAction)); + Asm->OutStreamer.AddComment(" Continue to action "+Twine(NextAction)); } } Asm->EmitSLEB128(Action.NextAction); @@ -687,7 +683,7 @@ void DwarfException::EmitExceptionTable() { I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) { const GlobalVariable *GV = *I; if (VerboseAsm) - Asm->OutStreamer.AddComment(Twine("TypeInfo ") + llvm::utostr(Entry--)); + Asm->OutStreamer.AddComment("TypeInfo " + Twine(Entry--)); if (GV) Asm->EmitReference(GV, TTypeEncoding); else @@ -707,7 +703,7 @@ void DwarfException::EmitExceptionTable() { if (VerboseAsm) { --Entry; if (TypeID != 0) - Asm->OutStreamer.AddComment(Twine("FilterInfo ") + llvm::itostr(Entry)); + Asm->OutStreamer.AddComment("FilterInfo " + Twine(Entry)); } Asm->EmitULEB128(TypeID); @@ -719,17 +715,17 @@ void DwarfException::EmitExceptionTable() { /// EndModule - Emit all exception information that should come after the /// content. void DwarfException::EndModule() { - assert(0 && "Should be implemented"); + llvm_unreachable("Should be implemented"); } /// BeginFunction - Gather pre-function exception information. Assumes it's /// being emitted immediately after the function entry point. void DwarfException::BeginFunction(const MachineFunction *MF) { - assert(0 && "Should be implemented"); + llvm_unreachable("Should be implemented"); } /// EndFunction - Gather and emit post-function exception information. /// void DwarfException::EndFunction() { - assert(0 && "Should be implemented"); + llvm_unreachable("Should be implemented"); } diff --git a/lib/CodeGen/AsmPrinter/LLVMBuild.txt b/lib/CodeGen/AsmPrinter/LLVMBuild.txt new file mode 100644 index 000000000000..20b1f7b45b31 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./lib/CodeGen/AsmPrinter/LLVMBuild.txt -------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = AsmPrinter +parent = Libraries +required_libraries = Analysis CodeGen Core MC MCParser Support Target diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 75288b0934cb..ef1d2baed9ce 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -23,6 +23,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" @@ -61,29 +62,33 @@ TailMergeSize("tail-merge-size", namespace { /// BranchFolderPass - Wrap branch folder in a machine function pass. - class BranchFolderPass : public MachineFunctionPass, - public BranchFolder { + class BranchFolderPass : public MachineFunctionPass { public: static char ID; - explicit BranchFolderPass(bool defaultEnableTailMerge) - : MachineFunctionPass(ID), BranchFolder(defaultEnableTailMerge, true) {} + explicit BranchFolderPass(): MachineFunctionPass(ID) {} virtual bool runOnMachineFunction(MachineFunction &MF); - virtual const char *getPassName() const { return "Control Flow Optimizer"; } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetPassConfig>(); + MachineFunctionPass::getAnalysisUsage(AU); + } }; } char BranchFolderPass::ID = 0; +char &llvm::BranchFolderPassID = BranchFolderPass::ID; -FunctionPass *llvm::createBranchFoldingPass(bool DefaultEnableTailMerge) { - return new BranchFolderPass(DefaultEnableTailMerge); -} +INITIALIZE_PASS(BranchFolderPass, "branch-folder", + "Control Flow Optimizer", false, false) bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) { - return OptimizeFunction(MF, - MF.getTarget().getInstrInfo(), - MF.getTarget().getRegisterInfo(), - getAnalysisIfAvailable<MachineModuleInfo>()); + TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>(); + BranchFolder Folder(PassConfig->getEnableTailMerge(), /*CommonHoist=*/true); + return Folder.OptimizeFunction(MF, + MF.getTarget().getInstrInfo(), + MF.getTarget().getRegisterInfo(), + getAnalysisIfAvailable<MachineModuleInfo>()); } @@ -132,7 +137,7 @@ bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) { break; unsigned Reg = I->getOperand(0).getReg(); ImpDefRegs.insert(Reg); - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) ImpDefRegs.insert(SubReg); ++I; @@ -179,8 +184,14 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, TII = tii; TRI = tri; MMI = mmi; + RS = NULL; - RS = TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : NULL; + // Use a RegScavenger to help update liveness when required. + MachineRegisterInfo &MRI = MF.getRegInfo(); + if (MRI.tracksLiveness() && TRI->requiresRegisterScavenging(MF)) + RS = new RegScavenger(); + else + MRI.invalidateLiveness(); // Fix CFG. The later algorithms expect it to be right. bool MadeChange = false; @@ -208,7 +219,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, delete RS; return MadeChange; } - + // Walk the function to find jump tables that are live. 
BitVector JTIsLive(JTI->getJumpTables().size()); for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); @@ -432,10 +443,9 @@ static unsigned EstimateRuntime(MachineBasicBlock::iterator I, for (; I != E; ++I) { if (I->isDebugValue()) continue; - const MCInstrDesc &MCID = I->getDesc(); - if (MCID.isCall()) + if (I->isCall()) Time += 10; - else if (MCID.mayLoad() || MCID.mayStore()) + else if (I->mayLoad() || I->mayStore()) Time += 2; else ++Time; @@ -484,8 +494,9 @@ BranchFolder::MergePotentialsElt::operator<(const MergePotentialsElt &o) const { // an object with itself. #ifndef _GLIBCXX_DEBUG llvm_unreachable("Predecessor appears twice"); -#endif +#else return false; +#endif } } @@ -502,7 +513,7 @@ static unsigned CountTerminators(MachineBasicBlock *MBB, break; } --I; - if (!I->getDesc().isTerminator()) break; + if (!I->isTerminator()) break; ++NumTerms; } return NumTerms; @@ -550,8 +561,8 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1, // heuristics. unsigned EffectiveTailLen = CommonTailLen; if (SuccBB && MBB1 != PredBB && MBB2 != PredBB && - !MBB1->back().getDesc().isBarrier() && - !MBB2->back().getDesc().isBarrier()) + !MBB1->back().isBarrier() && + !MBB2->back().isBarrier()) ++EffectiveTailLen; // Check if the common tail is long enough to be worthwhile. @@ -870,6 +881,9 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // Visit each predecessor only once. if (!UniquePreds.insert(PBB)) continue; + // Skip blocks which may jump to a landing pad. Can't tail merge these. + if (PBB->getLandingPadSuccessor()) + continue; MachineBasicBlock *TBB = 0, *FBB = 0; SmallVector<MachineOperand, 4> Cond; if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) { @@ -924,8 +938,9 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { if (MergePotentials.size() >= 2) MadeChange |= TryTailMergeBlocks(IBB, PredBB); // Reinsert an unconditional branch if needed. - // The 1 below can occur as a result of removing blocks in TryTailMergeBlocks. - PredBB = prior(I); // this may have been changed in TryTailMergeBlocks + // The 1 below can occur as a result of removing blocks in + // TryTailMergeBlocks. + PredBB = prior(I); // this may have been changed in TryTailMergeBlocks if (MergePotentials.size() == 1 && MergePotentials.begin()->getBlock() != PredBB) FixTail(MergePotentials.begin()->getBlock(), IBB, TII); @@ -980,7 +995,7 @@ static bool IsBranchOnlyBlock(MachineBasicBlock *MBB) { if (!MBBI->isDebugValue()) break; } - return (MBBI->getDesc().isBranch()); + return (MBBI->isBranch()); } /// IsBetterFallthrough - Return true if it would be clearly better to @@ -1008,7 +1023,23 @@ static bool IsBetterFallthrough(MachineBasicBlock *MBB1, MachineBasicBlock::iterator MBB2I = --MBB2->end(); while (MBB2I->isDebugValue()) --MBB2I; - return MBB2I->getDesc().isCall() && !MBB1I->getDesc().isCall(); + return MBB2I->isCall() && !MBB1I->isCall(); +} + +/// getBranchDebugLoc - Find and return, if any, the DebugLoc of the branch +/// instructions on the block. Always use the DebugLoc of the first +/// branching instruction found unless it's absent, in which case use the +/// DebugLoc of the second if present. 
+static DebugLoc getBranchDebugLoc(MachineBasicBlock &MBB) { + MachineBasicBlock::iterator I = MBB.end(); + if (I == MBB.begin()) + return DebugLoc(); + --I; + while (I->isDebugValue() && I != MBB.begin()) + --I; + if (I->isBranch()) + return I->getDebugLoc(); + return DebugLoc(); } /// OptimizeBlock - Analyze and optimize control flow related to the specified @@ -1016,7 +1047,6 @@ static bool IsBetterFallthrough(MachineBasicBlock *MBB1, bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { bool MadeChange = false; MachineFunction &MF = *MBB->getParent(); - DebugLoc dl; // FIXME: this is nowhere ReoptimizeBlock: MachineFunction::iterator FallThrough = MBB; @@ -1065,6 +1095,7 @@ ReoptimizeBlock: // destination, remove the branch, replacing it with an unconditional one or // a fall-through. if (PriorTBB && PriorTBB == PriorFBB) { + DebugLoc dl = getBranchDebugLoc(PrevBB); TII->RemoveBranch(PrevBB); PriorCond.clear(); if (PriorTBB != MBB) @@ -1091,7 +1122,7 @@ ReoptimizeBlock: MachineBasicBlock::iterator PrevBBIter = PrevBB.end(); --PrevBBIter; MachineBasicBlock::iterator MBBIter = MBB->begin(); - // Check if DBG_VALUE at the end of PrevBB is identical to the + // Check if DBG_VALUE at the end of PrevBB is identical to the // DBG_VALUE at the beginning of MBB. while (PrevBBIter != PrevBB.begin() && MBBIter != MBB->end() && PrevBBIter->isDebugValue() && MBBIter->isDebugValue()) { @@ -1103,7 +1134,7 @@ ReoptimizeBlock: } } PrevBB.splice(PrevBB.end(), MBB, MBB->begin(), MBB->end()); - PrevBB.removeSuccessor(PrevBB.succ_begin());; + PrevBB.removeSuccessor(PrevBB.succ_begin()); assert(PrevBB.succ_empty()); PrevBB.transferSuccessors(MBB); MadeChange = true; @@ -1122,6 +1153,7 @@ ReoptimizeBlock: // If the prior block branches somewhere else on the condition and here if // the condition is false, remove the uncond second branch. if (PriorFBB == MBB) { + DebugLoc dl = getBranchDebugLoc(PrevBB); TII->RemoveBranch(PrevBB); TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond, dl); MadeChange = true; @@ -1135,6 +1167,7 @@ ReoptimizeBlock: if (PriorTBB == MBB) { SmallVector<MachineOperand, 4> NewPriorCond(PriorCond); if (!TII->ReverseBranchCondition(NewPriorCond)) { + DebugLoc dl = getBranchDebugLoc(PrevBB); TII->RemoveBranch(PrevBB); TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond, dl); MadeChange = true; @@ -1172,6 +1205,7 @@ ReoptimizeBlock: DEBUG(dbgs() << "\nMoving MBB: " << *MBB << "To make fallthrough to: " << *PriorTBB << "\n"); + DebugLoc dl = getBranchDebugLoc(PrevBB); TII->RemoveBranch(PrevBB); TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond, dl); @@ -1201,6 +1235,7 @@ ReoptimizeBlock: if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) { SmallVector<MachineOperand, 4> NewCond(CurCond); if (!TII->ReverseBranchCondition(NewCond)) { + DebugLoc dl = getBranchDebugLoc(*MBB); TII->RemoveBranch(*MBB); TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond, dl); MadeChange = true; @@ -1214,6 +1249,7 @@ ReoptimizeBlock: if (CurTBB && CurCond.empty() && CurFBB == 0 && IsBranchOnlyBlock(MBB) && CurTBB != MBB && !MBB->hasAddressTaken()) { + DebugLoc dl = getBranchDebugLoc(*MBB); // This block may contain just an unconditional branch. Because there can // be 'non-branch terminators' in the block, try removing the branch and // then seeing if the block is empty. 
@@ -1256,8 +1292,9 @@ ReoptimizeBlock: assert(PriorFBB == 0 && "Machine CFG out of date!"); PriorFBB = MBB; } + DebugLoc pdl = getBranchDebugLoc(PrevBB); TII->RemoveBranch(PrevBB); - TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, dl); + TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, pdl); } // Iterate through all the predecessors, revectoring each in-turn. @@ -1281,9 +1318,10 @@ ReoptimizeBlock: bool NewCurUnAnalyzable = TII->AnalyzeBranch(*PMBB, NewCurTBB, NewCurFBB, NewCurCond, true); if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) { + DebugLoc pdl = getBranchDebugLoc(*PMBB); TII->RemoveBranch(*PMBB); NewCurCond.clear(); - TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond, dl); + TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond, pdl); MadeChange = true; ++NumBranchOpts; PMBB->CorrectExtraCFGEdges(NewCurTBB, 0, false); @@ -1343,7 +1381,7 @@ ReoptimizeBlock: if (CurFallsThru) { MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB)); CurCond.clear(); - TII->InsertBranch(*MBB, NextBB, 0, CurCond, dl); + TII->InsertBranch(*MBB, NextBB, 0, CurCond, DebugLoc()); } MBB->moveAfter(PredBB); MadeChange = true; @@ -1446,7 +1484,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, continue; if (MO.isUse()) { Uses.insert(Reg); - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) + for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) Uses.insert(*AS); } else if (!MO.isDead()) // Don't try to hoist code in the rare case the terminator defines a @@ -1469,6 +1507,9 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, bool IsDef = false; for (unsigned i = 0, e = PI->getNumOperands(); !IsDef && i != e; ++i) { const MachineOperand &MO = PI->getOperand(i); + // If PI has a regmask operand, it is probably a call. Separate away. + if (MO.isRegMask()) + return Loc; if (!MO.isReg() || MO.isUse()) continue; unsigned Reg = MO.getReg(); @@ -1505,16 +1546,16 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, continue; if (MO.isUse()) { Uses.insert(Reg); - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) + for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) Uses.insert(*AS); } else { if (Uses.count(Reg)) { Uses.erase(Reg); - for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) + for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR) Uses.erase(*SR); // Use getSubRegisters to be conservative } Defs.insert(Reg); - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) + for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) Defs.insert(*AS); } } @@ -1581,6 +1622,11 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { bool IsSafe = true; for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) { MachineOperand &MO = TIB->getOperand(i); + // Don't attempt to hoist instructions with register masks. + if (MO.isRegMask()) { + IsSafe = false; + break; + } if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); @@ -1615,6 +1661,11 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { IsSafe = false; break; } + + if (MO.isKill() && Uses.count(Reg)) + // Kills a register that's read by the instruction at the point of + // insertion. Remove the kill marker. 
+ MO.setIsKill(false); } } if (!IsSafe) @@ -1632,7 +1683,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { unsigned Reg = MO.getReg(); if (!Reg || !LocalDefsSet.count(Reg)) continue; - for (const unsigned *OR = TRI->getOverlaps(Reg); *OR; ++OR) + for (const uint16_t *OR = TRI->getOverlaps(Reg); *OR; ++OR) LocalDefsSet.erase(*OR); } @@ -1645,11 +1696,11 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { if (!Reg) continue; LocalDefs.push_back(Reg); - for (const unsigned *OR = TRI->getOverlaps(Reg); *OR; ++OR) + for (const uint16_t *OR = TRI->getOverlaps(Reg); *OR; ++OR) LocalDefsSet.insert(*OR); } - HasDups = true;; + HasDups = true; ++TIB; ++FIB; } diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 9a5e55160114..21729cd6c380 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -9,10 +9,9 @@ add_llvm_library(LLVMCodeGen CodePlacementOpt.cpp CriticalAntiDepBreaker.cpp DeadMachineInstructionElim.cpp + DFAPacketizer.cpp DwarfEHPrepare.cpp EdgeBundles.cpp - ELFCodeEmitter.cpp - ELFWriter.cpp ExecutionDepsFix.cpp ExpandISelPseudos.cpp ExpandPostRAPseudos.cpp @@ -23,6 +22,7 @@ add_llvm_library(LLVMCodeGen InlineSpiller.cpp InterferenceCache.cpp IntrinsicLowering.cpp + JITCodeEmitter.cpp LLVMTargetMachine.cpp LatencyPriorityQueue.cpp LexicalScopes.cpp @@ -37,7 +37,10 @@ add_llvm_library(LLVMCodeGen LocalStackSlotAllocation.cpp MachineBasicBlock.cpp MachineBlockFrequencyInfo.cpp + MachineBlockPlacement.cpp MachineBranchProbabilityInfo.cpp + MachineCodeEmitter.cpp + MachineCopyPropagation.cpp MachineCSE.cpp MachineDominators.cpp MachineFunction.cpp @@ -45,6 +48,7 @@ add_llvm_library(LLVMCodeGen MachineFunctionPass.cpp MachineFunctionPrinterPass.cpp MachineInstr.cpp + MachineInstrBundle.cpp MachineLICM.cpp MachineLoopInfo.cpp MachineLoopRanges.cpp @@ -53,9 +57,9 @@ add_llvm_library(LLVMCodeGen MachinePassRegistry.cpp MachineRegisterInfo.cpp MachineSSAUpdater.cpp + MachineScheduler.cpp MachineSink.cpp MachineVerifier.cpp - ObjectCodeEmitter.cpp OcamlGC.cpp OptimizePHIs.cpp PHIElimination.cpp @@ -66,17 +70,16 @@ add_llvm_library(LLVMCodeGen ProcessImplicitDefs.cpp PrologEpilogInserter.cpp PseudoSourceValue.cpp + RegAllocBase.cpp RegAllocBasic.cpp RegAllocFast.cpp RegAllocGreedy.cpp - RegAllocLinearScan.cpp RegAllocPBQP.cpp RegisterClassInfo.cpp RegisterCoalescer.cpp RegisterScavenging.cpp RenderMachineFunction.cpp ScheduleDAG.cpp - ScheduleDAGEmit.cpp ScheduleDAGInstrs.cpp ScheduleDAGPrinter.cpp ScoreboardHazardRecognizer.cpp @@ -87,27 +90,17 @@ add_llvm_library(LLVMCodeGen Spiller.cpp SpillPlacement.cpp SplitKit.cpp - Splitter.cpp StackProtector.cpp StackSlotColoring.cpp StrongPHIElimination.cpp TailDuplication.cpp + TargetFrameLoweringImpl.cpp TargetInstrInfoImpl.cpp TargetLoweringObjectFileImpl.cpp + TargetOptionsImpl.cpp TwoAddressInstructionPass.cpp UnreachableBlockElim.cpp VirtRegMap.cpp - VirtRegRewriter.cpp - ) - -add_llvm_library_dependencies(LLVMCodeGen - LLVMAnalysis - LLVMCore - LLVMMC - LLVMScalarOpts - LLVMSupport - LLVMTarget - LLVMTransformUtils ) add_subdirectory(SelectionDAG) diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp index 14eb0541dc8d..2b7dfdbe41a0 100644 --- a/lib/CodeGen/CallingConvLower.cpp +++ b/lib/CodeGen/CallingConvLower.cpp @@ -58,7 +58,7 @@ void CCState::HandleByVal(unsigned ValNo, MVT ValVT, /// MarkAllocated - Mark a register and all of its aliases as allocated. 
void CCState::MarkAllocated(unsigned Reg) { - for (const unsigned *Alias = TRI.getOverlaps(Reg); + for (const uint16_t *Alias = TRI.getOverlaps(Reg); unsigned Reg = *Alias; ++Alias) UsedRegs[Reg/32] |= 1 << (Reg&31); } diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp index 424535ba2a1c..a81bb5cc5566 100644 --- a/lib/CodeGen/CodeGen.cpp +++ b/lib/CodeGen/CodeGen.cpp @@ -19,36 +19,49 @@ using namespace llvm; /// initializeCodeGen - Initialize all passes linked into the CodeGen library. void llvm::initializeCodeGen(PassRegistry &Registry) { + initializeBranchFolderPassPass(Registry); initializeCalculateSpillWeightsPass(Registry); + initializeCodePlacementOptPass(Registry); initializeDeadMachineInstructionElimPass(Registry); + initializeExpandPostRAPass(Registry); + initializeExpandISelPseudosPass(Registry); + initializeFinalizeMachineBundlesPass(Registry); + initializeGCMachineCodeAnalysisPass(Registry); initializeGCModuleInfoPass(Registry); initializeIfConverterPass(Registry); initializeLiveDebugVariablesPass(Registry); initializeLiveIntervalsPass(Registry); initializeLiveStacksPass(Registry); initializeLiveVariablesPass(Registry); + initializeLocalStackSlotPassPass(Registry); initializeMachineBlockFrequencyInfoPass(Registry); + initializeMachineBlockPlacementPass(Registry); + initializeMachineBlockPlacementStatsPass(Registry); + initializeMachineCopyPropagationPass(Registry); initializeMachineCSEPass(Registry); initializeMachineDominatorTreePass(Registry); initializeMachineLICMPass(Registry); initializeMachineLoopInfoPass(Registry); initializeMachineModuleInfoPass(Registry); + initializeMachineSchedulerPass(Registry); initializeMachineSinkingPass(Registry); initializeMachineVerifierPassPass(Registry); initializeOptimizePHIsPass(Registry); initializePHIEliminationPass(Registry); initializePeepholeOptimizerPass(Registry); + initializePostRASchedulerPass(Registry); initializeProcessImplicitDefsPass(Registry); initializePEIPass(Registry); - initializeRALinScanPass(Registry); initializeRegisterCoalescerPass(Registry); initializeRenderMachineFunctionPass(Registry); initializeSlotIndexesPass(Registry); - initializeLoopSplitterPass(Registry); initializeStackProtectorPass(Registry); initializeStackSlotColoringPass(Registry); initializeStrongPHIEliminationPass(Registry); + initializeTailDuplicatePassPass(Registry); + initializeTargetPassConfigPass(Registry); initializeTwoAddressInstructionPassPass(Registry); + initializeUnpackMachineBundlesPass(Registry); initializeUnreachableBlockElimPass(Registry); initializeUnreachableMachineBlockElimPass(Registry); initializeVirtRegMapPass(Registry); diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp index 270c337ef67e..c13c05e26a20 100644 --- a/lib/CodeGen/CodePlacementOpt.cpp +++ b/lib/CodeGen/CodePlacementOpt.cpp @@ -39,9 +39,6 @@ namespace { CodePlacementOpt() : MachineFunctionPass(ID) {} virtual bool runOnMachineFunction(MachineFunction &MF); - virtual const char *getPassName() const { - return "Code Placement Optimizer"; - } virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<MachineLoopInfo>(); @@ -69,9 +66,9 @@ namespace { char CodePlacementOpt::ID = 0; } // end anonymous namespace -FunctionPass *llvm::createCodePlacementOptPass() { - return new CodePlacementOpt(); -} +char &llvm::CodePlacementOptID = CodePlacementOpt::ID; +INITIALIZE_PASS(CodePlacementOpt, "code-placement", + "Code Placement Optimizer", false, false) /// HasFallthrough - Test whether the given branch has a fallthrough, 
either as /// a plain fallthrough or as a fallthrough case of a conditional branch. diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index 84c4d59c0e41..bad50103b9c3 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -35,7 +35,8 @@ CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo &RCI) : RegClassInfo(RCI), Classes(TRI->getNumRegs(), static_cast<const TargetRegisterClass *>(0)), KillIndices(TRI->getNumRegs(), 0), - DefIndices(TRI->getNumRegs(), 0) {} + DefIndices(TRI->getNumRegs(), 0), + KeepRegs(TRI->getNumRegs(), false) {} CriticalAntiDepBreaker::~CriticalAntiDepBreaker() { } @@ -52,9 +53,9 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { } // Clear "do not change" set. - KeepRegs.clear(); + KeepRegs.reset(); - bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn()); + bool IsReturnBlock = (BBSize != 0 && BB->back().isReturn()); // Determine the live-out physregs for this block. if (IsReturnBlock) { @@ -63,14 +64,14 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { E = MRI.liveout_end(); I != E; ++I) { unsigned Reg = *I; Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); - KillIndices[Reg] = BB->size(); + KillIndices[Reg] = BBSize; DefIndices[Reg] = ~0u; // Repeat, for all aliases. - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { unsigned AliasReg = *Alias; Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1); - KillIndices[AliasReg] = BB->size(); + KillIndices[AliasReg] = BBSize; DefIndices[AliasReg] = ~0u; } } @@ -85,14 +86,14 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { E = (*SI)->livein_end(); I != E; ++I) { unsigned Reg = *I; Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); - KillIndices[Reg] = BB->size(); + KillIndices[Reg] = BBSize; DefIndices[Reg] = ~0u; // Repeat, for all aliases. - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { unsigned AliasReg = *Alias; Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1); - KillIndices[AliasReg] = BB->size(); + KillIndices[AliasReg] = BBSize; DefIndices[AliasReg] = ~0u; } } @@ -102,18 +103,18 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { // callee-saved register that is not saved in the prolog. const MachineFrameInfo *MFI = MF.getFrameInfo(); BitVector Pristine = MFI->getPristineRegs(BB); - for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) { + for (const uint16_t *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) { unsigned Reg = *I; if (!IsReturnBlock && !Pristine.test(Reg)) continue; Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); - KillIndices[Reg] = BB->size(); + KillIndices[Reg] = BBSize; DefIndices[Reg] = ~0u; // Repeat, for all aliases. 
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { unsigned AliasReg = *Alias; Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1); - KillIndices[AliasReg] = BB->size(); + KillIndices[AliasReg] = BBSize; DefIndices[AliasReg] = ~0u; } } @@ -121,7 +122,7 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { void CriticalAntiDepBreaker::FinishBlock() { RegRefs.clear(); - KeepRegs.clear(); + KeepRegs.reset(); } void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count, @@ -193,8 +194,8 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) { // instruction which may not be executed. The second R6 def may or may not // re-define R6 so it's not safe to change it since the last R6 use cannot be // changed. - bool Special = MI->getDesc().isCall() || - MI->getDesc().hasExtraSrcRegAllocReq() || + bool Special = MI->isCall() || + MI->hasExtraSrcRegAllocReq() || TII->isPredicated(MI); // Scan the register operands for this instruction and update @@ -217,7 +218,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) { Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); // Now check for aliases. - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { // If an alias of the reg is used during the live range, give up. // Note that this allows us to skip checking if AntiDepReg // overlaps with any of the aliases, among other things. @@ -233,10 +234,11 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) { RegRefs.insert(std::make_pair(Reg, &MO)); if (MO.isUse() && Special) { - if (KeepRegs.insert(Reg)) { - for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + if (!KeepRegs.test(Reg)) { + KeepRegs.set(Reg); + for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); *Subreg; ++Subreg) - KeepRegs.insert(*Subreg); + KeepRegs.set(*Subreg); } } } @@ -253,6 +255,17 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, // address updates. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); + + if (MO.isRegMask()) + for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) + if (MO.clobbersPhysReg(i)) { + DefIndices[i] = Count; + KillIndices[i] = ~0u; + KeepRegs.reset(i); + Classes[i] = 0; + RegRefs.erase(i); + } + if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (Reg == 0) continue; @@ -265,21 +278,21 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, assert(((KillIndices[Reg] == ~0u) != (DefIndices[Reg] == ~0u)) && "Kill and Def maps aren't consistent for Reg!"); - KeepRegs.erase(Reg); + KeepRegs.reset(Reg); Classes[Reg] = 0; RegRefs.erase(Reg); // Repeat, for all subregs. - for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); *Subreg; ++Subreg) { unsigned SubregReg = *Subreg; DefIndices[SubregReg] = Count; KillIndices[SubregReg] = ~0u; - KeepRegs.erase(SubregReg); + KeepRegs.reset(SubregReg); Classes[SubregReg] = 0; RegRefs.erase(SubregReg); } // Conservatively mark super-registers as unusable. 
- for (const unsigned *Super = TRI->getSuperRegisters(Reg); + for (const uint16_t *Super = TRI->getSuperRegisters(Reg); *Super; ++Super) { unsigned SuperReg = *Super; Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1); @@ -315,7 +328,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, "Kill and Def maps aren't consistent for Reg!"); } // Repeat, for all aliases. - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { unsigned AliasReg = *Alias; if (KillIndices[AliasReg] == ~0u) { KillIndices[AliasReg] = Count; @@ -355,6 +368,9 @@ CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin, for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &CheckOper = MI->getOperand(i); + if (CheckOper.isRegMask() && CheckOper.clobbersPhysReg(NewReg)) + return true; + if (!CheckOper.isReg() || !CheckOper.isDef() || CheckOper.getReg() != NewReg) continue; @@ -427,6 +443,8 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, // Keep a map of the MachineInstr*'s back to the SUnit representing them. // This is used for updating debug information. + // + // FIXME: Replace this with the existing map in ScheduleDAGInstrs::MISUnitMap DenseMap<MachineInstr*,const SUnit*> MISUnitMap; // Find the node at the bottom of the critical path. @@ -535,7 +553,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, if (!RegClassInfo.isAllocatable(AntiDepReg)) // Don't break anti-dependencies on non-allocatable registers. AntiDepReg = 0; - else if (KeepRegs.count(AntiDepReg)) + else if (KeepRegs.test(AntiDepReg)) // Don't break anti-dependencies if an use down below requires // this exact register. AntiDepReg = 0; @@ -572,7 +590,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, // If MI's defs have a special allocation requirement, don't allow // any def registers to be changed. Also assume all registers // defined in a call must not be changed (ABI). - if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq() || + if (MI->isCall() || MI->hasExtraDefRegAllocReq() || TII->isPredicated(MI)) // If this instruction's defs have special allocation requirement, don't // break this anti-dependency. diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h index 07107802972d..77462593896e 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.h +++ b/lib/CodeGen/CriticalAntiDepBreaker.h @@ -24,7 +24,6 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/SmallSet.h" #include <map> namespace llvm { @@ -66,7 +65,7 @@ class TargetRegisterInfo; /// KeepRegs - A set of registers which are live and cannot be changed to /// break anti-dependencies. - SmallSet<unsigned, 4> KeepRegs; + BitVector KeepRegs; public: CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo&); diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp new file mode 100644 index 000000000000..bfbe7790998f --- /dev/null +++ b/lib/CodeGen/DFAPacketizer.cpp @@ -0,0 +1,223 @@ +//=- llvm/CodeGen/DFAPacketizer.cpp - DFA Packetizer for VLIW -*- C++ -*-=====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// This class implements a deterministic finite automaton (DFA) based +// packetizing mechanism for VLIW architectures. It provides APIs to +// determine whether there exists a legal mapping of instructions to +// functional unit assignments in a packet. The DFA is auto-generated from +// the target's Schedule.td file. +// +// A DFA consists of 3 major elements: states, inputs, and transitions. For +// the packetizing mechanism, the input is the set of instruction classes for +// a target. The state models all possible combinations of functional unit +// consumption for a given set of instructions in a packet. A transition +// models the addition of an instruction to a packet. In the DFA constructed +// by this class, if an instruction can be added to a packet, then a valid +// transition exists from the corresponding state. Invalid transitions +// indicate that the instruction cannot be added to the current packet. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/ScheduleDAGInstrs.h" +#include "llvm/CodeGen/DFAPacketizer.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/MC/MCInstrItineraries.h" +using namespace llvm; + +DFAPacketizer::DFAPacketizer(const InstrItineraryData *I, const int (*SIT)[2], + const unsigned *SET): + InstrItins(I), CurrentState(0), DFAStateInputTable(SIT), + DFAStateEntryTable(SET) {} + + +// +// ReadTable - Read the DFA transition table and update CachedTable. +// +// Format of the transition tables: +// DFAStateInputTable[][2] = pairs of <Input, Transition> for all valid +// transitions +// DFAStateEntryTable[i] = Index of the first entry in DFAStateInputTable +// for the ith state +// +void DFAPacketizer::ReadTable(unsigned int state) { + unsigned ThisState = DFAStateEntryTable[state]; + unsigned NextStateInTable = DFAStateEntryTable[state+1]; + // Early exit in case CachedTable already contains this + // state's transitions. + if (CachedTable.count(UnsignPair(state, + DFAStateInputTable[ThisState][0]))) + return; + + for (unsigned i = ThisState; i < NextStateInTable; i++) + CachedTable[UnsignPair(state, DFAStateInputTable[i][0])] = + DFAStateInputTable[i][1]; +} + + +// canReserveResources - Check if the resources occupied by a MCInstrDesc +// are available in the current state. +bool DFAPacketizer::canReserveResources(const llvm::MCInstrDesc *MID) { + unsigned InsnClass = MID->getSchedClass(); + const llvm::InstrStage *IS = InstrItins->beginStage(InsnClass); + unsigned FuncUnits = IS->getUnits(); + UnsignPair StateTrans = UnsignPair(CurrentState, FuncUnits); + ReadTable(CurrentState); + return (CachedTable.count(StateTrans) != 0); +} + + +// reserveResources - Reserve the resources occupied by a MCInstrDesc and +// change the current state to reflect that change. +void DFAPacketizer::reserveResources(const llvm::MCInstrDesc *MID) { + unsigned InsnClass = MID->getSchedClass(); + const llvm::InstrStage *IS = InstrItins->beginStage(InsnClass); + unsigned FuncUnits = IS->getUnits(); + UnsignPair StateTrans = UnsignPair(CurrentState, FuncUnits); + ReadTable(CurrentState); + assert(CachedTable.count(StateTrans) != 0); + CurrentState = CachedTable[StateTrans]; +} + + +// canReserveResources - Check if the resources occupied by a machine +// instruction are available in the current state. 
+bool DFAPacketizer::canReserveResources(llvm::MachineInstr *MI) {
+  const llvm::MCInstrDesc &MID = MI->getDesc();
+  return canReserveResources(&MID);
+}
+
+// reserveResources - Reserve the resources occupied by a machine
+// instruction and change the current state to reflect that change.
+void DFAPacketizer::reserveResources(llvm::MachineInstr *MI) {
+  const llvm::MCInstrDesc &MID = MI->getDesc();
+  reserveResources(&MID);
+}
+
+namespace llvm {
+// DefaultVLIWScheduler - This class extends ScheduleDAGInstrs and overrides
+// the schedule method to build the dependence graph.
+class DefaultVLIWScheduler : public ScheduleDAGInstrs {
+public:
+  DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI,
+                       MachineDominatorTree &MDT, bool IsPostRA);
+  // schedule - Actual scheduling work.
+  void schedule();
+};
+}
+
+DefaultVLIWScheduler::DefaultVLIWScheduler(
+  MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
+  bool IsPostRA) :
+  ScheduleDAGInstrs(MF, MLI, MDT, IsPostRA) {
+}
+
+void DefaultVLIWScheduler::schedule() {
+  // Build the scheduling graph.
+  buildSchedGraph(0);
+}
+
+// VLIWPacketizerList Ctor
+VLIWPacketizerList::VLIWPacketizerList(
+  MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
+  bool IsPostRA) : TM(MF.getTarget()), MF(MF) {
+  TII = TM.getInstrInfo();
+  ResourceTracker = TII->CreateTargetScheduleState(&TM, 0);
+  VLIWScheduler = new DefaultVLIWScheduler(MF, MLI, MDT, IsPostRA);
+}
+
+// VLIWPacketizerList Dtor
+VLIWPacketizerList::~VLIWPacketizerList() {
+  if (VLIWScheduler)
+    delete VLIWScheduler;
+
+  if (ResourceTracker)
+    delete ResourceTracker;
+}
+
+// endPacket - End the current packet, bundle packet instructions and reset
+// DFA state.
+void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB,
+                                   MachineInstr *MI) {
+  if (CurrentPacketMIs.size() > 1) {
+    MachineInstr *MIFirst = CurrentPacketMIs.front();
+    finalizeBundle(*MBB, MIFirst, MI);
+  }
+  CurrentPacketMIs.clear();
+  ResourceTracker->clearResources();
+}
+
+// PacketizeMIs - Bundle machine instructions into packets.
+void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
+                                      MachineBasicBlock::iterator BeginItr,
+                                      MachineBasicBlock::iterator EndItr) {
+  assert(VLIWScheduler && "VLIW Scheduler is not initialized!");
+  VLIWScheduler->enterRegion(MBB, BeginItr, EndItr, MBB->size());
+  VLIWScheduler->schedule();
+  VLIWScheduler->exitRegion();
+
+  // Generate MI -> SU map.
+  //std::map <MachineInstr*, SUnit*> MIToSUnit;
+  MIToSUnit.clear();
+  for (unsigned i = 0, e = VLIWScheduler->SUnits.size(); i != e; ++i) {
+    SUnit *SU = &VLIWScheduler->SUnits[i];
+    MIToSUnit[SU->getInstr()] = SU;
+  }
+
+  // The main packetizer loop.
+  for (; BeginItr != EndItr; ++BeginItr) {
+    MachineInstr *MI = BeginItr;
+
+    this->initPacketizerState();
+
+    // End the current packet if needed.
+    if (this->isSoloInstruction(MI)) {
+      endPacket(MBB, MI);
+      continue;
+    }
+
+    // Ignore pseudo instructions.
+    if (this->ignorePseudoInstruction(MI, MBB))
+      continue;
+
+    SUnit *SUI = MIToSUnit[MI];
+    assert(SUI && "Missing SUnit Info!");
+
+    // Ask DFA if machine resource is available for MI.
+    bool ResourceAvail = ResourceTracker->canReserveResources(MI);
+    if (ResourceAvail) {
+      // Dependency check for MI with instructions in CurrentPacketMIs.
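+      // Every instruction already in the current packet is compared against
+      // MI; if any pair is neither legal to packetize together nor legal to
+      // prune, the packet is ended before MI is considered again.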
+ for (std::vector<MachineInstr*>::iterator VI = CurrentPacketMIs.begin(), + VE = CurrentPacketMIs.end(); VI != VE; ++VI) { + MachineInstr *MJ = *VI; + SUnit *SUJ = MIToSUnit[MJ]; + assert(SUJ && "Missing SUnit Info!"); + + // Is it legal to packetize SUI and SUJ together. + if (!this->isLegalToPacketizeTogether(SUI, SUJ)) { + // Allow packetization if dependency can be pruned. + if (!this->isLegalToPruneDependencies(SUI, SUJ)) { + // End the packet if dependency cannot be pruned. + endPacket(MBB, MI); + break; + } // !isLegalToPruneDependencies. + } // !isLegalToPacketizeTogether. + } // For all instructions in CurrentPacketMIs. + } else { + // End the packet if resource is not available. + endPacket(MBB, MI); + } + + // Add MI to the current packet. + BeginItr = this->addToPacket(MI); + } // For all instructions in BB. + + // End any packet left behind. + endPacket(MBB, EndItr); +} diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp index 6de6c0cb81bd..aa10d1d41f2b 100644 --- a/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -28,11 +28,12 @@ STATISTIC(NumDeletes, "Number of dead instructions deleted"); namespace { class DeadMachineInstructionElim : public MachineFunctionPass { virtual bool runOnMachineFunction(MachineFunction &MF); - + const TargetRegisterInfo *TRI; const MachineRegisterInfo *MRI; const TargetInstrInfo *TII; BitVector LivePhysRegs; + BitVector ReservedRegs; public: static char ID; // Pass identification, replacement for typeid @@ -45,14 +46,11 @@ namespace { }; } char DeadMachineInstructionElim::ID = 0; +char &llvm::DeadMachineInstructionElimID = DeadMachineInstructionElim::ID; INITIALIZE_PASS(DeadMachineInstructionElim, "dead-mi-elimination", "Remove dead machine instructions", false, false) -FunctionPass *llvm::createDeadMachineInstructionElimPass() { - return new DeadMachineInstructionElim(); -} - bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { // Technically speaking inline asm without side effects and no defs can still // be deleted. But there is so much bad inline asm code out there, we should @@ -70,10 +68,14 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { const MachineOperand &MO = MI->getOperand(i); if (MO.isReg() && MO.isDef()) { unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg) ? - LivePhysRegs[Reg] : !MRI->use_nodbg_empty(Reg)) { - // This def has a non-debug use. Don't delete the instruction! - return false; + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + // Don't delete live physreg defs, or any reserved register defs. + if (LivePhysRegs.test(Reg) || ReservedRegs.test(Reg)) + return false; + } else { + if (!MRI->use_nodbg_empty(Reg)) + // This def has a non-debug use. Don't delete the instruction! + return false; } } } @@ -89,7 +91,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { TII = MF.getTarget().getInstrInfo(); // Treat reserved registers as always live. - BitVector ReservedRegs = TRI->getReservedRegs(MF); + ReservedRegs = TRI->getReservedRegs(MF); // Loop over all instructions in all blocks, from bottom to top, so that it's // more likely that chains of dependent but ultimately dead instructions will @@ -102,7 +104,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { LivePhysRegs = ReservedRegs; // Also add any explicit live-out physregs for this block. 
- if (!MBB->empty() && MBB->back().getDesc().isReturn()) + if (!MBB->empty() && MBB->back().isReturn()) for (MachineRegisterInfo::liveout_iterator LOI = MRI->liveout_begin(), LOE = MRI->liveout_end(); LOI != LOE; ++LOI) { unsigned Reg = *LOI; @@ -169,10 +171,13 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { // Check the subreg set, not the alias set, because a def // of a super-register may still be partially live after // this def. - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); *SubRegs; ++SubRegs) LivePhysRegs.reset(*SubRegs); } + } else if (MO.isRegMask()) { + // Register mask of preserved registers. All clobbers are dead. + LivePhysRegs.clearBitsNotInMask(MO.getRegMask()); } } // Record the physreg uses, after the defs, in case a physreg is @@ -183,7 +188,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { unsigned Reg = MO.getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { LivePhysRegs.set(Reg); - for (const unsigned *AliasSet = TRI->getAliasSet(Reg); + for (const uint16_t *AliasSet = TRI->getAliasSet(Reg); *AliasSet; ++AliasSet) LivePhysRegs.set(*AliasSet); } diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp index ed9e409d3e5a..944dd4fb41c8 100644 --- a/lib/CodeGen/DwarfEHPrepare.cpp +++ b/lib/CodeGen/DwarfEHPrepare.cpp @@ -28,98 +28,34 @@ #include "llvm/Transforms/Utils/SSAUpdater.h" using namespace llvm; -STATISTIC(NumLandingPadsSplit, "Number of landing pads split"); -STATISTIC(NumUnwindsLowered, "Number of unwind instructions lowered"); -STATISTIC(NumResumesLowered, "Number of eh.resume calls lowered"); -STATISTIC(NumExceptionValuesMoved, "Number of eh.exception calls moved"); +STATISTIC(NumResumesLowered, "Number of resume calls lowered"); namespace { class DwarfEHPrepare : public FunctionPass { const TargetMachine *TM; const TargetLowering *TLI; - // The eh.exception intrinsic. - Function *ExceptionValueIntrinsic; - - // The eh.selector intrinsic. - Function *SelectorIntrinsic; - - // _Unwind_Resume_or_Rethrow or _Unwind_SjLj_Resume call. - Constant *URoR; - - // The EH language-specific catch-all type. - GlobalVariable *EHCatchAllValue; - - // _Unwind_Resume or the target equivalent. + // RewindFunction - _Unwind_Resume or the target equivalent. Constant *RewindFunction; - // We both use and preserve dominator info. - DominatorTree *DT; - - // The function we are running on. - Function *F; - - // The landing pads for this function. - typedef SmallPtrSet<BasicBlock*, 8> BBSet; - BBSet LandingPads; - - bool InsertUnwindResumeCalls(); - - bool NormalizeLandingPads(); - bool LowerUnwindsAndResumes(); - bool MoveExceptionValueCalls(); - - Instruction *CreateExceptionValueCall(BasicBlock *BB); - - /// CleanupSelectors - Any remaining eh.selector intrinsic calls which still - /// use the "llvm.eh.catch.all.value" call need to convert to using its - /// initializer instead. - bool CleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels); - - bool HasCatchAllInSelector(IntrinsicInst *); + bool InsertUnwindResumeCalls(Function &Fn); + Instruction *GetExceptionObject(ResumeInst *RI); - /// FindAllCleanupSelectors - Find all eh.selector calls that are clean-ups. - void FindAllCleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels, - SmallPtrSet<IntrinsicInst*, 32> &CatchAllSels); - - /// FindAllURoRInvokes - Find all URoR invokes in the function. 
- void FindAllURoRInvokes(SmallPtrSet<InvokeInst*, 32> &URoRInvokes); - - /// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" or - /// "_Unwind_SjLj_Resume" calls. The "unwind" part of these invokes jump to - /// a landing pad within the current function. This is a candidate to merge - /// the selector associated with the URoR invoke with the one from the - /// URoR's landing pad. - bool HandleURoRInvokes(); - - /// FindSelectorAndURoR - Find the eh.selector call and URoR call associated - /// with the eh.exception call. This recursively looks past instructions - /// which don't change the EH pointer value, like casts or PHI nodes. - bool FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke, - SmallPtrSet<IntrinsicInst*, 8> &SelCalls, - SmallPtrSet<PHINode*, 32> &SeenPHIs); - public: static char ID; // Pass identification, replacement for typeid. DwarfEHPrepare(const TargetMachine *tm) : FunctionPass(ID), TM(tm), TLI(TM->getTargetLowering()), - ExceptionValueIntrinsic(0), SelectorIntrinsic(0), - URoR(0), EHCatchAllValue(0), RewindFunction(0) { + RewindFunction(0) { initializeDominatorTreePass(*PassRegistry::getPassRegistry()); } virtual bool runOnFunction(Function &Fn); - // getAnalysisUsage - We need the dominator tree for handling URoR. - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<DominatorTree>(); - AU.addPreserved<DominatorTree>(); - } + virtual void getAnalysisUsage(AnalysisUsage &AU) const { } const char *getPassName() const { return "Exception handling preparation"; } - }; } // end anonymous namespace @@ -129,543 +65,52 @@ FunctionPass *llvm::createDwarfEHPass(const TargetMachine *tm) { return new DwarfEHPrepare(tm); } -/// HasCatchAllInSelector - Return true if the intrinsic instruction has a -/// catch-all. -bool DwarfEHPrepare::HasCatchAllInSelector(IntrinsicInst *II) { - if (!EHCatchAllValue) return false; - - unsigned ArgIdx = II->getNumArgOperands() - 1; - GlobalVariable *GV = dyn_cast<GlobalVariable>(II->getArgOperand(ArgIdx)); - return GV == EHCatchAllValue; -} - -/// FindAllCleanupSelectors - Find all eh.selector calls that are clean-ups. -void DwarfEHPrepare:: -FindAllCleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels, - SmallPtrSet<IntrinsicInst*, 32> &CatchAllSels) { - for (Value::use_iterator - I = SelectorIntrinsic->use_begin(), - E = SelectorIntrinsic->use_end(); I != E; ++I) { - IntrinsicInst *II = cast<IntrinsicInst>(*I); - - if (II->getParent()->getParent() != F) - continue; - - if (!HasCatchAllInSelector(II)) - Sels.insert(II); - else - CatchAllSels.insert(II); - } -} - -/// FindAllURoRInvokes - Find all URoR invokes in the function. -void DwarfEHPrepare:: -FindAllURoRInvokes(SmallPtrSet<InvokeInst*, 32> &URoRInvokes) { - for (Value::use_iterator - I = URoR->use_begin(), - E = URoR->use_end(); I != E; ++I) { - if (InvokeInst *II = dyn_cast<InvokeInst>(*I)) - URoRInvokes.insert(II); - } -} - -/// CleanupSelectors - Any remaining eh.selector intrinsic calls which still use -/// the "llvm.eh.catch.all.value" call need to convert to using its -/// initializer instead. 
-bool DwarfEHPrepare::CleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels) { - if (!EHCatchAllValue) return false; - - if (!SelectorIntrinsic) { - SelectorIntrinsic = - Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_selector); - if (!SelectorIntrinsic) return false; - } - - bool Changed = false; - for (SmallPtrSet<IntrinsicInst*, 32>::iterator - I = Sels.begin(), E = Sels.end(); I != E; ++I) { - IntrinsicInst *Sel = *I; - - // Index of the "llvm.eh.catch.all.value" variable. - unsigned OpIdx = Sel->getNumArgOperands() - 1; - GlobalVariable *GV = dyn_cast<GlobalVariable>(Sel->getArgOperand(OpIdx)); - if (GV != EHCatchAllValue) continue; - Sel->setArgOperand(OpIdx, EHCatchAllValue->getInitializer()); - Changed = true; - } - - return Changed; -} - -/// FindSelectorAndURoR - Find the eh.selector call associated with the -/// eh.exception call. And indicate if there is a URoR "invoke" associated with -/// the eh.exception call. This recursively looks past instructions which don't -/// change the EH pointer value, like casts or PHI nodes. -bool -DwarfEHPrepare::FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke, - SmallPtrSet<IntrinsicInst*, 8> &SelCalls, - SmallPtrSet<PHINode*, 32> &SeenPHIs) { - bool Changed = false; - - for (Value::use_iterator - I = Inst->use_begin(), E = Inst->use_end(); I != E; ++I) { - Instruction *II = dyn_cast<Instruction>(*I); - if (!II || II->getParent()->getParent() != F) continue; - - if (IntrinsicInst *Sel = dyn_cast<IntrinsicInst>(II)) { - if (Sel->getIntrinsicID() == Intrinsic::eh_selector) - SelCalls.insert(Sel); - } else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(II)) { - if (Invoke->getCalledFunction() == URoR) - URoRInvoke = true; - } else if (CastInst *CI = dyn_cast<CastInst>(II)) { - Changed |= FindSelectorAndURoR(CI, URoRInvoke, SelCalls, SeenPHIs); - } else if (PHINode *PN = dyn_cast<PHINode>(II)) { - if (SeenPHIs.insert(PN)) - // Don't process a PHI node more than once. - Changed |= FindSelectorAndURoR(PN, URoRInvoke, SelCalls, SeenPHIs); - } - } - - return Changed; -} - -/// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" or -/// "_Unwind_SjLj_Resume" calls. The "unwind" part of these invokes jump to a -/// landing pad within the current function. This is a candidate to merge the -/// selector associated with the URoR invoke with the one from the URoR's -/// landing pad. 
-bool DwarfEHPrepare::HandleURoRInvokes() { - if (!EHCatchAllValue) { - EHCatchAllValue = - F->getParent()->getNamedGlobal("llvm.eh.catch.all.value"); - if (!EHCatchAllValue) return false; - } - - if (!SelectorIntrinsic) { - SelectorIntrinsic = - Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_selector); - if (!SelectorIntrinsic) return false; - } - - SmallPtrSet<IntrinsicInst*, 32> Sels; - SmallPtrSet<IntrinsicInst*, 32> CatchAllSels; - FindAllCleanupSelectors(Sels, CatchAllSels); - - if (!URoR) { - URoR = F->getParent()->getFunction("_Unwind_Resume_or_Rethrow"); - if (!URoR) return CleanupSelectors(CatchAllSels); - } - - SmallPtrSet<InvokeInst*, 32> URoRInvokes; - FindAllURoRInvokes(URoRInvokes); - - SmallPtrSet<IntrinsicInst*, 32> SelsToConvert; - - for (SmallPtrSet<IntrinsicInst*, 32>::iterator - SI = Sels.begin(), SE = Sels.end(); SI != SE; ++SI) { - const BasicBlock *SelBB = (*SI)->getParent(); - for (SmallPtrSet<InvokeInst*, 32>::iterator - UI = URoRInvokes.begin(), UE = URoRInvokes.end(); UI != UE; ++UI) { - const BasicBlock *URoRBB = (*UI)->getParent(); - if (DT->dominates(SelBB, URoRBB)) { - SelsToConvert.insert(*SI); - break; +/// GetExceptionObject - Return the exception object from the value passed into +/// the 'resume' instruction (typically an aggregate). Clean up any dead +/// instructions, including the 'resume' instruction. +Instruction *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) { + Value *V = RI->getOperand(0); + Instruction *ExnObj = 0; + InsertValueInst *SelIVI = dyn_cast<InsertValueInst>(V); + LoadInst *SelLoad = 0; + InsertValueInst *ExcIVI = 0; + bool EraseIVIs = false; + + if (SelIVI) { + if (SelIVI->getNumIndices() == 1 && *SelIVI->idx_begin() == 1) { + ExcIVI = dyn_cast<InsertValueInst>(SelIVI->getOperand(0)); + if (ExcIVI && isa<UndefValue>(ExcIVI->getOperand(0)) && + ExcIVI->getNumIndices() == 1 && *ExcIVI->idx_begin() == 0) { + ExnObj = cast<Instruction>(ExcIVI->getOperand(1)); + SelLoad = dyn_cast<LoadInst>(SelIVI->getOperand(1)); + EraseIVIs = true; } } } - bool Changed = false; - - if (Sels.size() != SelsToConvert.size()) { - // If we haven't been able to convert all of the clean-up selectors, then - // loop through the slow way to see if they still need to be converted. - if (!ExceptionValueIntrinsic) { - ExceptionValueIntrinsic = - Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_exception); - if (!ExceptionValueIntrinsic) - return CleanupSelectors(CatchAllSels); - } - - for (Value::use_iterator - I = ExceptionValueIntrinsic->use_begin(), - E = ExceptionValueIntrinsic->use_end(); I != E; ++I) { - IntrinsicInst *EHPtr = dyn_cast<IntrinsicInst>(*I); - if (!EHPtr || EHPtr->getParent()->getParent() != F) continue; - - bool URoRInvoke = false; - SmallPtrSet<IntrinsicInst*, 8> SelCalls; - SmallPtrSet<PHINode*, 32> SeenPHIs; - Changed |= FindSelectorAndURoR(EHPtr, URoRInvoke, SelCalls, SeenPHIs); - - if (URoRInvoke) { - // This EH pointer is being used by an invoke of an URoR instruction and - // an eh.selector intrinsic call. If the eh.selector is a 'clean-up', we - // need to convert it to a 'catch-all'. - for (SmallPtrSet<IntrinsicInst*, 8>::iterator - SI = SelCalls.begin(), SE = SelCalls.end(); SI != SE; ++SI) - if (!HasCatchAllInSelector(*SI)) - SelsToConvert.insert(*SI); - } - } - } - - if (!SelsToConvert.empty()) { - // Convert all clean-up eh.selectors, which are associated with "invokes" of - // URoR calls, into catch-all eh.selectors. 
- Changed = true; - - for (SmallPtrSet<IntrinsicInst*, 8>::iterator - SI = SelsToConvert.begin(), SE = SelsToConvert.end(); - SI != SE; ++SI) { - IntrinsicInst *II = *SI; - - // Use the exception object pointer and the personality function - // from the original selector. - CallSite CS(II); - IntrinsicInst::op_iterator I = CS.arg_begin(); - IntrinsicInst::op_iterator E = CS.arg_end(); - IntrinsicInst::op_iterator B = prior(E); - - // Exclude last argument if it is an integer. - if (isa<ConstantInt>(B)) E = B; + if (!ExnObj) + ExnObj = ExtractValueInst::Create(RI->getOperand(0), 0, "exn.obj", RI); - // Add exception object pointer (front). - // Add personality function (next). - // Add in any filter IDs (rest). - SmallVector<Value*, 8> Args(I, E); + RI->eraseFromParent(); - Args.push_back(EHCatchAllValue->getInitializer()); // Catch-all indicator. - - CallInst *NewSelector = - CallInst::Create(SelectorIntrinsic, Args, "eh.sel.catch.all", II); - - NewSelector->setTailCall(II->isTailCall()); - NewSelector->setAttributes(II->getAttributes()); - NewSelector->setCallingConv(II->getCallingConv()); - - II->replaceAllUsesWith(NewSelector); - II->eraseFromParent(); - } + if (EraseIVIs) { + if (SelIVI->getNumUses() == 0) + SelIVI->eraseFromParent(); + if (ExcIVI->getNumUses() == 0) + ExcIVI->eraseFromParent(); + if (SelLoad && SelLoad->getNumUses() == 0) + SelLoad->eraseFromParent(); } - Changed |= CleanupSelectors(CatchAllSels); - return Changed; -} - -/// NormalizeLandingPads - Normalize and discover landing pads, noting them -/// in the LandingPads set. A landing pad is normal if the only CFG edges -/// that end at it are unwind edges from invoke instructions. If we inlined -/// through an invoke we could have a normal branch from the previous -/// unwind block through to the landing pad for the original invoke. -/// Abnormal landing pads are fixed up by redirecting all unwind edges to -/// a new basic block which falls through to the original. -bool DwarfEHPrepare::NormalizeLandingPads() { - bool Changed = false; - - const MCAsmInfo *MAI = TM->getMCAsmInfo(); - bool usingSjLjEH = MAI->getExceptionHandlingType() == ExceptionHandling::SjLj; - - for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { - TerminatorInst *TI = I->getTerminator(); - if (!isa<InvokeInst>(TI)) - continue; - BasicBlock *LPad = TI->getSuccessor(1); - // Skip landing pads that have already been normalized. - if (LandingPads.count(LPad)) - continue; - - // Check that only invoke unwind edges end at the landing pad. - bool OnlyUnwoundTo = true; - bool SwitchOK = usingSjLjEH; - for (pred_iterator PI = pred_begin(LPad), PE = pred_end(LPad); - PI != PE; ++PI) { - TerminatorInst *PT = (*PI)->getTerminator(); - // The SjLj dispatch block uses a switch instruction. This is effectively - // an unwind edge, so we can disregard it here. There will only ever - // be one dispatch, however, so if there are multiple switches, one - // of them truly is a normal edge, not an unwind edge. - if (SwitchOK && isa<SwitchInst>(PT)) { - SwitchOK = false; - continue; - } - if (!isa<InvokeInst>(PT) || LPad == PT->getSuccessor(0)) { - OnlyUnwoundTo = false; - break; - } - } - - if (OnlyUnwoundTo) { - // Only unwind edges lead to the landing pad. Remember the landing pad. - LandingPads.insert(LPad); - continue; - } - - // At least one normal edge ends at the landing pad. Redirect the unwind - // edges to a new basic block which falls through into this one. - - // Create the new basic block. 
- BasicBlock *NewBB = BasicBlock::Create(F->getContext(), - LPad->getName() + "_unwind_edge"); - - // Insert it into the function right before the original landing pad. - LPad->getParent()->getBasicBlockList().insert(LPad, NewBB); - - // Redirect unwind edges from the original landing pad to NewBB. - for (pred_iterator PI = pred_begin(LPad), PE = pred_end(LPad); PI != PE; ) { - TerminatorInst *PT = (*PI++)->getTerminator(); - if (isa<InvokeInst>(PT) && PT->getSuccessor(1) == LPad) - // Unwind to the new block. - PT->setSuccessor(1, NewBB); - } - - // If there are any PHI nodes in LPad, we need to update them so that they - // merge incoming values from NewBB instead. - for (BasicBlock::iterator II = LPad->begin(); isa<PHINode>(II); ++II) { - PHINode *PN = cast<PHINode>(II); - pred_iterator PB = pred_begin(NewBB), PE = pred_end(NewBB); - - // Check to see if all of the values coming in via unwind edges are the - // same. If so, we don't need to create a new PHI node. - Value *InVal = PN->getIncomingValueForBlock(*PB); - for (pred_iterator PI = PB; PI != PE; ++PI) { - if (PI != PB && InVal != PN->getIncomingValueForBlock(*PI)) { - InVal = 0; - break; - } - } - - if (InVal == 0) { - // Different unwind edges have different values. Create a new PHI node - // in NewBB. - PHINode *NewPN = PHINode::Create(PN->getType(), - PN->getNumIncomingValues(), - PN->getName()+".unwind", NewBB); - // Add an entry for each unwind edge, using the value from the old PHI. - for (pred_iterator PI = PB; PI != PE; ++PI) - NewPN->addIncoming(PN->getIncomingValueForBlock(*PI), *PI); - - // Now use this new PHI as the common incoming value for NewBB in PN. - InVal = NewPN; - } - - // Revector exactly one entry in the PHI node to come from NewBB - // and delete all other entries that come from unwind edges. If - // there are both normal and unwind edges from the same predecessor, - // this leaves an entry for the normal edge. - for (pred_iterator PI = PB; PI != PE; ++PI) - PN->removeIncomingValue(*PI); - PN->addIncoming(InVal, NewBB); - } - - // Add a fallthrough from NewBB to the original landing pad. - BranchInst::Create(LPad, NewBB); - - // Now update DominatorTree analysis information. - DT->splitBlock(NewBB); - - // Remember the newly constructed landing pad. The original landing pad - // LPad is no longer a landing pad now that all unwind edges have been - // revectored to NewBB. - LandingPads.insert(NewBB); - ++NumLandingPadsSplit; - Changed = true; - } - - return Changed; -} - -/// LowerUnwinds - Turn unwind instructions into calls to _Unwind_Resume, -/// rethrowing any previously caught exception. This will crash horribly -/// at runtime if there is no such exception: using unwind to throw a new -/// exception is currently not supported. -bool DwarfEHPrepare::LowerUnwindsAndResumes() { - SmallVector<Instruction*, 16> ResumeInsts; - - for (Function::iterator fi = F->begin(), fe = F->end(); fi != fe; ++fi) { - for (BasicBlock::iterator bi = fi->begin(), be = fi->end(); bi != be; ++bi){ - if (isa<UnwindInst>(bi)) - ResumeInsts.push_back(bi); - else if (CallInst *call = dyn_cast<CallInst>(bi)) - if (Function *fn = dyn_cast<Function>(call->getCalledValue())) - if (fn->getName() == "llvm.eh.resume") - ResumeInsts.push_back(bi); - } - } - - if (ResumeInsts.empty()) return false; - - // Find the rewind function if we didn't already. 
- if (!RewindFunction) { - LLVMContext &Ctx = ResumeInsts[0]->getContext(); - FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), - Type::getInt8PtrTy(Ctx), false); - const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME); - RewindFunction = F->getParent()->getOrInsertFunction(RewindName, FTy); - } - - bool Changed = false; - - for (SmallVectorImpl<Instruction*>::iterator - I = ResumeInsts.begin(), E = ResumeInsts.end(); I != E; ++I) { - Instruction *RI = *I; - - // Replace the resuming instruction with a call to _Unwind_Resume (or the - // appropriate target equivalent). - - llvm::Value *ExnValue; - if (isa<UnwindInst>(RI)) - ExnValue = CreateExceptionValueCall(RI->getParent()); - else - ExnValue = cast<CallInst>(RI)->getArgOperand(0); - - // Create the call... - CallInst *CI = CallInst::Create(RewindFunction, ExnValue, "", RI); - CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME)); - - // ...followed by an UnreachableInst, if it was an unwind. - // Calls to llvm.eh.resume are typically already followed by this. - if (isa<UnwindInst>(RI)) - new UnreachableInst(RI->getContext(), RI); - - if (isa<UnwindInst>(RI)) - ++NumUnwindsLowered; - else - ++NumResumesLowered; - - // Nuke the resume instruction. - RI->eraseFromParent(); - - Changed = true; - } - - return Changed; -} - -/// MoveExceptionValueCalls - Ensure that eh.exception is only ever called from -/// landing pads by replacing calls outside of landing pads with direct use of -/// a register holding the appropriate value; this requires adding calls inside -/// all landing pads to initialize the register. Also, move eh.exception calls -/// inside landing pads to the start of the landing pad (optional, but may make -/// things simpler for later passes). -bool DwarfEHPrepare::MoveExceptionValueCalls() { - // If the eh.exception intrinsic is not declared in the module then there is - // nothing to do. Speed up compilation by checking for this common case. - if (!ExceptionValueIntrinsic && - !F->getParent()->getFunction(Intrinsic::getName(Intrinsic::eh_exception))) - return false; - - bool Changed = false; - - // Move calls to eh.exception that are inside a landing pad to the start of - // the landing pad. - for (BBSet::const_iterator LI = LandingPads.begin(), LE = LandingPads.end(); - LI != LE; ++LI) { - BasicBlock *LP = *LI; - for (BasicBlock::iterator II = LP->getFirstNonPHIOrDbg(), IE = LP->end(); - II != IE;) - if (EHExceptionInst *EI = dyn_cast<EHExceptionInst>(II++)) { - // Found a call to eh.exception. - if (!EI->use_empty()) { - // If there is already a call to eh.exception at the start of the - // landing pad, then get hold of it; otherwise create such a call. - Value *CallAtStart = CreateExceptionValueCall(LP); - - // If the call was at the start of a landing pad then leave it alone. - if (EI == CallAtStart) - continue; - EI->replaceAllUsesWith(CallAtStart); - } - EI->eraseFromParent(); - ++NumExceptionValuesMoved; - Changed = true; - } - } - - // Look for calls to eh.exception that are not in a landing pad. If one is - // found, then a register that holds the exception value will be created in - // each landing pad, and the SSAUpdater will be used to compute the values - // returned by eh.exception calls outside of landing pads. - SSAUpdater SSA; - - // Remember where we found the eh.exception call, to avoid rescanning earlier - // basic blocks which we already know contain no eh.exception calls. 
- bool FoundCallOutsideLandingPad = false; - Function::iterator BB = F->begin(); - for (Function::iterator BE = F->end(); BB != BE; ++BB) { - // Skip over landing pads. - if (LandingPads.count(BB)) - continue; - - for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end(); - II != IE; ++II) - if (isa<EHExceptionInst>(II)) { - SSA.Initialize(II->getType(), II->getName()); - FoundCallOutsideLandingPad = true; - break; - } - - if (FoundCallOutsideLandingPad) - break; - } - - // If all calls to eh.exception are in landing pads then we are done. - if (!FoundCallOutsideLandingPad) - return Changed; - - // Add a call to eh.exception at the start of each landing pad, and tell the - // SSAUpdater that this is the value produced by the landing pad. - for (BBSet::iterator LI = LandingPads.begin(), LE = LandingPads.end(); - LI != LE; ++LI) - SSA.AddAvailableValue(*LI, CreateExceptionValueCall(*LI)); - - // Now turn all calls to eh.exception that are not in a landing pad into a use - // of the appropriate register. - for (Function::iterator BE = F->end(); BB != BE; ++BB) { - // Skip over landing pads. - if (LandingPads.count(BB)) - continue; - - for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end(); - II != IE;) - if (EHExceptionInst *EI = dyn_cast<EHExceptionInst>(II++)) { - // Found a call to eh.exception, replace it with the value from any - // upstream landing pad(s). - EI->replaceAllUsesWith(SSA.GetValueAtEndOfBlock(BB)); - EI->eraseFromParent(); - ++NumExceptionValuesMoved; - } - } - - return true; -} - -/// CreateExceptionValueCall - Insert a call to the eh.exception intrinsic at -/// the start of the basic block (unless there already is one, in which case -/// the existing call is returned). -Instruction *DwarfEHPrepare::CreateExceptionValueCall(BasicBlock *BB) { - Instruction *Start = BB->getFirstNonPHIOrDbg(); - // Is this a call to eh.exception? - if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(Start)) - if (CI->getIntrinsicID() == Intrinsic::eh_exception) - // Reuse the existing call. - return Start; - - // Find the eh.exception intrinsic if we didn't already. - if (!ExceptionValueIntrinsic) - ExceptionValueIntrinsic = Intrinsic::getDeclaration(F->getParent(), - Intrinsic::eh_exception); - - // Create the call. - return CallInst::Create(ExceptionValueIntrinsic, "eh.value.call", Start); + return ExnObj; } /// InsertUnwindResumeCalls - Convert the ResumeInsts that are still present /// into calls to the appropriate _Unwind_Resume function. -bool DwarfEHPrepare::InsertUnwindResumeCalls() { +bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { bool UsesNewEH = false; SmallVector<ResumeInst*, 16> Resumes; - for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { + for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { TerminatorInst *TI = I->getTerminator(); if (ResumeInst *RI = dyn_cast<ResumeInst>(TI)) Resumes.push_back(RI); @@ -682,27 +127,45 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() { FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), Type::getInt8PtrTy(Ctx), false); const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME); - RewindFunction = F->getParent()->getOrInsertFunction(RewindName, FTy); + RewindFunction = Fn.getParent()->getOrInsertFunction(RewindName, FTy); } // Create the basic block where the _Unwind_Resume call will live. 
- LLVMContext &Ctx = F->getContext(); - BasicBlock *UnwindBB = BasicBlock::Create(Ctx, "unwind_resume", F); - PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), Resumes.size(), + LLVMContext &Ctx = Fn.getContext(); + unsigned ResumesSize = Resumes.size(); + + if (ResumesSize == 1) { + // Instead of creating a new BB and PHI node, just append the call to + // _Unwind_Resume to the end of the single resume block. + ResumeInst *RI = Resumes.front(); + BasicBlock *UnwindBB = RI->getParent(); + Instruction *ExnObj = GetExceptionObject(RI); + + // Call the _Unwind_Resume function. + CallInst *CI = CallInst::Create(RewindFunction, ExnObj, "", UnwindBB); + CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME)); + + // We never expect _Unwind_Resume to return. + new UnreachableInst(Ctx, UnwindBB); + return true; + } + + BasicBlock *UnwindBB = BasicBlock::Create(Ctx, "unwind_resume", &Fn); + PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), ResumesSize, "exn.obj", UnwindBB); // Extract the exception object from the ResumeInst and add it to the PHI node // that feeds the _Unwind_Resume call. - BasicBlock *UnwindBBDom = Resumes[0]->getParent(); for (SmallVectorImpl<ResumeInst*>::iterator I = Resumes.begin(), E = Resumes.end(); I != E; ++I) { ResumeInst *RI = *I; - BranchInst::Create(UnwindBB, RI->getParent()); - ExtractValueInst *ExnObj = ExtractValueInst::Create(RI->getOperand(0), - 0, "exn.obj", RI); - PN->addIncoming(ExnObj, RI->getParent()); - UnwindBBDom = DT->findNearestCommonDominator(RI->getParent(), UnwindBBDom); - RI->eraseFromParent(); + BasicBlock *Parent = RI->getParent(); + BranchInst::Create(UnwindBB, Parent); + + Instruction *ExnObj = GetExceptionObject(RI); + PN->addIncoming(ExnObj, Parent); + + ++NumResumesLowered; } // Call the function. @@ -711,40 +174,10 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() { // We never expect _Unwind_Resume to return. new UnreachableInst(Ctx, UnwindBB); - - // Now update DominatorTree analysis information. - DT->addNewBlock(UnwindBB, UnwindBBDom); return true; } bool DwarfEHPrepare::runOnFunction(Function &Fn) { - bool Changed = false; - - // Initialize internal state. - DT = &getAnalysis<DominatorTree>(); // FIXME: We won't need this with the new EH. - F = &Fn; - - if (InsertUnwindResumeCalls()) { - // FIXME: The reset of this function can go once the new EH is done. - LandingPads.clear(); - return true; - } - - // Ensure that only unwind edges end at landing pads (a landing pad is a - // basic block where an invoke unwind edge ends). - Changed |= NormalizeLandingPads(); - - // Turn unwind instructions and eh.resume calls into libcalls. - Changed |= LowerUnwindsAndResumes(); - - // TODO: Move eh.selector calls to landing pads and combine them. - - // Move eh.exception calls to landing pads. - Changed |= MoveExceptionValueCalls(); - - Changed |= HandleURoRInvokes(); - - LandingPads.clear(); - + bool Changed = InsertUnwindResumeCalls(Fn); return Changed; } diff --git a/lib/CodeGen/ELF.h b/lib/CodeGen/ELF.h deleted file mode 100644 index 5b634682cc87..000000000000 --- a/lib/CodeGen/ELF.h +++ /dev/null @@ -1,227 +0,0 @@ -//===-- lib/CodeGen/ELF.h - ELF constants and data structures ---*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// This header contains common, non-processor-specific data structures and -// constants for the ELF file format. -// -// The details of the ELF32 bits in this file are largely based on the Tool -// Interface Standard (TIS) Executable and Linking Format (ELF) Specification -// Version 1.2, May 1995. The ELF64 is based on HP/Intel definition of the -// ELF-64 object file format document, Version 1.5 Draft 2 May 27, 1998 -// -//===----------------------------------------------------------------------===// - -#ifndef CODEGEN_ELF_H -#define CODEGEN_ELF_H - -#include "llvm/CodeGen/BinaryObject.h" -#include "llvm/CodeGen/MachineRelocation.h" -#include "llvm/Support/ELF.h" -#include "llvm/Support/DataTypes.h" - -namespace llvm { - class GlobalValue; - - /// ELFSym - This struct contains information about each symbol that is - /// added to logical symbol table for the module. This is eventually - /// turned into a real symbol table in the file. - struct ELFSym { - - // ELF symbols are related to llvm ones by being one of the two llvm - // types, for the other ones (section, file, func) a null pointer is - // assumed by default. - union { - const GlobalValue *GV; // If this is a pointer to a GV - const char *Ext; // If this is a pointer to a named symbol - } Source; - - // Describes from which source type this ELF symbol comes from, - // they can be GlobalValue, ExternalSymbol or neither. - enum { - isGV, // The Source.GV field is valid. - isExtSym, // The Source.ExtSym field is valid. - isOther // Not a GlobalValue or External Symbol - }; - unsigned SourceType; - - bool isGlobalValue() const { return SourceType == isGV; } - bool isExternalSym() const { return SourceType == isExtSym; } - - // getGlobalValue - If this is a global value which originated the - // elf symbol, return a reference to it. - const GlobalValue *getGlobalValue() const { - assert(SourceType == isGV && "This is not a global value"); - return Source.GV; - } - - // getExternalSym - If this is an external symbol which originated the - // elf symbol, return a reference to it. 
- const char *getExternalSymbol() const { - assert(SourceType == isExtSym && "This is not an external symbol"); - return Source.Ext; - } - - // getGV - From a global value return a elf symbol to represent it - static ELFSym *getGV(const GlobalValue *GV, unsigned Bind, - unsigned Type, unsigned Visibility) { - ELFSym *Sym = new ELFSym(); - Sym->Source.GV = GV; - Sym->setBind(Bind); - Sym->setType(Type); - Sym->setVisibility(Visibility); - Sym->SourceType = isGV; - return Sym; - } - - // getExtSym - Create and return an elf symbol to represent an - // external symbol - static ELFSym *getExtSym(const char *Ext) { - ELFSym *Sym = new ELFSym(); - Sym->Source.Ext = Ext; - Sym->setBind(ELF::STB_GLOBAL); - Sym->setType(ELF::STT_NOTYPE); - Sym->setVisibility(ELF::STV_DEFAULT); - Sym->SourceType = isExtSym; - return Sym; - } - - // getSectionSym - Returns a elf symbol to represent an elf section - static ELFSym *getSectionSym() { - ELFSym *Sym = new ELFSym(); - Sym->setBind(ELF::STB_LOCAL); - Sym->setType(ELF::STT_SECTION); - Sym->setVisibility(ELF::STV_DEFAULT); - Sym->SourceType = isOther; - return Sym; - } - - // getFileSym - Returns a elf symbol to represent the module identifier - static ELFSym *getFileSym() { - ELFSym *Sym = new ELFSym(); - Sym->setBind(ELF::STB_LOCAL); - Sym->setType(ELF::STT_FILE); - Sym->setVisibility(ELF::STV_DEFAULT); - Sym->SectionIdx = 0xfff1; // ELFSection::SHN_ABS; - Sym->SourceType = isOther; - return Sym; - } - - // getUndefGV - Returns a STT_NOTYPE symbol - static ELFSym *getUndefGV(const GlobalValue *GV, unsigned Bind) { - ELFSym *Sym = new ELFSym(); - Sym->Source.GV = GV; - Sym->setBind(Bind); - Sym->setType(ELF::STT_NOTYPE); - Sym->setVisibility(ELF::STV_DEFAULT); - Sym->SectionIdx = 0; //ELFSection::SHN_UNDEF; - Sym->SourceType = isGV; - return Sym; - } - - // ELF specific fields - unsigned NameIdx; // Index in .strtab of name, once emitted. - uint64_t Value; - unsigned Size; - uint8_t Info; - uint8_t Other; - unsigned short SectionIdx; - - // Symbol index into the Symbol table - unsigned SymTabIdx; - - ELFSym() : SourceType(isOther), NameIdx(0), Value(0), - Size(0), Info(0), Other(ELF::STV_DEFAULT), SectionIdx(0), - SymTabIdx(0) {} - - unsigned getBind() const { return (Info >> 4) & 0xf; } - unsigned getType() const { return Info & 0xf; } - bool isLocalBind() const { return getBind() == ELF::STB_LOCAL; } - bool isFileType() const { return getType() == ELF::STT_FILE; } - - void setBind(unsigned X) { - assert(X == (X & 0xF) && "Bind value out of range!"); - Info = (Info & 0x0F) | (X << 4); - } - - void setType(unsigned X) { - assert(X == (X & 0xF) && "Type value out of range!"); - Info = (Info & 0xF0) | X; - } - - void setVisibility(unsigned V) { - assert(V == (V & 0x3) && "Visibility value out of range!"); - Other = V; - } - }; - - /// ELFSection - This struct contains information about each section that is - /// emitted to the file. This is eventually turned into the section header - /// table at the end of the file. - class ELFSection : public BinaryObject { - public: - // ELF specific fields - unsigned NameIdx; // sh_name - .shstrtab idx of name, once emitted. - unsigned Type; // sh_type - Section contents & semantics - unsigned Flags; // sh_flags - Section flags. - uint64_t Addr; // sh_addr - The mem addr this section is in. - unsigned Offset; // sh_offset - Offset from the file start - unsigned Size; // sh_size - The section size. - unsigned Link; // sh_link - Section header table index link. - unsigned Info; // sh_info - Auxiliary information. 
- unsigned Align; // sh_addralign - Alignment of section. - unsigned EntSize; // sh_entsize - Size of entries in the section e - - /// SectionIdx - The number of the section in the Section Table. - unsigned short SectionIdx; - - /// Sym - The symbol to represent this section if it has one. - ELFSym *Sym; - - /// getSymIndex - Returns the symbol table index of the symbol - /// representing this section. - unsigned getSymbolTableIndex() const { - assert(Sym && "section not present in the symbol table"); - return Sym->SymTabIdx; - } - - ELFSection(const std::string &name, bool isLittleEndian, bool is64Bit) - : BinaryObject(name, isLittleEndian, is64Bit), Type(0), Flags(0), Addr(0), - Offset(0), Size(0), Link(0), Info(0), Align(0), EntSize(0), Sym(0) {} - }; - - /// ELFRelocation - This class contains all the information necessary to - /// to generate any 32-bit or 64-bit ELF relocation entry. - class ELFRelocation { - uint64_t r_offset; // offset in the section of the object this applies to - uint32_t r_symidx; // symbol table index of the symbol to use - uint32_t r_type; // machine specific relocation type - int64_t r_add; // explicit relocation addend - bool r_rela; // if true then the addend is part of the entry - // otherwise the addend is at the location specified - // by r_offset - public: - uint64_t getInfo(bool is64Bit) const { - if (is64Bit) - return ((uint64_t)r_symidx << 32) + ((uint64_t)r_type & 0xFFFFFFFFL); - else - return (r_symidx << 8) + (r_type & 0xFFL); - } - - uint64_t getOffset() const { return r_offset; } - int64_t getAddend() const { return r_add; } - - ELFRelocation(uint64_t off, uint32_t sym, uint32_t type, - bool rela = true, int64_t addend = 0) : - r_offset(off), r_symidx(sym), r_type(type), - r_add(addend), r_rela(rela) {} - }; - -} // end namespace llvm - -#endif diff --git a/lib/CodeGen/ELFCodeEmitter.cpp b/lib/CodeGen/ELFCodeEmitter.cpp deleted file mode 100644 index 660424c3c141..000000000000 --- a/lib/CodeGen/ELFCodeEmitter.cpp +++ /dev/null @@ -1,205 +0,0 @@ -//===-- lib/CodeGen/ELFCodeEmitter.cpp ------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "elfce" - -#include "ELF.h" -#include "ELFWriter.h" -#include "ELFCodeEmitter.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/CodeGen/BinaryObject.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineJumpTableInfo.h" -#include "llvm/CodeGen/MachineRelocation.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetELFWriterInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" - -//===----------------------------------------------------------------------===// -// ELFCodeEmitter Implementation -//===----------------------------------------------------------------------===// - -namespace llvm { - -/// startFunction - This callback is invoked when a new machine function is -/// about to be emitted. 
-void ELFCodeEmitter::startFunction(MachineFunction &MF) { - DEBUG(dbgs() << "processing function: " - << MF.getFunction()->getName() << "\n"); - - // Get the ELF Section that this function belongs in. - ES = &EW.getTextSection(MF.getFunction()); - - // Set the desired binary object to be used by the code emitters - setBinaryObject(ES); - - // Get the function alignment in bytes - unsigned Align = (1 << MF.getAlignment()); - - // The function must start on its required alignment - ES->emitAlignment(Align); - - // Update the section alignment if needed. - ES->Align = std::max(ES->Align, Align); - - // Record the function start offset - FnStartOff = ES->getCurrentPCOffset(); - - // Emit constant pool and jump tables to their appropriate sections. - // They need to be emitted before the function because in some targets - // the later may reference JT or CP entry address. - emitConstantPool(MF.getConstantPool()); - if (MF.getJumpTableInfo()) - emitJumpTables(MF.getJumpTableInfo()); -} - -/// finishFunction - This callback is invoked after the function is completely -/// finished. -bool ELFCodeEmitter::finishFunction(MachineFunction &MF) { - // Add a symbol to represent the function. - const Function *F = MF.getFunction(); - ELFSym *FnSym = ELFSym::getGV(F, EW.getGlobalELFBinding(F), ELF::STT_FUNC, - EW.getGlobalELFVisibility(F)); - FnSym->SectionIdx = ES->SectionIdx; - FnSym->Size = ES->getCurrentPCOffset()-FnStartOff; - EW.AddPendingGlobalSymbol(F, true); - - // Offset from start of Section - FnSym->Value = FnStartOff; - - if (!F->hasPrivateLinkage()) - EW.SymbolList.push_back(FnSym); - - // Patch up Jump Table Section relocations to use the real MBBs offsets - // now that the MBB label offsets inside the function are known. - if (MF.getJumpTableInfo()) { - ELFSection &JTSection = EW.getJumpTableSection(); - for (std::vector<MachineRelocation>::iterator MRI = JTRelocations.begin(), - MRE = JTRelocations.end(); MRI != MRE; ++MRI) { - MachineRelocation &MR = *MRI; - uintptr_t MBBOffset = getMachineBasicBlockAddress(MR.getBasicBlock()); - MR.setResultPointer((void*)MBBOffset); - MR.setConstantVal(ES->SectionIdx); - JTSection.addRelocation(MR); - } - } - - // If we have emitted any relocations to function-specific objects such as - // basic blocks, constant pools entries, or jump tables, record their - // addresses now so that we can rewrite them with the correct addresses later - for (unsigned i = 0, e = Relocations.size(); i != e; ++i) { - MachineRelocation &MR = Relocations[i]; - intptr_t Addr; - if (MR.isGlobalValue()) { - EW.AddPendingGlobalSymbol(MR.getGlobalValue()); - } else if (MR.isExternalSymbol()) { - EW.AddPendingExternalSymbol(MR.getExternalSymbol()); - } else if (MR.isBasicBlock()) { - Addr = getMachineBasicBlockAddress(MR.getBasicBlock()); - MR.setConstantVal(ES->SectionIdx); - MR.setResultPointer((void*)Addr); - } else if (MR.isConstantPoolIndex()) { - Addr = getConstantPoolEntryAddress(MR.getConstantPoolIndex()); - MR.setConstantVal(CPSections[MR.getConstantPoolIndex()]); - MR.setResultPointer((void*)Addr); - } else if (MR.isJumpTableIndex()) { - ELFSection &JTSection = EW.getJumpTableSection(); - Addr = getJumpTableEntryAddress(MR.getJumpTableIndex()); - MR.setConstantVal(JTSection.SectionIdx); - MR.setResultPointer((void*)Addr); - } else { - llvm_unreachable("Unhandled relocation type"); - } - ES->addRelocation(MR); - } - - // Clear per-function data structures. 
- JTRelocations.clear(); - Relocations.clear(); - CPLocations.clear(); - CPSections.clear(); - JTLocations.clear(); - MBBLocations.clear(); - return false; -} - -/// emitConstantPool - For each constant pool entry, figure out which section -/// the constant should live in and emit the constant -void ELFCodeEmitter::emitConstantPool(MachineConstantPool *MCP) { - const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants(); - if (CP.empty()) return; - - // TODO: handle PIC codegen - assert(TM.getRelocationModel() != Reloc::PIC_ && - "PIC codegen not yet handled for elf constant pools!"); - - for (unsigned i = 0, e = CP.size(); i != e; ++i) { - MachineConstantPoolEntry CPE = CP[i]; - - // Record the constant pool location and the section index - ELFSection &CstPool = EW.getConstantPoolSection(CPE); - CPLocations.push_back(CstPool.size()); - CPSections.push_back(CstPool.SectionIdx); - - if (CPE.isMachineConstantPoolEntry()) - assert(0 && "CPE.isMachineConstantPoolEntry not supported yet"); - - // Emit the constant to constant pool section - EW.EmitGlobalConstant(CPE.Val.ConstVal, CstPool); - } -} - -/// emitJumpTables - Emit all the jump tables for a given jump table info -/// record to the appropriate section. -void ELFCodeEmitter::emitJumpTables(MachineJumpTableInfo *MJTI) { - const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); - if (JT.empty()) return; - - // FIXME: handle PIC codegen - assert(TM.getRelocationModel() != Reloc::PIC_ && - "PIC codegen not yet handled for elf jump tables!"); - - const TargetELFWriterInfo *TEW = TM.getELFWriterInfo(); - unsigned EntrySize = 4; //MJTI->getEntrySize(); - - // Get the ELF Section to emit the jump table - ELFSection &JTSection = EW.getJumpTableSection(); - - // For each JT, record its offset from the start of the section - for (unsigned i = 0, e = JT.size(); i != e; ++i) { - const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs; - - // Record JT 'i' offset in the JT section - JTLocations.push_back(JTSection.size()); - - // Each MBB entry in the Jump table section has a relocation entry - // against the current text section. - for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi) { - unsigned MachineRelTy = TEW->getAbsoluteLabelMachineRelTy(); - MachineRelocation MR = - MachineRelocation::getBB(JTSection.size(), MachineRelTy, MBBs[mi]); - - // Add the relocation to the Jump Table section - JTRelocations.push_back(MR); - - // Output placeholder for MBB in the JT section - for (unsigned s=0; s < EntrySize; ++s) - JTSection.emitByte(0); - } - } -} - -} // end namespace llvm diff --git a/lib/CodeGen/ELFCodeEmitter.h b/lib/CodeGen/ELFCodeEmitter.h deleted file mode 100644 index 8671c674eecf..000000000000 --- a/lib/CodeGen/ELFCodeEmitter.h +++ /dev/null @@ -1,78 +0,0 @@ -//===-- lib/CodeGen/ELFCodeEmitter.h ----------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef ELFCODEEMITTER_H -#define ELFCODEEMITTER_H - -#include "llvm/CodeGen/ObjectCodeEmitter.h" -#include <vector> - -namespace llvm { - class ELFWriter; - class ELFSection; - - /// ELFCodeEmitter - This class is used by the ELFWriter to - /// emit the code for functions to the ELF file. 
- class ELFCodeEmitter : public ObjectCodeEmitter { - ELFWriter &EW; - - /// Target machine description - TargetMachine &TM; - - /// Section containing code for functions - ELFSection *ES; - - /// Relocations - Record relocations needed by the current function - std::vector<MachineRelocation> Relocations; - - /// JTRelocations - Record relocations needed by the relocation - /// section. - std::vector<MachineRelocation> JTRelocations; - - /// FnStartPtr - Function offset from the beginning of ELFSection 'ES' - uintptr_t FnStartOff; - public: - explicit ELFCodeEmitter(ELFWriter &ew) : EW(ew), TM(EW.TM) {} - - /// addRelocation - Register new relocations for this function - void addRelocation(const MachineRelocation &MR) { - Relocations.push_back(MR); - } - - /// emitConstantPool - For each constant pool entry, figure out which - /// section the constant should live in and emit data to it - void emitConstantPool(MachineConstantPool *MCP); - - /// emitJumpTables - Emit all the jump tables for a given jump table - /// info and record them to the appropriate section. - void emitJumpTables(MachineJumpTableInfo *MJTI); - - void startFunction(MachineFunction &F); - bool finishFunction(MachineFunction &F); - - /// emitLabel - Emits a label - virtual void emitLabel(MCSymbol *Label) { - assert(0 && "emitLabel not implemented"); - } - - /// getLabelAddress - Return the address of the specified LabelID, - /// only usable after the LabelID has been emitted. - virtual uintptr_t getLabelAddress(MCSymbol *Label) const { - assert(0 && "getLabelAddress not implemented"); - return 0; - } - - virtual void setModuleInfo(llvm::MachineModuleInfo* MMI) {} - -}; // end class ELFCodeEmitter - -} // end namespace llvm - -#endif - diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp deleted file mode 100644 index f2c218565854..000000000000 --- a/lib/CodeGen/ELFWriter.cpp +++ /dev/null @@ -1,1105 +0,0 @@ -//===-- ELFWriter.cpp - Target-independent ELF Writer code ----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the target-independent ELF writer. This file writes out -// the ELF file in the following order: -// -// #1. ELF Header -// #2. '.text' section -// #3. '.data' section -// #4. '.bss' section (conceptual position in file) -// ... -// #X. '.shstrtab' section -// #Y. Section Table -// -// The entries in the section table are laid out as: -// #0. Null entry [required] -// #1. ".text" entry - the program code -// #2. ".data" entry - global variables with initializers. [ if needed ] -// #3. ".bss" entry - global variables without initializers. [ if needed ] -// ... -// #N. ".shstrtab" entry - String table for the section names. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "elfwriter" -#include "ELF.h" -#include "ELFWriter.h" -#include "ELFCodeEmitter.h" -#include "llvm/Constants.h" -#include "llvm/Module.h" -#include "llvm/PassManager.h" -#include "llvm/DerivedTypes.h" -#include "llvm/CodeGen/BinaryObject.h" -#include "llvm/CodeGen/MachineCodeEmitter.h" -#include "llvm/CodeGen/ObjectCodeEmitter.h" -#include "llvm/CodeGen/MachineCodeEmitter.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCSectionELF.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetELFWriterInfo.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/SmallString.h" -using namespace llvm; - -char ELFWriter::ID = 0; - -//===----------------------------------------------------------------------===// -// ELFWriter Implementation -//===----------------------------------------------------------------------===// - -ELFWriter::ELFWriter(raw_ostream &o, TargetMachine &tm) - : MachineFunctionPass(ID), O(o), TM(tm), - OutContext(*new MCContext(*TM.getMCAsmInfo(), *TM.getRegisterInfo(), - &TM.getTargetLowering()->getObjFileLowering())), - TLOF(TM.getTargetLowering()->getObjFileLowering()), - is64Bit(TM.getTargetData()->getPointerSizeInBits() == 64), - isLittleEndian(TM.getTargetData()->isLittleEndian()), - ElfHdr(isLittleEndian, is64Bit) { - - MAI = TM.getMCAsmInfo(); - TEW = TM.getELFWriterInfo(); - - // Create the object code emitter object for this target. - ElfCE = new ELFCodeEmitter(*this); - - // Initial number of sections - NumSections = 0; -} - -ELFWriter::~ELFWriter() { - delete ElfCE; - delete &OutContext; - - while(!SymbolList.empty()) { - delete SymbolList.back(); - SymbolList.pop_back(); - } - - while(!PrivateSyms.empty()) { - delete PrivateSyms.back(); - PrivateSyms.pop_back(); - } - - while(!SectionList.empty()) { - delete SectionList.back(); - SectionList.pop_back(); - } - - // Release the name mangler object. - delete Mang; Mang = 0; -} - -// doInitialization - Emit the file header and all of the global variables for -// the module to the ELF file. -bool ELFWriter::doInitialization(Module &M) { - // Initialize TargetLoweringObjectFile. - const_cast<TargetLoweringObjectFile&>(TLOF).Initialize(OutContext, TM); - - Mang = new Mangler(OutContext, *TM.getTargetData()); - - // ELF Header - // ---------- - // Fields e_shnum e_shstrndx are only known after all section have - // been emitted. They locations in the ouput buffer are recorded so - // to be patched up later. 
- // - // Note - // ---- - // emitWord method behaves differently for ELF32 and ELF64, writing - // 4 bytes in the former and 8 in the last for *_off and *_addr elf types - - ElfHdr.emitByte(0x7f); // e_ident[EI_MAG0] - ElfHdr.emitByte('E'); // e_ident[EI_MAG1] - ElfHdr.emitByte('L'); // e_ident[EI_MAG2] - ElfHdr.emitByte('F'); // e_ident[EI_MAG3] - - ElfHdr.emitByte(TEW->getEIClass()); // e_ident[EI_CLASS] - ElfHdr.emitByte(TEW->getEIData()); // e_ident[EI_DATA] - ElfHdr.emitByte(ELF::EV_CURRENT); // e_ident[EI_VERSION] - ElfHdr.emitAlignment(16); // e_ident[EI_NIDENT-EI_PAD] - - ElfHdr.emitWord16(ELF::ET_REL); // e_type - ElfHdr.emitWord16(TEW->getEMachine()); // e_machine = target - ElfHdr.emitWord32(ELF::EV_CURRENT); // e_version - ElfHdr.emitWord(0); // e_entry, no entry point in .o file - ElfHdr.emitWord(0); // e_phoff, no program header for .o - ELFHdr_e_shoff_Offset = ElfHdr.size(); - ElfHdr.emitWord(0); // e_shoff = sec hdr table off in bytes - ElfHdr.emitWord32(TEW->getEFlags()); // e_flags = whatever the target wants - ElfHdr.emitWord16(TEW->getHdrSize()); // e_ehsize = ELF header size - ElfHdr.emitWord16(0); // e_phentsize = prog header entry size - ElfHdr.emitWord16(0); // e_phnum = # prog header entries = 0 - - // e_shentsize = Section header entry size - ElfHdr.emitWord16(TEW->getSHdrSize()); - - // e_shnum = # of section header ents - ELFHdr_e_shnum_Offset = ElfHdr.size(); - ElfHdr.emitWord16(0); // Placeholder - - // e_shstrndx = Section # of '.shstrtab' - ELFHdr_e_shstrndx_Offset = ElfHdr.size(); - ElfHdr.emitWord16(0); // Placeholder - - // Add the null section, which is required to be first in the file. - getNullSection(); - - // The first entry in the symtab is the null symbol and the second - // is a local symbol containing the module/file name - SymbolList.push_back(new ELFSym()); - SymbolList.push_back(ELFSym::getFileSym()); - - return false; -} - -// AddPendingGlobalSymbol - Add a global to be processed and to -// the global symbol lookup, use a zero index because the table -// index will be determined later. -void ELFWriter::AddPendingGlobalSymbol(const GlobalValue *GV, - bool AddToLookup /* = false */) { - PendingGlobals.insert(GV); - if (AddToLookup) - GblSymLookup[GV] = 0; -} - -// AddPendingExternalSymbol - Add the external to be processed -// and to the external symbol lookup, use a zero index because -// the symbol table index will be determined later. 
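Aside (not part of the patch): the doInitialization() code above writes the ELF header byte by byte and records the offsets of e_shoff, e_shnum and e_shstrndx so they can be patched once the sections are laid out. For readers less familiar with the format, here is a minimal stand-alone C++ sketch of the same field order for the 32-bit little-endian case; every name below is invented for illustration, and only the field sizes, their order, and the placeholder-then-patch scheme mirror the deleted writer.

  #include <cstdint>
  #include <vector>

  struct Elf32HeaderBlob {
    std::vector<uint8_t> Bytes;
    size_t ShOffField = 0;    // byte offset of e_shoff, patched later
    size_t ShNumField = 0;    // byte offset of e_shnum, patched later
    size_t ShStrNdxField = 0; // byte offset of e_shstrndx, patched later

    void byte(uint8_t B) { Bytes.push_back(B); }
    void half(uint16_t H) { byte(H & 0xff); byte(H >> 8); }
    void word(uint32_t W) {
      for (int i = 0; i < 4; ++i)
        byte((W >> (8 * i)) & 0xff);
    }
  };

  Elf32HeaderBlob emitElf32Header(uint16_t Machine, uint32_t Flags) {
    Elf32HeaderBlob H;
    H.byte(0x7f); H.byte('E'); H.byte('L'); H.byte('F'); // e_ident magic
    H.byte(1);                  // EI_CLASS   = ELFCLASS32
    H.byte(1);                  // EI_DATA    = ELFDATA2LSB
    H.byte(1);                  // EI_VERSION = EV_CURRENT
    while (H.Bytes.size() < 16) // pad e_ident out to EI_NIDENT bytes
      H.byte(0);
    H.half(1);                  // e_type = ET_REL, a relocatable object
    H.half(Machine);            // e_machine
    H.word(1);                  // e_version = EV_CURRENT
    H.word(0);                  // e_entry: no entry point in a .o file
    H.word(0);                  // e_phoff: no program headers in a .o file
    H.ShOffField = H.Bytes.size();
    H.word(0);                  // e_shoff placeholder
    H.word(Flags);              // e_flags
    H.half(52);                 // e_ehsize: an ELF32 header is 52 bytes
    H.half(0);                  // e_phentsize
    H.half(0);                  // e_phnum
    H.half(40);                 // e_shentsize: an ELF32 section header is 40 bytes
    H.ShNumField = H.Bytes.size();
    H.half(0);                  // e_shnum placeholder
    H.ShStrNdxField = H.Bytes.size();
    H.half(0);                  // e_shstrndx placeholder
    return H;                   // H.Bytes.size() == 52 here
  }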
-void ELFWriter::AddPendingExternalSymbol(const char *External) { - PendingExternals.insert(External); - ExtSymLookup[External] = 0; -} - -ELFSection &ELFWriter::getDataSection() { - const MCSectionELF *Data = (const MCSectionELF *)TLOF.getDataSection(); - return getSection(Data->getSectionName(), Data->getType(), - Data->getFlags(), 4); -} - -ELFSection &ELFWriter::getBSSSection() { - const MCSectionELF *BSS = (const MCSectionELF *)TLOF.getBSSSection(); - return getSection(BSS->getSectionName(), BSS->getType(), BSS->getFlags(), 4); -} - -// getCtorSection - Get the static constructor section -ELFSection &ELFWriter::getCtorSection() { - const MCSectionELF *Ctor = (const MCSectionELF *)TLOF.getStaticCtorSection(); - return getSection(Ctor->getSectionName(), Ctor->getType(), Ctor->getFlags()); -} - -// getDtorSection - Get the static destructor section -ELFSection &ELFWriter::getDtorSection() { - const MCSectionELF *Dtor = (const MCSectionELF *)TLOF.getStaticDtorSection(); - return getSection(Dtor->getSectionName(), Dtor->getType(), Dtor->getFlags()); -} - -// getTextSection - Get the text section for the specified function -ELFSection &ELFWriter::getTextSection(const Function *F) { - const MCSectionELF *Text = - (const MCSectionELF *)TLOF.SectionForGlobal(F, Mang, TM); - return getSection(Text->getSectionName(), Text->getType(), Text->getFlags()); -} - -// getJumpTableSection - Get a read only section for constants when -// emitting jump tables. TODO: add PIC support -ELFSection &ELFWriter::getJumpTableSection() { - const MCSectionELF *JT = - (const MCSectionELF *)TLOF.getSectionForConstant(SectionKind::getReadOnly()); - return getSection(JT->getSectionName(), JT->getType(), JT->getFlags(), - TM.getTargetData()->getPointerABIAlignment()); -} - -// getConstantPoolSection - Get a constant pool section based on the machine -// constant pool entry type and relocation info. -ELFSection &ELFWriter::getConstantPoolSection(MachineConstantPoolEntry &CPE) { - SectionKind Kind; - switch (CPE.getRelocationInfo()) { - default: llvm_unreachable("Unknown section kind"); - case 2: Kind = SectionKind::getReadOnlyWithRel(); break; - case 1: - Kind = SectionKind::getReadOnlyWithRelLocal(); - break; - case 0: - switch (TM.getTargetData()->getTypeAllocSize(CPE.getType())) { - case 4: Kind = SectionKind::getMergeableConst4(); break; - case 8: Kind = SectionKind::getMergeableConst8(); break; - case 16: Kind = SectionKind::getMergeableConst16(); break; - default: Kind = SectionKind::getMergeableConst(); break; - } - } - - const MCSectionELF *CPSect = - (const MCSectionELF *)TLOF.getSectionForConstant(Kind); - return getSection(CPSect->getSectionName(), CPSect->getType(), - CPSect->getFlags(), CPE.getAlignment()); -} - -// getRelocSection - Return the relocation section of section 'S'. 'RelA' -// is true if the relocation section contains entries with addends. -ELFSection &ELFWriter::getRelocSection(ELFSection &S) { - unsigned SectionType = TEW->hasRelocationAddend() ? 
- ELF::SHT_RELA : ELF::SHT_REL; - - std::string SectionName(".rel"); - if (TEW->hasRelocationAddend()) - SectionName.append("a"); - SectionName.append(S.getName()); - - return getSection(SectionName, SectionType, 0, TEW->getPrefELFAlignment()); -} - -// getGlobalELFVisibility - Returns the ELF specific visibility type -unsigned ELFWriter::getGlobalELFVisibility(const GlobalValue *GV) { - switch (GV->getVisibility()) { - default: - llvm_unreachable("unknown visibility type"); - case GlobalValue::DefaultVisibility: - return ELF::STV_DEFAULT; - case GlobalValue::HiddenVisibility: - return ELF::STV_HIDDEN; - case GlobalValue::ProtectedVisibility: - return ELF::STV_PROTECTED; - } - return 0; -} - -// getGlobalELFBinding - Returns the ELF specific binding type -unsigned ELFWriter::getGlobalELFBinding(const GlobalValue *GV) { - if (GV->hasInternalLinkage()) - return ELF::STB_LOCAL; - - if (GV->isWeakForLinker() && !GV->hasCommonLinkage()) - return ELF::STB_WEAK; - - return ELF::STB_GLOBAL; -} - -// getGlobalELFType - Returns the ELF specific type for a global -unsigned ELFWriter::getGlobalELFType(const GlobalValue *GV) { - if (GV->isDeclaration()) - return ELF::STT_NOTYPE; - - if (isa<Function>(GV)) - return ELF::STT_FUNC; - - return ELF::STT_OBJECT; -} - -// IsELFUndefSym - True if the global value must be marked as a symbol -// which points to a SHN_UNDEF section. This means that the symbol has -// no definition on the module. -static bool IsELFUndefSym(const GlobalValue *GV) { - return GV->isDeclaration() || (isa<Function>(GV)); -} - -// AddToSymbolList - Update the symbol lookup and If the symbol is -// private add it to PrivateSyms list, otherwise to SymbolList. -void ELFWriter::AddToSymbolList(ELFSym *GblSym) { - assert(GblSym->isGlobalValue() && "Symbol must be a global value"); - - const GlobalValue *GV = GblSym->getGlobalValue(); - if (GV->hasPrivateLinkage()) { - // For a private symbols, keep track of the index inside - // the private list since it will never go to the symbol - // table and won't be patched up later. - PrivateSyms.push_back(GblSym); - GblSymLookup[GV] = PrivateSyms.size()-1; - } else { - // Non private symbol are left with zero indices until - // they are patched up during the symbol table emition - // (where the indicies are created). - SymbolList.push_back(GblSym); - GblSymLookup[GV] = 0; - } -} - -/// HasCommonSymbols - True if this section holds common symbols, this is -/// indicated on the ELF object file by a symbol with SHN_COMMON section -/// header index. -static bool HasCommonSymbols(const MCSectionELF &S) { - // FIXME: this is wrong, a common symbol can be in .data for example. - if (StringRef(S.getSectionName()).startswith(".gnu.linkonce.")) - return true; - - return false; -} - - -// EmitGlobal - Choose the right section for global and emit it -void ELFWriter::EmitGlobal(const GlobalValue *GV) { - - // Check if the referenced symbol is already emitted - if (GblSymLookup.find(GV) != GblSymLookup.end()) - return; - - // Handle ELF Bind, Visibility and Type for the current symbol - unsigned SymBind = getGlobalELFBinding(GV); - unsigned SymType = getGlobalELFType(GV); - bool IsUndefSym = IsELFUndefSym(GV); - - ELFSym *GblSym = IsUndefSym ? 
ELFSym::getUndefGV(GV, SymBind) - : ELFSym::getGV(GV, SymBind, SymType, getGlobalELFVisibility(GV)); - - if (!IsUndefSym) { - assert(isa<GlobalVariable>(GV) && "GV not a global variable!"); - const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV); - - // Handle special llvm globals - if (EmitSpecialLLVMGlobal(GVar)) - return; - - // Get the ELF section where this global belongs from TLOF - const MCSectionELF *S = - (const MCSectionELF *)TLOF.SectionForGlobal(GV, Mang, TM); - ELFSection &ES = - getSection(S->getSectionName(), S->getType(), S->getFlags()); - SectionKind Kind = S->getKind(); - - // The symbol align should update the section alignment if needed - const TargetData *TD = TM.getTargetData(); - unsigned Align = TD->getPreferredAlignment(GVar); - unsigned Size = TD->getTypeAllocSize(GVar->getInitializer()->getType()); - GblSym->Size = Size; - - if (HasCommonSymbols(*S)) { // Symbol must go to a common section - GblSym->SectionIdx = ELF::SHN_COMMON; - - // A new linkonce section is created for each global in the - // common section, the default alignment is 1 and the symbol - // value contains its alignment. - ES.Align = 1; - GblSym->Value = Align; - - } else if (Kind.isBSS() || Kind.isThreadBSS()) { // Symbol goes to BSS. - GblSym->SectionIdx = ES.SectionIdx; - - // Update the size with alignment and the next object can - // start in the right offset in the section - if (Align) ES.Size = (ES.Size + Align-1) & ~(Align-1); - ES.Align = std::max(ES.Align, Align); - - // GblSym->Value should contain the virtual offset inside the section. - // Virtual because the BSS space is not allocated on ELF objects - GblSym->Value = ES.Size; - ES.Size += Size; - - } else { // The symbol must go to some kind of data section - GblSym->SectionIdx = ES.SectionIdx; - - // GblSym->Value should contain the symbol offset inside the section, - // and all symbols should start on their required alignment boundary - ES.Align = std::max(ES.Align, Align); - ES.emitAlignment(Align); - GblSym->Value = ES.size(); - - // Emit the global to the data section 'ES' - EmitGlobalConstant(GVar->getInitializer(), ES); - } - } - - AddToSymbolList(GblSym); -} - -void ELFWriter::EmitGlobalConstantStruct(const ConstantStruct *CVS, - ELFSection &GblS) { - - // Print the fields in successive locations. Pad to align if needed! - const TargetData *TD = TM.getTargetData(); - unsigned Size = TD->getTypeAllocSize(CVS->getType()); - const StructLayout *cvsLayout = TD->getStructLayout(CVS->getType()); - uint64_t sizeSoFar = 0; - for (unsigned i = 0, e = CVS->getNumOperands(); i != e; ++i) { - const Constant* field = CVS->getOperand(i); - - // Check if padding is needed and insert one or more 0s. - uint64_t fieldSize = TD->getTypeAllocSize(field->getType()); - uint64_t padSize = ((i == e-1 ? Size : cvsLayout->getElementOffset(i+1)) - - cvsLayout->getElementOffset(i)) - fieldSize; - sizeSoFar += fieldSize + padSize; - - // Now print the actual field value. - EmitGlobalConstant(field, GblS); - - // Insert padding - this may include padding to increase the size of the - // current field up to the ABI size (if the struct is not packed) as well - // as padding to ensure that the next field starts at the right offset. 
- GblS.emitZeros(padSize); - } - assert(sizeSoFar == cvsLayout->getSizeInBytes() && - "Layout of constant struct may be incorrect!"); -} - -void ELFWriter::EmitGlobalConstant(const Constant *CV, ELFSection &GblS) { - const TargetData *TD = TM.getTargetData(); - unsigned Size = TD->getTypeAllocSize(CV->getType()); - - if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) { - for (unsigned i = 0, e = CVA->getNumOperands(); i != e; ++i) - EmitGlobalConstant(CVA->getOperand(i), GblS); - return; - } else if (isa<ConstantAggregateZero>(CV)) { - GblS.emitZeros(Size); - return; - } else if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) { - EmitGlobalConstantStruct(CVS, GblS); - return; - } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) { - APInt Val = CFP->getValueAPF().bitcastToAPInt(); - if (CFP->getType()->isDoubleTy()) - GblS.emitWord64(Val.getZExtValue()); - else if (CFP->getType()->isFloatTy()) - GblS.emitWord32(Val.getZExtValue()); - else if (CFP->getType()->isX86_FP80Ty()) { - unsigned PadSize = TD->getTypeAllocSize(CFP->getType())- - TD->getTypeStoreSize(CFP->getType()); - GblS.emitWordFP80(Val.getRawData(), PadSize); - } else if (CFP->getType()->isPPC_FP128Ty()) - llvm_unreachable("PPC_FP128Ty global emission not implemented"); - return; - } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { - if (Size == 1) - GblS.emitByte(CI->getZExtValue()); - else if (Size == 2) - GblS.emitWord16(CI->getZExtValue()); - else if (Size == 4) - GblS.emitWord32(CI->getZExtValue()); - else - EmitGlobalConstantLargeInt(CI, GblS); - return; - } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) { - VectorType *PTy = CP->getType(); - for (unsigned I = 0, E = PTy->getNumElements(); I < E; ++I) - EmitGlobalConstant(CP->getOperand(I), GblS); - return; - } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) { - // Resolve a constant expression which returns a (Constant, Offset) - // pair. If 'Res.first' is a GlobalValue, emit a relocation with - // the offset 'Res.second', otherwise emit a global constant like - // it is always done for not contant expression types. - CstExprResTy Res = ResolveConstantExpr(CE); - const Constant *Op = Res.first; - - if (isa<GlobalValue>(Op)) - EmitGlobalDataRelocation(cast<const GlobalValue>(Op), - TD->getTypeAllocSize(Op->getType()), - GblS, Res.second); - else - EmitGlobalConstant(Op, GblS); - - return; - } else if (CV->getType()->getTypeID() == Type::PointerTyID) { - // Fill the data entry with zeros or emit a relocation entry - if (isa<ConstantPointerNull>(CV)) - GblS.emitZeros(Size); - else - EmitGlobalDataRelocation(cast<const GlobalValue>(CV), - Size, GblS); - return; - } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) { - // This is a constant address for a global variable or function and - // therefore must be referenced using a relocation entry. - EmitGlobalDataRelocation(GV, Size, GblS); - return; - } - - std::string msg; - raw_string_ostream ErrorMsg(msg); - ErrorMsg << "Constant unimp for type: " << *CV->getType(); - report_fatal_error(ErrorMsg.str()); -} - -// ResolveConstantExpr - Resolve the constant expression until it stop -// yielding other constant expressions. 
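Aside (not part of the patch): the padSize expression in EmitGlobalConstantStruct above pads each struct field out to the offset of the next field, or to the total struct size for the last field. A stand-alone sketch of that rule, with invented names:

  #include <cstdint>
  #include <vector>

  struct FieldInfo {
    uint64_t Offset; // offset of the field inside the struct
    uint64_t Size;   // allocation size of the field's type
  };

  // Number of zero bytes to emit after each field so that the next field (or
  // the end of the struct) starts at its proper offset.
  std::vector<uint64_t> trailingPadding(const std::vector<FieldInfo> &Fields,
                                        uint64_t StructSize) {
    std::vector<uint64_t> Pad(Fields.size());
    for (size_t i = 0; i != Fields.size(); ++i) {
      uint64_t NextOffset =
          i + 1 == Fields.size() ? StructSize : Fields[i + 1].Offset;
      Pad[i] = (NextOffset - Fields[i].Offset) - Fields[i].Size;
    }
    return Pad;
  }

  // Example: struct { char c; int i; } laid out as { c at offset 0, size 1;
  // i at offset 4, size 4 } with total size 8 yields {3, 0}.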
-CstExprResTy ELFWriter::ResolveConstantExpr(const Constant *CV) { - const TargetData *TD = TM.getTargetData(); - - // There ins't constant expression inside others anymore - if (!isa<ConstantExpr>(CV)) - return std::make_pair(CV, 0); - - const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV); - switch (CE->getOpcode()) { - case Instruction::BitCast: - return ResolveConstantExpr(CE->getOperand(0)); - - case Instruction::GetElementPtr: { - const Constant *ptrVal = CE->getOperand(0); - SmallVector<Value*, 8> idxVec(CE->op_begin()+1, CE->op_end()); - int64_t Offset = TD->getIndexedOffset(ptrVal->getType(), idxVec); - return std::make_pair(ptrVal, Offset); - } - case Instruction::IntToPtr: { - Constant *Op = CE->getOperand(0); - Op = ConstantExpr::getIntegerCast(Op, TD->getIntPtrType(CV->getContext()), - false/*ZExt*/); - return ResolveConstantExpr(Op); - } - case Instruction::PtrToInt: { - Constant *Op = CE->getOperand(0); - Type *Ty = CE->getType(); - - // We can emit the pointer value into this slot if the slot is an - // integer slot greater or equal to the size of the pointer. - if (TD->getTypeAllocSize(Ty) == TD->getTypeAllocSize(Op->getType())) - return ResolveConstantExpr(Op); - - llvm_unreachable("Integer size less then pointer size"); - } - case Instruction::Add: - case Instruction::Sub: { - // Only handle cases where there's a constant expression with GlobalValue - // as first operand and ConstantInt as second, which are the cases we can - // solve direclty using a relocation entry. GlobalValue=Op0, CstInt=Op1 - // 1) Instruction::Add => (global) + CstInt - // 2) Instruction::Sub => (global) + -CstInt - const Constant *Op0 = CE->getOperand(0); - const Constant *Op1 = CE->getOperand(1); - assert(isa<ConstantInt>(Op1) && "Op1 must be a ConstantInt"); - - CstExprResTy Res = ResolveConstantExpr(Op0); - assert(isa<GlobalValue>(Res.first) && "Op0 must be a GlobalValue"); - - const APInt &RHS = cast<ConstantInt>(Op1)->getValue(); - switch (CE->getOpcode()) { - case Instruction::Add: - return std::make_pair(Res.first, RHS.getSExtValue()); - case Instruction::Sub: - return std::make_pair(Res.first, (-RHS).getSExtValue()); - } - } - } - - report_fatal_error(CE->getOpcodeName() + - StringRef(": Unsupported ConstantExpr type")); - - return std::make_pair(CV, 0); // silence warning -} - -void ELFWriter::EmitGlobalDataRelocation(const GlobalValue *GV, unsigned Size, - ELFSection &GblS, int64_t Offset) { - // Create the relocation entry for the global value - MachineRelocation MR = - MachineRelocation::getGV(GblS.getCurrentPCOffset(), - TEW->getAbsoluteLabelMachineRelTy(), - const_cast<GlobalValue*>(GV), - Offset); - - // Fill the data entry with zeros - GblS.emitZeros(Size); - - // Add the relocation entry for the current data section - GblS.addRelocation(MR); -} - -void ELFWriter::EmitGlobalConstantLargeInt(const ConstantInt *CI, - ELFSection &S) { - const TargetData *TD = TM.getTargetData(); - unsigned BitWidth = CI->getBitWidth(); - assert(isPowerOf2_32(BitWidth) && - "Non-power-of-2-sized integers not handled!"); - - const uint64_t *RawData = CI->getValue().getRawData(); - uint64_t Val = 0; - for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) { - Val = (TD->isBigEndian()) ? RawData[e - i - 1] : RawData[i]; - S.emitWord64(Val); - } -} - -/// EmitSpecialLLVMGlobal - Check to see if the specified global is a -/// special global used by LLVM. If so, emit it and return true, otherwise -/// do nothing and return false. 
-bool ELFWriter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { - if (GV->getName() == "llvm.used") - llvm_unreachable("not implemented yet"); - - // Ignore debug and non-emitted data. This handles llvm.compiler.used. - if (GV->getSection() == "llvm.metadata" || - GV->hasAvailableExternallyLinkage()) - return true; - - if (!GV->hasAppendingLinkage()) return false; - - assert(GV->hasInitializer() && "Not a special LLVM global!"); - - const TargetData *TD = TM.getTargetData(); - unsigned Align = TD->getPointerPrefAlignment(); - if (GV->getName() == "llvm.global_ctors") { - ELFSection &Ctor = getCtorSection(); - Ctor.emitAlignment(Align); - EmitXXStructorList(GV->getInitializer(), Ctor); - return true; - } - - if (GV->getName() == "llvm.global_dtors") { - ELFSection &Dtor = getDtorSection(); - Dtor.emitAlignment(Align); - EmitXXStructorList(GV->getInitializer(), Dtor); - return true; - } - - return false; -} - -/// EmitXXStructorList - Emit the ctor or dtor list. This just emits out the -/// function pointers, ignoring the init priority. -void ELFWriter::EmitXXStructorList(const Constant *List, ELFSection &Xtor) { - // Should be an array of '{ i32, void ()* }' structs. The first value is the - // init priority, which we ignore. - if (List->isNullValue()) return; - const ConstantArray *InitList = cast<ConstantArray>(List); - for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { - if (InitList->getOperand(i)->isNullValue()) - continue; - ConstantStruct *CS = cast<ConstantStruct>(InitList->getOperand(i)); - - if (CS->getOperand(1)->isNullValue()) - continue; - - // Emit the function pointer. - EmitGlobalConstant(CS->getOperand(1), Xtor); - } -} - -bool ELFWriter::runOnMachineFunction(MachineFunction &MF) { - // Nothing to do here, this is all done through the ElfCE object above. - return false; -} - -/// doFinalization - Now that the module has been completely processed, emit -/// the ELF file to 'O'. -bool ELFWriter::doFinalization(Module &M) { - // Emit .data section placeholder - getDataSection(); - - // Emit .bss section placeholder - getBSSSection(); - - // Build and emit data, bss and "common" sections. - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) - EmitGlobal(I); - - // Emit all pending globals - for (PendingGblsIter I = PendingGlobals.begin(), E = PendingGlobals.end(); - I != E; ++I) - EmitGlobal(*I); - - // Emit all pending externals - for (PendingExtsIter I = PendingExternals.begin(), E = PendingExternals.end(); - I != E; ++I) - SymbolList.push_back(ELFSym::getExtSym(*I)); - - // Emit a symbol for each section created until now, skip null section - for (unsigned i = 1, e = SectionList.size(); i < e; ++i) { - ELFSection &ES = *SectionList[i]; - ELFSym *SectionSym = ELFSym::getSectionSym(); - SectionSym->SectionIdx = ES.SectionIdx; - SymbolList.push_back(SectionSym); - ES.Sym = SymbolList.back(); - } - - // Emit string table - EmitStringTable(M.getModuleIdentifier()); - - // Emit the symbol table now, if non-empty. - EmitSymbolTable(); - - // Emit the relocation sections. - EmitRelocations(); - - // Emit the sections string table. - EmitSectionTableStringTable(); - - // Dump the sections and section table to the .o file. 
- OutputSectionsAndSectionTable(); - - return false; -} - -// RelocateField - Patch relocatable field with 'Offset' in 'BO' -// using a 'Value' of known 'Size' -void ELFWriter::RelocateField(BinaryObject &BO, uint32_t Offset, - int64_t Value, unsigned Size) { - if (Size == 32) - BO.fixWord32(Value, Offset); - else if (Size == 64) - BO.fixWord64(Value, Offset); - else - llvm_unreachable("don't know howto patch relocatable field"); -} - -/// EmitRelocations - Emit relocations -void ELFWriter::EmitRelocations() { - - // True if the target uses the relocation entry to hold the addend, - // otherwise the addend is written directly to the relocatable field. - bool HasRelA = TEW->hasRelocationAddend(); - - // Create Relocation sections for each section which needs it. - for (unsigned i=0, e=SectionList.size(); i != e; ++i) { - ELFSection &S = *SectionList[i]; - - // This section does not have relocations - if (!S.hasRelocations()) continue; - ELFSection &RelSec = getRelocSection(S); - - // 'Link' - Section hdr idx of the associated symbol table - // 'Info' - Section hdr idx of the section to which the relocation applies - ELFSection &SymTab = getSymbolTableSection(); - RelSec.Link = SymTab.SectionIdx; - RelSec.Info = S.SectionIdx; - RelSec.EntSize = TEW->getRelocationEntrySize(); - - // Get the relocations from Section - std::vector<MachineRelocation> Relos = S.getRelocations(); - for (std::vector<MachineRelocation>::iterator MRI = Relos.begin(), - MRE = Relos.end(); MRI != MRE; ++MRI) { - MachineRelocation &MR = *MRI; - - // Relocatable field offset from the section start - unsigned RelOffset = MR.getMachineCodeOffset(); - - // Symbol index in the symbol table - unsigned SymIdx = 0; - - // Target specific relocation field type and size - unsigned RelType = TEW->getRelocationType(MR.getRelocationType()); - unsigned RelTySize = TEW->getRelocationTySize(RelType); - int64_t Addend = 0; - - // There are several machine relocations types, and each one of - // them needs a different approach to retrieve the symbol table index. - if (MR.isGlobalValue()) { - const GlobalValue *G = MR.getGlobalValue(); - int64_t GlobalOffset = MR.getConstantVal(); - SymIdx = GblSymLookup[G]; - if (G->hasPrivateLinkage()) { - // If the target uses a section offset in the relocation: - // SymIdx + Addend = section sym for global + section offset - unsigned SectionIdx = PrivateSyms[SymIdx]->SectionIdx; - Addend = PrivateSyms[SymIdx]->Value + GlobalOffset; - SymIdx = SectionList[SectionIdx]->getSymbolTableIndex(); - } else { - Addend = TEW->getDefaultAddendForRelTy(RelType, GlobalOffset); - } - } else if (MR.isExternalSymbol()) { - const char *ExtSym = MR.getExternalSymbol(); - SymIdx = ExtSymLookup[ExtSym]; - Addend = TEW->getDefaultAddendForRelTy(RelType); - } else { - // Get the symbol index for the section symbol - unsigned SectionIdx = MR.getConstantVal(); - SymIdx = SectionList[SectionIdx]->getSymbolTableIndex(); - - // The symbol offset inside the section - int64_t SymOffset = (int64_t)MR.getResultPointer(); - - // For pc relative relocations where symbols are defined in the same - // section they are referenced, ignore the relocation entry and patch - // the relocatable field with the symbol offset directly. 
- if (S.SectionIdx == SectionIdx && TEW->isPCRelativeRel(RelType)) { - int64_t Value = TEW->computeRelocation(SymOffset, RelOffset, RelType); - RelocateField(S, RelOffset, Value, RelTySize); - continue; - } - - Addend = TEW->getDefaultAddendForRelTy(RelType, SymOffset); - } - - // The target without addend on the relocation symbol must be - // patched in the relocation place itself to contain the addend - // otherwise write zeros to make sure there is no garbage there - RelocateField(S, RelOffset, HasRelA ? 0 : Addend, RelTySize); - - // Get the relocation entry and emit to the relocation section - ELFRelocation Rel(RelOffset, SymIdx, RelType, HasRelA, Addend); - EmitRelocation(RelSec, Rel, HasRelA); - } - } -} - -/// EmitRelocation - Write relocation 'Rel' to the relocation section 'Rel' -void ELFWriter::EmitRelocation(BinaryObject &RelSec, ELFRelocation &Rel, - bool HasRelA) { - RelSec.emitWord(Rel.getOffset()); - RelSec.emitWord(Rel.getInfo(is64Bit)); - if (HasRelA) - RelSec.emitWord(Rel.getAddend()); -} - -/// EmitSymbol - Write symbol 'Sym' to the symbol table 'SymbolTable' -void ELFWriter::EmitSymbol(BinaryObject &SymbolTable, ELFSym &Sym) { - if (is64Bit) { - SymbolTable.emitWord32(Sym.NameIdx); - SymbolTable.emitByte(Sym.Info); - SymbolTable.emitByte(Sym.Other); - SymbolTable.emitWord16(Sym.SectionIdx); - SymbolTable.emitWord64(Sym.Value); - SymbolTable.emitWord64(Sym.Size); - } else { - SymbolTable.emitWord32(Sym.NameIdx); - SymbolTable.emitWord32(Sym.Value); - SymbolTable.emitWord32(Sym.Size); - SymbolTable.emitByte(Sym.Info); - SymbolTable.emitByte(Sym.Other); - SymbolTable.emitWord16(Sym.SectionIdx); - } -} - -/// EmitSectionHeader - Write section 'Section' header in 'SHdrTab' -/// Section Header Table -void ELFWriter::EmitSectionHeader(BinaryObject &SHdrTab, - const ELFSection &SHdr) { - SHdrTab.emitWord32(SHdr.NameIdx); - SHdrTab.emitWord32(SHdr.Type); - if (is64Bit) { - SHdrTab.emitWord64(SHdr.Flags); - SHdrTab.emitWord(SHdr.Addr); - SHdrTab.emitWord(SHdr.Offset); - SHdrTab.emitWord64(SHdr.Size); - SHdrTab.emitWord32(SHdr.Link); - SHdrTab.emitWord32(SHdr.Info); - SHdrTab.emitWord64(SHdr.Align); - SHdrTab.emitWord64(SHdr.EntSize); - } else { - SHdrTab.emitWord32(SHdr.Flags); - SHdrTab.emitWord(SHdr.Addr); - SHdrTab.emitWord(SHdr.Offset); - SHdrTab.emitWord32(SHdr.Size); - SHdrTab.emitWord32(SHdr.Link); - SHdrTab.emitWord32(SHdr.Info); - SHdrTab.emitWord32(SHdr.Align); - SHdrTab.emitWord32(SHdr.EntSize); - } -} - -/// EmitStringTable - If the current symbol table is non-empty, emit the string -/// table for it -void ELFWriter::EmitStringTable(const std::string &ModuleName) { - if (!SymbolList.size()) return; // Empty symbol table. - ELFSection &StrTab = getStringTableSection(); - - // Set the zero'th symbol to a null byte, as required. - StrTab.emitByte(0); - - // Walk on the symbol list and write symbol names into the string table. - unsigned Index = 1; - for (ELFSymIter I=SymbolList.begin(), E=SymbolList.end(); I != E; ++I) { - ELFSym &Sym = *(*I); - - std::string Name; - if (Sym.isGlobalValue()) { - SmallString<40> NameStr; - Mang->getNameWithPrefix(NameStr, Sym.getGlobalValue(), false); - Name.append(NameStr.begin(), NameStr.end()); - } else if (Sym.isExternalSym()) - Name.append(Sym.getExternalSymbol()); - else if (Sym.isFileType()) - Name.append(ModuleName); - - if (Name.empty()) { - Sym.NameIdx = 0; - } else { - Sym.NameIdx = Index; - StrTab.emitString(Name); - - // Keep track of the number of bytes emitted to this section. 
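Aside (not part of the patch): EmitRelocation() above writes each entry as r_offset, then r_info, then r_addend when the target uses SHT_RELA sections. The r_info word packs the symbol-table index together with the relocation type; the helpers below follow the standard ELF32_R_INFO / ELF64_R_INFO packing and are only meant to make Rel.getInfo(is64Bit) concrete.

  #include <cstdint>

  // ELF32: upper 24 bits hold the symbol index, lower 8 bits the type.
  uint32_t elf32RInfo(uint32_t SymIdx, uint8_t Type) {
    return (SymIdx << 8) | Type;
  }

  // ELF64: upper 32 bits hold the symbol index, lower 32 bits the type.
  uint64_t elf64RInfo(uint32_t SymIdx, uint32_t Type) {
    return (uint64_t(SymIdx) << 32) | Type;
  }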
- Index += Name.size()+1; - } - } - assert(Index == StrTab.size()); - StrTab.Size = Index; -} - -// SortSymbols - On the symbol table local symbols must come before -// all other symbols with non-local bindings. The return value is -// the position of the first non local symbol. -unsigned ELFWriter::SortSymbols() { - unsigned FirstNonLocalSymbol; - std::vector<ELFSym*> LocalSyms, OtherSyms; - - for (ELFSymIter I=SymbolList.begin(), E=SymbolList.end(); I != E; ++I) { - if ((*I)->isLocalBind()) - LocalSyms.push_back(*I); - else - OtherSyms.push_back(*I); - } - SymbolList.clear(); - FirstNonLocalSymbol = LocalSyms.size(); - - for (unsigned i = 0; i < FirstNonLocalSymbol; ++i) - SymbolList.push_back(LocalSyms[i]); - - for (ELFSymIter I=OtherSyms.begin(), E=OtherSyms.end(); I != E; ++I) - SymbolList.push_back(*I); - - LocalSyms.clear(); - OtherSyms.clear(); - - return FirstNonLocalSymbol; -} - -/// EmitSymbolTable - Emit the symbol table itself. -void ELFWriter::EmitSymbolTable() { - if (!SymbolList.size()) return; // Empty symbol table. - - // Now that we have emitted the string table and know the offset into the - // string table of each symbol, emit the symbol table itself. - ELFSection &SymTab = getSymbolTableSection(); - SymTab.Align = TEW->getPrefELFAlignment(); - - // Section Index of .strtab. - SymTab.Link = getStringTableSection().SectionIdx; - - // Size of each symtab entry. - SymTab.EntSize = TEW->getSymTabEntrySize(); - - // Reorder the symbol table with local symbols first! - unsigned FirstNonLocalSymbol = SortSymbols(); - - // Emit all the symbols to the symbol table. - for (unsigned i = 0, e = SymbolList.size(); i < e; ++i) { - ELFSym &Sym = *SymbolList[i]; - - // Emit symbol to the symbol table - EmitSymbol(SymTab, Sym); - - // Record the symbol table index for each symbol - if (Sym.isGlobalValue()) - GblSymLookup[Sym.getGlobalValue()] = i; - else if (Sym.isExternalSym()) - ExtSymLookup[Sym.getExternalSymbol()] = i; - - // Keep track on the symbol index into the symbol table - Sym.SymTabIdx = i; - } - - // One greater than the symbol table index of the last local symbol - SymTab.Info = FirstNonLocalSymbol; - SymTab.Size = SymTab.size(); -} - -/// EmitSectionTableStringTable - This method adds and emits a section for the -/// ELF Section Table string table: the string table that holds all of the -/// section names. -void ELFWriter::EmitSectionTableStringTable() { - // First step: add the section for the string table to the list of sections: - ELFSection &SHStrTab = getSectionHeaderStringTableSection(); - - // Now that we know which section number is the .shstrtab section, update the - // e_shstrndx entry in the ELF header. - ElfHdr.fixWord16(SHStrTab.SectionIdx, ELFHdr_e_shstrndx_Offset); - - // Set the NameIdx of each section in the string table and emit the bytes for - // the string table. - unsigned Index = 0; - - for (ELFSectionIter I=SectionList.begin(), E=SectionList.end(); I != E; ++I) { - ELFSection &S = *(*I); - // Set the index into the table. Note if we have lots of entries with - // common suffixes, we could memoize them here if we cared. - S.NameIdx = Index; - SHStrTab.emitString(S.getName()); - - // Keep track of the number of bytes emitted to this section. - Index += S.getName().size()+1; - } - - // Set the size of .shstrtab now that we know what it is. 
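Aside (not part of the patch): both EmitStringTable() and EmitSectionTableStringTable() above use the usual ELF convention in which a "name index" is simply the byte offset of a NUL-terminated string inside the table, with offset 0 reserved for the empty name. A stand-alone sketch, with invented names:

  #include <cstdint>
  #include <string>

  class NameTable {
    std::string Blob;
  public:
    NameTable() { Blob.push_back('\0'); } // offset 0: the empty name

    // Appends Name plus a terminating NUL and returns its offset, i.e. the
    // value an ELF writer would store in st_name or sh_name.
    uint32_t add(const std::string &Name) {
      if (Name.empty())
        return 0;                         // reuse the leading NUL
      uint32_t Offset = static_cast<uint32_t>(Blob.size());
      Blob += Name;
      Blob.push_back('\0');
      return Offset;
    }

    const std::string &data() const { return Blob; }
  };

  // On a fresh table, add(".text") returns 1 and add(".data") then returns 7.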
- assert(Index == SHStrTab.size()); - SHStrTab.Size = Index; -} - -/// OutputSectionsAndSectionTable - Now that we have constructed the file header -/// and all of the sections, emit these to the ostream destination and emit the -/// SectionTable. -void ELFWriter::OutputSectionsAndSectionTable() { - // Pass #1: Compute the file offset for each section. - size_t FileOff = ElfHdr.size(); // File header first. - - // Adjust alignment of all section if needed, skip the null section. - for (unsigned i=1, e=SectionList.size(); i < e; ++i) { - ELFSection &ES = *SectionList[i]; - if (!ES.size()) { - ES.Offset = FileOff; - continue; - } - - // Update Section size - if (!ES.Size) - ES.Size = ES.size(); - - // Align FileOff to whatever the alignment restrictions of the section are. - if (ES.Align) - FileOff = (FileOff+ES.Align-1) & ~(ES.Align-1); - - ES.Offset = FileOff; - FileOff += ES.Size; - } - - // Align Section Header. - unsigned TableAlign = TEW->getPrefELFAlignment(); - FileOff = (FileOff+TableAlign-1) & ~(TableAlign-1); - - // Now that we know where all of the sections will be emitted, set the e_shnum - // entry in the ELF header. - ElfHdr.fixWord16(NumSections, ELFHdr_e_shnum_Offset); - - // Now that we know the offset in the file of the section table, update the - // e_shoff address in the ELF header. - ElfHdr.fixWord(FileOff, ELFHdr_e_shoff_Offset); - - // Now that we know all of the data in the file header, emit it and all of the - // sections! - O.write((char *)&ElfHdr.getData()[0], ElfHdr.size()); - FileOff = ElfHdr.size(); - - // Section Header Table blob - BinaryObject SHdrTable(isLittleEndian, is64Bit); - - // Emit all of sections to the file and build the section header table. - for (ELFSectionIter I=SectionList.begin(), E=SectionList.end(); I != E; ++I) { - ELFSection &S = *(*I); - DEBUG(dbgs() << "SectionIdx: " << S.SectionIdx << ", Name: " << S.getName() - << ", Size: " << S.Size << ", Offset: " << S.Offset - << ", SectionData Size: " << S.size() << "\n"); - - // Align FileOff to whatever the alignment restrictions of the section are. - if (S.size()) { - if (S.Align) { - for (size_t NewFileOff = (FileOff+S.Align-1) & ~(S.Align-1); - FileOff != NewFileOff; ++FileOff) - O << (char)0xAB; - } - O.write((char *)&S.getData()[0], S.Size); - FileOff += S.Size; - } - - EmitSectionHeader(SHdrTable, S); - } - - // Align output for the section table. - for (size_t NewFileOff = (FileOff+TableAlign-1) & ~(TableAlign-1); - FileOff != NewFileOff; ++FileOff) - O << (char)0xAB; - - // Emit the section table itself. - O.write((char *)&SHdrTable.getData()[0], SHdrTable.size()); -} diff --git a/lib/CodeGen/ELFWriter.h b/lib/CodeGen/ELFWriter.h deleted file mode 100644 index 6f7fbace8aba..000000000000 --- a/lib/CodeGen/ELFWriter.h +++ /dev/null @@ -1,251 +0,0 @@ -//===-- ELFWriter.h - Target-independent ELF writer support -----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the ELFWriter class. 
-// -//===----------------------------------------------------------------------===// - -#ifndef ELFWRITER_H -#define ELFWRITER_H - -#include "llvm/ADT/SetVector.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include <map> - -namespace llvm { - class BinaryObject; - class Constant; - class ConstantInt; - class ConstantStruct; - class ELFCodeEmitter; - class ELFRelocation; - class ELFSection; - struct ELFSym; - class GlobalVariable; - class JITDebugRegisterer; - class Mangler; - class MachineCodeEmitter; - class MachineConstantPoolEntry; - class ObjectCodeEmitter; - class MCAsmInfo; - class TargetELFWriterInfo; - class TargetLoweringObjectFile; - class raw_ostream; - class SectionKind; - class MCContext; - class TargetMachine; - - typedef std::vector<ELFSym*>::iterator ELFSymIter; - typedef std::vector<ELFSection*>::iterator ELFSectionIter; - typedef SetVector<const GlobalValue*>::const_iterator PendingGblsIter; - typedef SetVector<const char *>::const_iterator PendingExtsIter; - typedef std::pair<const Constant *, int64_t> CstExprResTy; - - /// ELFWriter - This class implements the common target-independent code for - /// writing ELF files. Targets should derive a class from this to - /// parameterize the output format. - /// - class ELFWriter : public MachineFunctionPass { - friend class ELFCodeEmitter; - friend class JITDebugRegisterer; - public: - static char ID; - - /// Return the ELFCodeEmitter as an instance of ObjectCodeEmitter - ObjectCodeEmitter *getObjectCodeEmitter() { - return reinterpret_cast<ObjectCodeEmitter*>(ElfCE); - } - - ELFWriter(raw_ostream &O, TargetMachine &TM); - ~ELFWriter(); - - protected: - /// Output stream to send the resultant object file to. - raw_ostream &O; - - /// Target machine description. - TargetMachine &TM; - - /// Context object for machine code objects. - MCContext &OutContext; - - /// Target Elf Writer description. - const TargetELFWriterInfo *TEW; - - /// Mang - The object used to perform name mangling for this module. - Mangler *Mang; - - /// MCE - The MachineCodeEmitter object that we are exposing to emit machine - /// code for functions to the .o file. - ELFCodeEmitter *ElfCE; - - /// TLOF - Target Lowering Object File, provide section names for globals - /// and other object file specific stuff - const TargetLoweringObjectFile &TLOF; - - /// MAI - Target Asm Info, provide information about section names for - /// globals and other target specific stuff. - const MCAsmInfo *MAI; - - //===------------------------------------------------------------------===// - // Properties inferred automatically from the target machine. - //===------------------------------------------------------------------===// - - /// is64Bit/isLittleEndian - This information is inferred from the target - /// machine directly, indicating whether to emit a 32- or 64-bit ELF file. - bool is64Bit, isLittleEndian; - - /// doInitialization - Emit the file header and all of the global variables - /// for the module to the ELF file. - bool doInitialization(Module &M); - bool runOnMachineFunction(MachineFunction &MF); - - /// doFinalization - Now that the module has been completely processed, emit - /// the ELF file to 'O'. - bool doFinalization(Module &M); - - private: - /// Blob containing the Elf header - BinaryObject ElfHdr; - - /// SectionList - This is the list of sections that we have emitted to the - /// file. Once the file has been completely built, the section header table - /// is constructed from this info. 
- std::vector<ELFSection*> SectionList; - unsigned NumSections; // Always = SectionList.size() - - /// SectionLookup - This is a mapping from section name to section number in - /// the SectionList. Used to quickly gather the Section Index from MAI names - std::map<std::string, ELFSection*> SectionLookup; - - /// PendingGlobals - Globals not processed as symbols yet. - SetVector<const GlobalValue*> PendingGlobals; - - /// GblSymLookup - This is a mapping from global value to a symbol index - /// in the symbol table or private symbols list. This is useful since reloc - /// symbol references must be quickly mapped to their indices on the lists. - std::map<const GlobalValue*, uint32_t> GblSymLookup; - - /// PendingExternals - Externals not processed as symbols yet. - SetVector<const char *> PendingExternals; - - /// ExtSymLookup - This is a mapping from externals to a symbol index - /// in the symbol table list. This is useful since reloc symbol references - /// must be quickly mapped to their symbol table indices. - std::map<const char *, uint32_t> ExtSymLookup; - - /// SymbolList - This is the list of symbols emitted to the symbol table. - /// When the SymbolList is finally built, local symbols must be placed in - /// the beginning while non-locals at the end. - std::vector<ELFSym*> SymbolList; - - /// PrivateSyms - Record private symbols, every symbol here must never be - /// present in the SymbolList. - std::vector<ELFSym*> PrivateSyms; - - /// getSection - Return the section with the specified name, creating a new - /// section if one does not already exist. - ELFSection &getSection(const std::string &Name, unsigned Type, - unsigned Flags = 0, unsigned Align = 0) { - ELFSection *&SN = SectionLookup[Name]; - if (SN) return *SN; - - SectionList.push_back(new ELFSection(Name, isLittleEndian, is64Bit)); - SN = SectionList.back(); - SN->SectionIdx = NumSections++; - SN->Type = Type; - SN->Flags = Flags; - SN->Link = ELF::SHN_UNDEF; - SN->Align = Align; - return *SN; - } - - ELFSection &getNonExecStackSection() { - return getSection(".note.GNU-stack", ELF::SHT_PROGBITS, 0, 1); - } - - ELFSection &getSymbolTableSection() { - return getSection(".symtab", ELF::SHT_SYMTAB, 0); - } - - ELFSection &getStringTableSection() { - return getSection(".strtab", ELF::SHT_STRTAB, 0, 1); - } - - ELFSection &getSectionHeaderStringTableSection() { - return getSection(".shstrtab", ELF::SHT_STRTAB, 0, 1); - } - - ELFSection &getNullSection() { - return getSection("", ELF::SHT_NULL, 0); - } - - ELFSection &getDataSection(); - ELFSection &getBSSSection(); - ELFSection &getCtorSection(); - ELFSection &getDtorSection(); - ELFSection &getJumpTableSection(); - ELFSection &getConstantPoolSection(MachineConstantPoolEntry &CPE); - ELFSection &getTextSection(const Function *F); - ELFSection &getRelocSection(ELFSection &S); - - // Helpers for obtaining ELF specific info. - unsigned getGlobalELFBinding(const GlobalValue *GV); - unsigned getGlobalELFType(const GlobalValue *GV); - unsigned getGlobalELFVisibility(const GlobalValue *GV); - - // AddPendingGlobalSymbol - Add a global to be processed and to - // the global symbol lookup, use a zero index because the table - // index will be determined later. - void AddPendingGlobalSymbol(const GlobalValue *GV, - bool AddToLookup = false); - - // AddPendingExternalSymbol - Add the external to be processed - // and to the external symbol lookup, use a zero index because - // the symbol table index will be determined later. 
- void AddPendingExternalSymbol(const char *External); - - // AddToSymbolList - Update the symbol lookup and If the symbol is - // private add it to PrivateSyms list, otherwise to SymbolList. - void AddToSymbolList(ELFSym *GblSym); - - // As we complete the ELF file, we need to update fields in the ELF header - // (e.g. the location of the section table). These members keep track of - // the offset in ELFHeader of these various pieces to update and other - // locations in the file. - unsigned ELFHdr_e_shoff_Offset; // e_shoff in ELF header. - unsigned ELFHdr_e_shstrndx_Offset; // e_shstrndx in ELF header. - unsigned ELFHdr_e_shnum_Offset; // e_shnum in ELF header. - - private: - void EmitGlobal(const GlobalValue *GV); - void EmitGlobalConstant(const Constant *C, ELFSection &GblS); - void EmitGlobalConstantStruct(const ConstantStruct *CVS, - ELFSection &GblS); - void EmitGlobalConstantLargeInt(const ConstantInt *CI, ELFSection &S); - void EmitGlobalDataRelocation(const GlobalValue *GV, unsigned Size, - ELFSection &GblS, int64_t Offset = 0); - bool EmitSpecialLLVMGlobal(const GlobalVariable *GV); - void EmitXXStructorList(const Constant *List, ELFSection &Xtor); - void EmitRelocations(); - void EmitRelocation(BinaryObject &RelSec, ELFRelocation &Rel, bool HasRelA); - void EmitSectionHeader(BinaryObject &SHdrTab, const ELFSection &SHdr); - void EmitSectionTableStringTable(); - void EmitSymbol(BinaryObject &SymbolTable, ELFSym &Sym); - void EmitSymbolTable(); - void EmitStringTable(const std::string &ModuleName); - void OutputSectionsAndSectionTable(); - void RelocateField(BinaryObject &BO, uint32_t Offset, int64_t Value, - unsigned Size); - unsigned SortSymbols(); - CstExprResTy ResolveConstantExpr(const Constant *CV); - }; -} - -#endif diff --git a/lib/CodeGen/EdgeBundles.cpp b/lib/CodeGen/EdgeBundles.cpp index a7aba89b87f3..3bb04657b58a 100644 --- a/lib/CodeGen/EdgeBundles.cpp +++ b/lib/CodeGen/EdgeBundles.cpp @@ -77,7 +77,7 @@ void EdgeBundles::view() const { /// Specialize WriteGraph, the standard implementation won't work. raw_ostream &llvm::WriteGraph(raw_ostream &O, const EdgeBundles &G, bool ShortNames, - const std::string &Title) { + const Twine &Title) { const MachineFunction *MF = G.getMachineFunction(); O << "digraph {\n"; diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp index 01dccdb71e4b..a48c5400abcb 100644 --- a/lib/CodeGen/ExecutionDepsFix.cpp +++ b/lib/CodeGen/ExecutionDepsFix.cpp @@ -26,7 +26,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -45,7 +45,7 @@ using namespace llvm; /// DomainValue for each register, but it may contain multiple execution /// domains. A register value is initially created in a single execution /// domain, but if we were forced to pay the penalty of a domain crossing, we -/// keep track of the fact the the register is now available in multiple +/// keep track of the fact that the register is now available in multiple /// domains. namespace { struct DomainValue { @@ -57,8 +57,10 @@ struct DomainValue { // domains where the register is available for free. unsigned AvailableDomains; - // Position of the last defining instruction. - unsigned Dist; + // Pointer to the next DomainValue in a chain. 
When two DomainValues are + // merged, Victim.Next is set to point to Victor, so old DomainValue + // references can be updated by folowing the chain. + DomainValue *Next; // Twiddleable instructions using or defining these registers. SmallVector<MachineInstr*, 8> Instrs; @@ -92,16 +94,33 @@ struct DomainValue { return CountTrailingZeros_32(AvailableDomains); } - DomainValue() { clear(); } + DomainValue() : Refs(0) { clear(); } + // Clear this DomainValue and point to next which has all its data. void clear() { - Refs = AvailableDomains = Dist = 0; + AvailableDomains = 0; + Next = 0; Instrs.clear(); } }; } namespace { +/// LiveReg - Information about a live register. +struct LiveReg { + /// Value currently in this register, or NULL when no value is being tracked. + /// This counts as a DomainValue reference. + DomainValue *Value; + + /// Instruction that defined this register, relative to the beginning of the + /// current basic block. When a LiveReg is used to represent a live-out + /// register, this value is relative to the end of the basic block, so it + /// will be a negative number. + int Def; +}; +} // anonynous namespace + +namespace { class ExeDepsFix : public MachineFunctionPass { static char ID; SpecificBumpPtrAllocator<DomainValue> Allocator; @@ -111,13 +130,19 @@ class ExeDepsFix : public MachineFunctionPass { MachineFunction *MF; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; - MachineBasicBlock *MBB; std::vector<int> AliasMap; const unsigned NumRegs; - DomainValue **LiveRegs; - typedef DenseMap<MachineBasicBlock*,DomainValue**> LiveOutMap; + LiveReg *LiveRegs; + typedef DenseMap<MachineBasicBlock*, LiveReg*> LiveOutMap; LiveOutMap LiveOuts; - unsigned Distance; + + /// Current instruction number. + /// The first instruction in each basic block is 0. + int CurInstr; + + /// True when the current block has a predecessor that hasn't been visited + /// yet. + bool SeenUnknownBackEdge; public: ExeDepsFix(const TargetRegisterClass *rc) @@ -131,26 +156,33 @@ public: virtual bool runOnMachineFunction(MachineFunction &MF); virtual const char *getPassName() const { - return "SSE execution domain fixup"; + return "Execution dependency fix"; } private: // Register mapping. - int RegIndex(unsigned Reg); + int regIndex(unsigned Reg); // DomainValue allocation. - DomainValue *Alloc(int domain = -1); - void Recycle(DomainValue*); + DomainValue *alloc(int domain = -1); + DomainValue *retain(DomainValue *DV) { + if (DV) ++DV->Refs; + return DV; + } + void release(DomainValue*); + DomainValue *resolve(DomainValue*&); // LiveRegs manipulations. - void SetLiveReg(int rx, DomainValue *DV); - void Kill(int rx); - void Force(int rx, unsigned domain); - void Collapse(DomainValue *dv, unsigned domain); - bool Merge(DomainValue *A, DomainValue *B); - - void enterBasicBlock(); - void visitGenericInstr(MachineInstr*); + void setLiveReg(int rx, DomainValue *DV); + void kill(int rx); + void force(int rx, unsigned domain); + void collapse(DomainValue *dv, unsigned domain); + bool merge(DomainValue *A, DomainValue *B); + + void enterBasicBlock(MachineBasicBlock*); + void leaveBasicBlock(MachineBasicBlock*); + void visitInstr(MachineInstr*); + void processDefs(MachineInstr*, bool Kill); void visitSoftInstr(MachineInstr*, unsigned mask); void visitHardInstr(MachineInstr*, unsigned domain); }; @@ -160,83 +192,108 @@ char ExeDepsFix::ID = 0; /// Translate TRI register number to an index into our smaller tables of /// interesting registers. Return -1 for boring registers. 
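Aside (not part of the patch): the DomainValue::Next field added above turns merged values into a reference-counted forwarding chain, so stale pointers kept in the per-block live-out maps can be redirected lazily to the surviving value, much like union-find with path compression. A generic stand-alone sketch of that pattern, with invented names (the real pass recycles nodes into a bump allocator rather than deleting them):

  #include <cassert>

  struct Node {
    unsigned Refs = 0;
    Node *Next = nullptr; // set once this node has been merged into another
  };

  Node *retain(Node *N) {
    if (N) ++N->Refs;
    return N;
  }

  void release(Node *N) {
    while (N && --N->Refs == 0) {
      Node *Next = N->Next;
      delete N;  // dropping N also drops the reference it held on Next
      N = Next;
    }
  }

  // Merge: every reference to Victim is redirected to Victor lazily.
  void merge(Node *Victor, Node *Victim) {
    assert(!Victim->Next && "Victim was already merged");
    Victim->Next = retain(Victor);
  }

  // Follow the chain from a stored reference to the live representative and
  // update the reference in place (retain the tail before releasing the dead
  // head so the tail cannot be freed by the cascade).
  Node *resolve(Node *&Ref) {
    Node *N = Ref;
    if (!N || !N->Next)
      return N;
    while (N->Next)
      N = N->Next;
    retain(N);
    release(Ref);
    Ref = N;
    return N;
  }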
-int ExeDepsFix::RegIndex(unsigned Reg) { +int ExeDepsFix::regIndex(unsigned Reg) { assert(Reg < AliasMap.size() && "Invalid register"); return AliasMap[Reg]; } -DomainValue *ExeDepsFix::Alloc(int domain) { +DomainValue *ExeDepsFix::alloc(int domain) { DomainValue *dv = Avail.empty() ? new(Allocator.Allocate()) DomainValue : Avail.pop_back_val(); - dv->Dist = Distance; if (domain >= 0) dv->addDomain(domain); + assert(dv->Refs == 0 && "Reference count wasn't cleared"); + assert(!dv->Next && "Chained DomainValue shouldn't have been recycled"); return dv; } -void ExeDepsFix::Recycle(DomainValue *dv) { - assert(dv && "Cannot recycle NULL"); - dv->clear(); - Avail.push_back(dv); +/// release - Release a reference to DV. When the last reference is released, +/// collapse if needed. +void ExeDepsFix::release(DomainValue *DV) { + while (DV) { + assert(DV->Refs && "Bad DomainValue"); + if (--DV->Refs) + return; + + // There are no more DV references. Collapse any contained instructions. + if (DV->AvailableDomains && !DV->isCollapsed()) + collapse(DV, DV->getFirstDomain()); + + DomainValue *Next = DV->Next; + DV->clear(); + Avail.push_back(DV); + // Also release the next DomainValue in the chain. + DV = Next; + } +} + +/// resolve - Follow the chain of dead DomainValues until a live DomainValue is +/// reached. Update the referenced pointer when necessary. +DomainValue *ExeDepsFix::resolve(DomainValue *&DVRef) { + DomainValue *DV = DVRef; + if (!DV || !DV->Next) + return DV; + + // DV has a chain. Find the end. + do DV = DV->Next; + while (DV->Next); + + // Update DVRef to point to DV. + retain(DV); + release(DVRef); + DVRef = DV; + return DV; } /// Set LiveRegs[rx] = dv, updating reference counts. -void ExeDepsFix::SetLiveReg(int rx, DomainValue *dv) { +void ExeDepsFix::setLiveReg(int rx, DomainValue *dv) { assert(unsigned(rx) < NumRegs && "Invalid index"); - if (!LiveRegs) { - LiveRegs = new DomainValue*[NumRegs]; - std::fill(LiveRegs, LiveRegs+NumRegs, (DomainValue*)0); - } + assert(LiveRegs && "Must enter basic block first."); - if (LiveRegs[rx] == dv) + if (LiveRegs[rx].Value == dv) return; - if (LiveRegs[rx]) { - assert(LiveRegs[rx]->Refs && "Bad refcount"); - if (--LiveRegs[rx]->Refs == 0) Recycle(LiveRegs[rx]); - } - LiveRegs[rx] = dv; - if (dv) ++dv->Refs; + if (LiveRegs[rx].Value) + release(LiveRegs[rx].Value); + LiveRegs[rx].Value = retain(dv); } // Kill register rx, recycle or collapse any DomainValue. -void ExeDepsFix::Kill(int rx) { +void ExeDepsFix::kill(int rx) { assert(unsigned(rx) < NumRegs && "Invalid index"); - if (!LiveRegs || !LiveRegs[rx]) return; - - // Before killing the last reference to an open DomainValue, collapse it to - // the first available domain. - if (LiveRegs[rx]->Refs == 1 && !LiveRegs[rx]->isCollapsed()) - Collapse(LiveRegs[rx], LiveRegs[rx]->getFirstDomain()); - else - SetLiveReg(rx, 0); + assert(LiveRegs && "Must enter basic block first."); + if (!LiveRegs[rx].Value) + return; + + release(LiveRegs[rx].Value); + LiveRegs[rx].Value = 0; } /// Force register rx into domain. -void ExeDepsFix::Force(int rx, unsigned domain) { +void ExeDepsFix::force(int rx, unsigned domain) { assert(unsigned(rx) < NumRegs && "Invalid index"); - DomainValue *dv; - if (LiveRegs && (dv = LiveRegs[rx])) { + assert(LiveRegs && "Must enter basic block first."); + if (DomainValue *dv = LiveRegs[rx].Value) { if (dv->isCollapsed()) dv->addDomain(domain); else if (dv->hasDomain(domain)) - Collapse(dv, domain); + collapse(dv, domain); else { // This is an incompatible open DomainValue. 
Collapse it to whatever and // force the new value into domain. This costs a domain crossing. - Collapse(dv, dv->getFirstDomain()); - assert(LiveRegs[rx] && "Not live after collapse?"); - LiveRegs[rx]->addDomain(domain); + collapse(dv, dv->getFirstDomain()); + assert(LiveRegs[rx].Value && "Not live after collapse?"); + LiveRegs[rx].Value->addDomain(domain); } } else { // Set up basic collapsed DomainValue. - SetLiveReg(rx, Alloc(domain)); + setLiveReg(rx, alloc(domain)); } } /// Collapse open DomainValue into given domain. If there are multiple /// registers using dv, they each get a unique collapsed DomainValue. -void ExeDepsFix::Collapse(DomainValue *dv, unsigned domain) { +void ExeDepsFix::collapse(DomainValue *dv, unsigned domain) { assert(dv->hasDomain(domain) && "Cannot collapse"); // Collapse all the instructions. @@ -247,13 +304,13 @@ void ExeDepsFix::Collapse(DomainValue *dv, unsigned domain) { // If there are multiple users, give them new, unique DomainValues. if (LiveRegs && dv->Refs > 1) for (unsigned rx = 0; rx != NumRegs; ++rx) - if (LiveRegs[rx] == dv) - SetLiveReg(rx, Alloc(domain)); + if (LiveRegs[rx].Value == dv) + setLiveReg(rx, alloc(domain)); } /// Merge - All instructions and registers in B are moved to A, and B is /// released. -bool ExeDepsFix::Merge(DomainValue *A, DomainValue *B) { +bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) { assert(!A->isCollapsed() && "Cannot merge into collapsed"); assert(!B->isCollapsed() && "Cannot merge from collapsed"); if (A == B) @@ -263,47 +320,188 @@ bool ExeDepsFix::Merge(DomainValue *A, DomainValue *B) { if (!common) return false; A->AvailableDomains = common; - A->Dist = std::max(A->Dist, B->Dist); A->Instrs.append(B->Instrs.begin(), B->Instrs.end()); + + // Clear the old DomainValue so we won't try to swizzle instructions twice. + B->clear(); + // All uses of B are referred to A. + B->Next = retain(A); + for (unsigned rx = 0; rx != NumRegs; ++rx) - if (LiveRegs[rx] == B) - SetLiveReg(rx, A); + if (LiveRegs[rx].Value == B) + setLiveReg(rx, A); return true; } -void ExeDepsFix::enterBasicBlock() { - // Try to coalesce live-out registers from predecessors. - for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(), +// enterBasicBlock - Set up LiveRegs by merging predecessor live-out values. +void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { + // Detect back-edges from predecessors we haven't processed yet. + SeenUnknownBackEdge = false; + + // Reset instruction counter in each basic block. + CurInstr = 0; + + // Set up LiveRegs to represent registers entering MBB. + if (!LiveRegs) + LiveRegs = new LiveReg[NumRegs]; + + // Default values are 'nothing happened a long time ago'. + for (unsigned rx = 0; rx != NumRegs; ++rx) { + LiveRegs[rx].Value = 0; + LiveRegs[rx].Def = -(1 << 20); + } + + // This is the entry block. + if (MBB->pred_empty()) { + for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(), e = MBB->livein_end(); i != e; ++i) { - int rx = RegIndex(*i); - if (rx < 0) continue; - for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(), - pe = MBB->pred_end(); pi != pe; ++pi) { - LiveOutMap::const_iterator fi = LiveOuts.find(*pi); - if (fi == LiveOuts.end()) continue; - DomainValue *pdv = fi->second[rx]; - if (!pdv) continue; - if (!LiveRegs || !LiveRegs[rx]) { - SetLiveReg(rx, pdv); + int rx = regIndex(*i); + if (rx < 0) + continue; + // Treat function live-ins as if they were defined just before the first + // instruction. 
Usually, function arguments are set up immediately + // before the call. + LiveRegs[rx].Def = -1; + } + DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": entry\n"); + return; + } + + // Try to coalesce live-out registers from predecessors. + for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(), + pe = MBB->pred_end(); pi != pe; ++pi) { + LiveOutMap::const_iterator fi = LiveOuts.find(*pi); + if (fi == LiveOuts.end()) { + SeenUnknownBackEdge = true; + continue; + } + assert(fi->second && "Can't have NULL entries"); + + for (unsigned rx = 0; rx != NumRegs; ++rx) { + // Use the most recent predecessor def for each register. + LiveRegs[rx].Def = std::max(LiveRegs[rx].Def, fi->second[rx].Def); + + DomainValue *pdv = resolve(fi->second[rx].Value); + if (!pdv) + continue; + if (!LiveRegs[rx].Value) { + setLiveReg(rx, pdv); continue; } // We have a live DomainValue from more than one predecessor. - if (LiveRegs[rx]->isCollapsed()) { + if (LiveRegs[rx].Value->isCollapsed()) { // We are already collapsed, but predecessor is not. Force him. - unsigned domain = LiveRegs[rx]->getFirstDomain(); - if (!pdv->isCollapsed() && pdv->hasDomain(domain)) - Collapse(pdv, domain); + unsigned Domain = LiveRegs[rx].Value->getFirstDomain(); + if (!pdv->isCollapsed() && pdv->hasDomain(Domain)) + collapse(pdv, Domain); continue; } // Currently open, merge in predecessor. if (!pdv->isCollapsed()) - Merge(LiveRegs[rx], pdv); + merge(LiveRegs[rx].Value, pdv); else - Force(rx, pdv->getFirstDomain()); + force(rx, pdv->getFirstDomain()); + } + } + DEBUG(dbgs() << "BB#" << MBB->getNumber() + << (SeenUnknownBackEdge ? ": incomplete\n" : ": all preds known\n")); +} + +void ExeDepsFix::leaveBasicBlock(MachineBasicBlock *MBB) { + assert(LiveRegs && "Must enter basic block first."); + // Save live registers at end of MBB - used by enterBasicBlock(). + // Also use LiveOuts as a visited set to detect back-edges. + bool First = LiveOuts.insert(std::make_pair(MBB, LiveRegs)).second; + + if (First) { + // LiveRegs was inserted in LiveOuts. Adjust all defs to be relative to + // the end of this block instead of the beginning. + for (unsigned i = 0, e = NumRegs; i != e; ++i) + LiveRegs[i].Def -= CurInstr; + } else { + // Insertion failed, this must be the second pass. + // Release all the DomainValues instead of keeping them. + for (unsigned i = 0, e = NumRegs; i != e; ++i) + release(LiveRegs[i].Value); + delete[] LiveRegs; + } + LiveRegs = 0; +} + +void ExeDepsFix::visitInstr(MachineInstr *MI) { + if (MI->isDebugValue()) + return; + + // Update instructions with explicit execution domains. + std::pair<uint16_t, uint16_t> DomP = TII->getExecutionDomain(MI); + if (DomP.first) { + if (DomP.second) + visitSoftInstr(MI, DomP.second); + else + visitHardInstr(MI, DomP.first); + } + + // Process defs to track register ages, and kill values clobbered by generic + // instructions. + processDefs(MI, !DomP.first); +} + +// Update def-ages for registers defined by MI. +// If Kill is set, also kill off DomainValues clobbered by the defs. +void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) { + assert(!MI->isDebugValue() && "Won't process debug values"); + const MCInstrDesc &MCID = MI->getDesc(); + for (unsigned i = 0, + e = MI->isVariadic() ? 
MI->getNumOperands() : MCID.getNumDefs(); + i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + if (MO.isImplicit()) + break; + if (MO.isUse()) + continue; + int rx = regIndex(MO.getReg()); + if (rx < 0) + continue; + + // This instruction explicitly defines rx. + DEBUG(dbgs() << TRI->getName(RC->getRegister(rx)) << ":\t" << CurInstr + << '\t' << *MI); + + // How many instructions since rx was last written? + unsigned Clearance = CurInstr - LiveRegs[rx].Def; + LiveRegs[rx].Def = CurInstr; + + // Kill off domains redefined by generic instructions. + if (Kill) + kill(rx); + + // Verify clearance before partial register updates. + unsigned Pref = TII->getPartialRegUpdateClearance(MI, i, TRI); + if (!Pref) + continue; + DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref); + if (Pref > Clearance) { + DEBUG(dbgs() << ": Break dependency.\n"); + TII->breakPartialRegDependency(MI, i, TRI); + continue; + } + + // The current clearance seems OK, but we may be ignoring a def from a + // back-edge. + if (!SeenUnknownBackEdge || Pref <= unsigned(CurInstr)) { + DEBUG(dbgs() << ": OK.\n"); + continue; } + + // A def from an unprocessed back-edge may make us break this dependency. + DEBUG(dbgs() << ": Wait for back-edge to resolve.\n"); } + + ++CurInstr; } // A hard instruction only works in one domain. All input registers will be @@ -314,19 +512,19 @@ void ExeDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) { e = mi->getDesc().getNumOperands(); i != e; ++i) { MachineOperand &mo = mi->getOperand(i); if (!mo.isReg()) continue; - int rx = RegIndex(mo.getReg()); + int rx = regIndex(mo.getReg()); if (rx < 0) continue; - Force(rx, domain); + force(rx, domain); } // Kill all defs and force them. for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { MachineOperand &mo = mi->getOperand(i); if (!mo.isReg()) continue; - int rx = RegIndex(mo.getReg()); + int rx = regIndex(mo.getReg()); if (rx < 0) continue; - Kill(rx); - Force(rx, domain); + kill(rx); + force(rx, domain); } } @@ -343,9 +541,9 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { e = mi->getDesc().getNumOperands(); i != e; ++i) { MachineOperand &mo = mi->getOperand(i); if (!mo.isReg()) continue; - int rx = RegIndex(mo.getReg()); + int rx = regIndex(mo.getReg()); if (rx < 0) continue; - if (DomainValue *dv = LiveRegs[rx]) { + if (DomainValue *dv = LiveRegs[rx].Value) { // Bitmask of domains that dv and available have in common. unsigned common = dv->getCommonDomains(available); // Is it possible to use this collapsed register for free? @@ -360,7 +558,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { else // Open DomainValue is not compatible with instruction. It is useless // now. - Kill(rx); + kill(rx); } } @@ -374,94 +572,89 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { // Kill off any remaining uses that don't match available, and build a list of // incoming DomainValues that we want to merge. - SmallVector<DomainValue*,4> doms; + SmallVector<LiveReg, 4> Regs; for (SmallVector<int, 4>::iterator i=used.begin(), e=used.end(); i!=e; ++i) { int rx = *i; - DomainValue *dv = LiveRegs[rx]; + const LiveReg &LR = LiveRegs[rx]; // This useless DomainValue could have been missed above. - if (!dv->getCommonDomains(available)) { - Kill(*i); + if (!LR.Value->getCommonDomains(available)) { + kill(rx); continue; } - // sorted, uniqued insert. 
- bool inserted = false; - for (SmallVector<DomainValue*,4>::iterator i = doms.begin(), e = doms.end(); - i != e && !inserted; ++i) { - if (dv == *i) - inserted = true; - else if (dv->Dist < (*i)->Dist) { - inserted = true; - doms.insert(i, dv); + // Sorted insertion. + bool Inserted = false; + for (SmallVector<LiveReg, 4>::iterator i = Regs.begin(), e = Regs.end(); + i != e && !Inserted; ++i) { + if (LR.Def < i->Def) { + Inserted = true; + Regs.insert(i, LR); } } - if (!inserted) - doms.push_back(dv); + if (!Inserted) + Regs.push_back(LR); } // doms are now sorted in order of appearance. Try to merge them all, giving // priority to the latest ones. DomainValue *dv = 0; - while (!doms.empty()) { + while (!Regs.empty()) { if (!dv) { - dv = doms.pop_back_val(); + dv = Regs.pop_back_val().Value; + // Force the first dv to match the current instruction. + dv->AvailableDomains = dv->getCommonDomains(available); + assert(dv->AvailableDomains && "Domain should have been filtered"); continue; } - DomainValue *latest = doms.pop_back_val(); - if (Merge(dv, latest)) continue; + DomainValue *Latest = Regs.pop_back_val().Value; + // Skip already merged values. + if (Latest == dv || Latest->Next) + continue; + if (merge(dv, Latest)) + continue; // If latest didn't merge, it is useless now. Kill all registers using it. for (SmallVector<int,4>::iterator i=used.begin(), e=used.end(); i != e; ++i) - if (LiveRegs[*i] == latest) - Kill(*i); + if (LiveRegs[*i].Value == Latest) + kill(*i); } // dv is the DomainValue we are going to use for this instruction. - if (!dv) - dv = Alloc(); - dv->Dist = Distance; - dv->AvailableDomains = available; + if (!dv) { + dv = alloc(); + dv->AvailableDomains = available; + } dv->Instrs.push_back(mi); // Finally set all defs and non-collapsed uses to dv. for (unsigned i = 0, e = mi->getDesc().getNumOperands(); i != e; ++i) { MachineOperand &mo = mi->getOperand(i); if (!mo.isReg()) continue; - int rx = RegIndex(mo.getReg()); + int rx = regIndex(mo.getReg()); if (rx < 0) continue; - if (!LiveRegs || !LiveRegs[rx] || (mo.isDef() && LiveRegs[rx]!=dv)) { - Kill(rx); - SetLiveReg(rx, dv); + if (!LiveRegs[rx].Value || (mo.isDef() && LiveRegs[rx].Value != dv)) { + kill(rx); + setLiveReg(rx, dv); } } } -void ExeDepsFix::visitGenericInstr(MachineInstr *mi) { - // Process explicit defs, kill any relevant registers redefined. - for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { - MachineOperand &mo = mi->getOperand(i); - if (!mo.isReg()) continue; - int rx = RegIndex(mo.getReg()); - if (rx < 0) continue; - Kill(rx); - } -} - bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { MF = &mf; TII = MF->getTarget().getInstrInfo(); TRI = MF->getTarget().getRegisterInfo(); - MBB = 0; LiveRegs = 0; - Distance = 0; assert(NumRegs == RC->getNumRegs() && "Bad regclass"); + DEBUG(dbgs() << "********** FIX EXECUTION DEPENDENCIES: " + << RC->getName() << " **********\n"); + // If no relevant registers are used in the function, we can skip it // completely. bool anyregs = false; for (TargetRegisterClass::const_iterator I = RC->begin(), E = RC->end(); I != E; ++I) - if (MF->getRegInfo().isPhysRegUsed(*I)) { + if (MF->getRegInfo().isPhysRegOrOverlapUsed(*I)) { anyregs = true; break; } @@ -473,43 +666,48 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { // or -1. 
AliasMap.resize(TRI->getNumRegs(), -1); for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i) - for (const unsigned *AI = TRI->getOverlaps(RC->getRegister(i)); *AI; ++AI) + for (const uint16_t *AI = TRI->getOverlaps(RC->getRegister(i)); *AI; ++AI) AliasMap[*AI] = i; } MachineBasicBlock *Entry = MF->begin(); - SmallPtrSet<MachineBasicBlock*, 16> Visited; - for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 16> > - DFI = df_ext_begin(Entry, Visited), DFE = df_ext_end(Entry, Visited); - DFI != DFE; ++DFI) { - MBB = *DFI; - enterBasicBlock(); + ReversePostOrderTraversal<MachineBasicBlock*> RPOT(Entry); + SmallVector<MachineBasicBlock*, 16> Loops; + for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator + MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) { + MachineBasicBlock *MBB = *MBBI; + enterBasicBlock(MBB); + if (SeenUnknownBackEdge) + Loops.push_back(MBB); for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; - ++I) { - MachineInstr *mi = I; - if (mi->isDebugValue()) continue; - ++Distance; - std::pair<uint16_t, uint16_t> domp = TII->getExecutionDomain(mi); - if (domp.first) - if (domp.second) - visitSoftInstr(mi, domp.second); - else - visitHardInstr(mi, domp.first); - else if (LiveRegs) - visitGenericInstr(mi); - } + ++I) + visitInstr(I); + leaveBasicBlock(MBB); + } - // Save live registers at end of MBB - used by enterBasicBlock(). - if (LiveRegs) - LiveOuts.insert(std::make_pair(MBB, LiveRegs)); - LiveRegs = 0; + // Visit all the loop blocks again in order to merge DomainValues from + // back-edges. + for (unsigned i = 0, e = Loops.size(); i != e; ++i) { + MachineBasicBlock *MBB = Loops[i]; + enterBasicBlock(MBB); + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; + ++I) + if (!I->isDebugValue()) + processDefs(I, false); + leaveBasicBlock(MBB); } - // Clear the LiveOuts vectors. Should we also collapse any remaining - // DomainValues? - for (LiveOutMap::const_iterator i = LiveOuts.begin(), e = LiveOuts.end(); - i != e; ++i) - delete[] i->second; + // Clear the LiveOuts vectors and collapse any remaining DomainValues. 
+ for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator + MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) { + LiveOutMap::const_iterator FI = LiveOuts.find(*MBBI); + if (FI == LiveOuts.end() || !FI->second) + continue; + for (unsigned i = 0, e = NumRegs; i != e; ++i) + if (FI->second[i].Value) + release(FI->second[i].Value); + delete[] FI->second; + } LiveOuts.clear(); Avail.clear(); Allocator.DestroyAll(); diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/ExpandISelPseudos.cpp index a67140ece4a5..2c4a93543cc3 100644 --- a/lib/CodeGen/ExpandISelPseudos.cpp +++ b/lib/CodeGen/ExpandISelPseudos.cpp @@ -32,10 +32,6 @@ namespace { private: virtual bool runOnMachineFunction(MachineFunction &MF); - const char *getPassName() const { - return "Expand ISel Pseudo-instructions"; - } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } @@ -43,12 +39,9 @@ namespace { } // end anonymous namespace char ExpandISelPseudos::ID = 0; +char &llvm::ExpandISelPseudosID = ExpandISelPseudos::ID; INITIALIZE_PASS(ExpandISelPseudos, "expand-isel-pseudos", - "Expand CodeGen Pseudo-instructions", false, false) - -FunctionPass *llvm::createExpandISelPseudosPass() { - return new ExpandISelPseudos(); -} + "Expand ISel Pseudo-instructions", false, false) bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; @@ -62,8 +55,7 @@ bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) { MachineInstr *MI = MBBI++; // If MI is a pseudo, expand it. - const MCInstrDesc &MCID = MI->getDesc(); - if (MCID.usesCustomInsertionHook()) { + if (MI->usesCustomInsertionHook()) { Changed = true; MachineBasicBlock *NewMBB = TLI->EmitInstrWithCustomInserter(MI, MBB); diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp index e2a14a8dfd97..b14afc286d49 100644 --- a/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -36,10 +36,6 @@ public: static char ID; // Pass identification, replacement for typeid ExpandPostRA() : MachineFunctionPass(ID) {} - const char *getPassName() const { - return "Post-RA pseudo instruction expansion pass"; - } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addPreservedID(MachineLoopInfoID); @@ -61,10 +57,10 @@ private: } // end anonymous namespace char ExpandPostRA::ID = 0; +char &llvm::ExpandPostRAPseudosID = ExpandPostRA::ID; -FunctionPass *llvm::createExpandPostRAPseudosPass() { - return new ExpandPostRA(); -} +INITIALIZE_PASS(ExpandPostRA, "postrapseudos", + "Post-RA pseudo instruction expansion pass", false, false) /// TransferDeadFlag - MI is a pseudo-instruction with DstReg dead, /// and the lowered replacement instructions immediately precede it. @@ -207,7 +203,7 @@ bool ExpandPostRA::runOnMachineFunction(MachineFunction &MF) { ++mi; // Only expand pseudos. - if (!MI->getDesc().isPseudo()) + if (!MI->isPseudo()) continue; // Give targets a chance to expand even standard pseudos. 
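(The ExecutionDepsFix changes above track, per register, the instruction index of its most recent def in LiveRegs[rx].Def and compare the resulting clearance against the clearance the target prefers before a partial register update. A minimal standalone sketch of just that decision, using hypothetical stand-in types instead of the real MachineInstr/TargetInstrInfo interfaces:)

#include <cstdio>

// Hypothetical stand-in for the pass state: one record per tracked register,
// holding the instruction number of its most recent def (a large negative
// value models "written a long time ago", as in enterBasicBlock()).
struct LiveRegModel {
  int Def;
};

// Returns true when a dependency-breaking instruction should be inserted:
// the preferred clearance exceeds the number of instructions since the
// register was last written.
bool shouldBreakPartialRegDependency(const LiveRegModel &LR, int CurInstr,
                                     unsigned PreferredClearance) {
  unsigned Clearance = static_cast<unsigned>(CurInstr - LR.Def);
  return PreferredClearance > Clearance;
}

int main() {
  // A register written 3 instructions ago, with a target that wants 16
  // instructions of clearance before a partial update (numbers are made up).
  LiveRegModel LR = { /*Def=*/7 };
  int CurInstr = 10;
  std::printf("break dependency: %s\n",
              shouldBreakPartialRegDependency(LR, CurInstr, 16) ? "yes" : "no");
  return 0;
}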
diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp index d757cf409d50..1caf8c233976 100644 --- a/lib/CodeGen/GCMetadata.cpp +++ b/lib/CodeGen/GCMetadata.cpp @@ -143,12 +143,12 @@ void Printer::getAnalysisUsage(AnalysisUsage &AU) const { static const char *DescKind(GC::PointKind Kind) { switch (Kind) { - default: llvm_unreachable("Unknown GC point kind"); case GC::Loop: return "loop"; case GC::Return: return "return"; case GC::PreCall: return "pre-call"; case GC::PostCall: return "post-call"; } + llvm_unreachable("Invalid point kind"); } bool Printer::runOnFunction(Function &F) { @@ -156,12 +156,12 @@ bool Printer::runOnFunction(Function &F) { GCFunctionInfo *FD = &getAnalysis<GCModuleInfo>().getFunctionInfo(F); - OS << "GC roots for " << FD->getFunction().getNameStr() << ":\n"; + OS << "GC roots for " << FD->getFunction().getName() << ":\n"; for (GCFunctionInfo::roots_iterator RI = FD->roots_begin(), RE = FD->roots_end(); RI != RE; ++RI) OS << "\t" << RI->Num << "\t" << RI->StackOffset << "[sp]\n"; - OS << "GC safe points for " << FD->getFunction().getNameStr() << ":\n"; + OS << "GC safe points for " << FD->getFunction().getName() << ":\n"; for (GCFunctionInfo::iterator PI = FD->begin(), PE = FD->end(); PI != PE; ++PI) { diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp index 766c6ee542a9..506b5cf09457 100644 --- a/lib/CodeGen/GCStrategy.cpp +++ b/lib/CodeGen/GCStrategy.cpp @@ -10,8 +10,8 @@ // This file implements target- and collector-independent garbage collection // infrastructure. // -// MachineCodeAnalysis identifies the GC safe points in the machine code. Roots -// are identified in SelectionDAGISel. +// GCMachineCodeAnalysis identifies the GC safe points in the machine code. +// Roots are identified in SelectionDAGISel. // //===----------------------------------------------------------------------===// @@ -35,9 +35,9 @@ using namespace llvm; namespace { - + /// LowerIntrinsics - This pass rewrites calls to the llvm.gcread or - /// llvm.gcwrite intrinsics, replacing them with simple loads and stores as + /// llvm.gcwrite intrinsics, replacing them with simple loads and stores as /// directed by the GCStrategy. It also performs automatic root initialization /// and custom intrinsic lowering. class LowerIntrinsics : public FunctionPass { @@ -47,47 +47,46 @@ namespace { bool PerformDefaultLowering(Function &F, GCStrategy &Coll); static bool InsertRootInitializers(Function &F, AllocaInst **Roots, unsigned Count); - + public: static char ID; - + LowerIntrinsics(); const char *getPassName() const; void getAnalysisUsage(AnalysisUsage &AU) const; - + bool doInitialization(Module &M); bool runOnFunction(Function &F); }; - - - /// MachineCodeAnalysis - This is a target-independent pass over the machine + + + /// GCMachineCodeAnalysis - This is a target-independent pass over the machine /// function representation to identify safe points for the garbage collector /// in the machine code. It inserts labels at safe points and populates a /// GCMetadata record for each function. 
- class MachineCodeAnalysis : public MachineFunctionPass { + class GCMachineCodeAnalysis : public MachineFunctionPass { const TargetMachine *TM; GCFunctionInfo *FI; MachineModuleInfo *MMI; const TargetInstrInfo *TII; - + void FindSafePoints(MachineFunction &MF); void VisitCallPoint(MachineBasicBlock::iterator MI); - MCSymbol *InsertLabel(MachineBasicBlock &MBB, + MCSymbol *InsertLabel(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, DebugLoc DL) const; - + void FindStackOffsets(MachineFunction &MF); - + public: static char ID; - - MachineCodeAnalysis(); - const char *getPassName() const; + + GCMachineCodeAnalysis(); void getAnalysisUsage(AnalysisUsage &AU) const; - + bool runOnMachineFunction(MachineFunction &MF); }; - + } // ----------------------------------------------------------------------------- @@ -97,6 +96,7 @@ GCStrategy::GCStrategy() : CustomReadBarriers(false), CustomWriteBarriers(false), CustomRoots(false), + CustomSafePoints(false), InitRoots(true), UsesMetadata(false) {} @@ -104,18 +104,24 @@ GCStrategy::GCStrategy() : GCStrategy::~GCStrategy() { for (iterator I = begin(), E = end(); I != E; ++I) delete *I; - + Functions.clear(); } - + bool GCStrategy::initializeCustomLowering(Module &M) { return false; } - + bool GCStrategy::performCustomLowering(Function &F) { dbgs() << "gc " << getName() << " must override performCustomLowering.\n"; + llvm_unreachable("must override performCustomLowering"); +} + + +bool GCStrategy::findCustomSafePoints(GCFunctionInfo& FI, MachineFunction &F) { + dbgs() << "gc " << getName() << " must override findCustomSafePoints.\n"; llvm_unreachable(0); - return 0; } + GCFunctionInfo *GCStrategy::insertFunctionInfo(const Function &F) { GCFunctionInfo *FI = new GCFunctionInfo(F, *this); Functions.push_back(FI); @@ -132,7 +138,7 @@ INITIALIZE_PASS_END(LowerIntrinsics, "gc-lowering", "GC Lowering", false, false) FunctionPass *llvm::createGCLoweringPass() { return new LowerIntrinsics(); } - + char LowerIntrinsics::ID = 0; LowerIntrinsics::LowerIntrinsics() @@ -143,7 +149,7 @@ LowerIntrinsics::LowerIntrinsics() const char *LowerIntrinsics::getPassName() const { return "Lower Garbage Collection Instructions"; } - + void LowerIntrinsics::getAnalysisUsage(AnalysisUsage &AU) const { FunctionPass::getAnalysisUsage(AU); AU.addRequired<GCModuleInfo>(); @@ -161,22 +167,22 @@ bool LowerIntrinsics::doInitialization(Module &M) { for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) if (!I->isDeclaration() && I->hasGC()) MI->getFunctionInfo(*I); // Instantiate the GC strategy. - + bool MadeChange = false; for (GCModuleInfo::iterator I = MI->begin(), E = MI->end(); I != E; ++I) if (NeedsCustomLoweringPass(**I)) if ((*I)->initializeCustomLowering(M)) MadeChange = true; - + return MadeChange; } -bool LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots, +bool LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots, unsigned Count) { // Scroll past alloca instructions. BasicBlock::iterator IP = F.getEntryBlock().begin(); while (isa<AllocaInst>(IP)) ++IP; - + // Search for initializers in the initial BB. SmallPtrSet<AllocaInst*,16> InitedRoots; for (; !CouldBecomeSafePoint(IP); ++IP) @@ -184,10 +190,10 @@ bool LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots, if (AllocaInst *AI = dyn_cast<AllocaInst>(SI->getOperand(1)->stripPointerCasts())) InitedRoots.insert(AI); - + // Add root initializers. 
bool MadeChange = false; - + for (AllocaInst **I = Roots, **E = Roots + Count; I != E; ++I) if (!InitedRoots.count(*I)) { StoreInst* SI = new StoreInst(ConstantPointerNull::get(cast<PointerType>( @@ -196,7 +202,7 @@ bool LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots, SI->insertAfter(*I); MadeChange = true; } - + return MadeChange; } @@ -220,26 +226,26 @@ bool LowerIntrinsics::NeedsCustomLoweringPass(const GCStrategy &C) { bool LowerIntrinsics::CouldBecomeSafePoint(Instruction *I) { // The natural definition of instructions which could introduce safe points // are: - // + // // - call, invoke (AfterCall, BeforeCall) // - phis (Loops) // - invoke, ret, unwind (Exit) - // + // // However, instructions as seemingly inoccuous as arithmetic can become // libcalls upon lowering (e.g., div i64 on a 32-bit platform), so instead // it is necessary to take a conservative approach. - + if (isa<AllocaInst>(I) || isa<GetElementPtrInst>(I) || isa<StoreInst>(I) || isa<LoadInst>(I)) return false; - + // llvm.gcroot is safe because it doesn't do anything at runtime. if (CallInst *CI = dyn_cast<CallInst>(I)) if (Function *F = CI->getCalledFunction()) if (unsigned IID = F->getIntrinsicID()) if (IID == Intrinsic::gcroot) return false; - + return true; } @@ -249,15 +255,15 @@ bool LowerIntrinsics::runOnFunction(Function &F) { // Quick exit for functions that do not use GC. if (!F.hasGC()) return false; - + GCFunctionInfo &FI = getAnalysis<GCModuleInfo>().getFunctionInfo(F); GCStrategy &S = FI.getStrategy(); - + bool MadeChange = false; - + if (NeedsDefaultLoweringPass(S)) MadeChange |= PerformDefaultLowering(F, S); - + bool UseCustomLoweringPass = NeedsCustomLoweringPass(S); if (UseCustomLoweringPass) MadeChange |= S.performCustomLowering(F); @@ -275,9 +281,9 @@ bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) { bool LowerWr = !S.customWriteBarrier(); bool LowerRd = !S.customReadBarrier(); bool InitRoots = S.initializeRoots(); - + SmallVector<AllocaInst*, 32> Roots; - + bool MadeChange = false; for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) { @@ -313,104 +319,104 @@ bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) { default: continue; } - + MadeChange = true; } } } - + if (Roots.size()) MadeChange |= InsertRootInitializers(F, Roots.begin(), Roots.size()); - + return MadeChange; } // ----------------------------------------------------------------------------- -FunctionPass *llvm::createGCMachineCodeAnalysisPass() { - return new MachineCodeAnalysis(); -} +char GCMachineCodeAnalysis::ID = 0; +char &llvm::GCMachineCodeAnalysisID = GCMachineCodeAnalysis::ID; -char MachineCodeAnalysis::ID = 0; +INITIALIZE_PASS(GCMachineCodeAnalysis, "gc-analysis", + "Analyze Machine Code For Garbage Collection", false, false) -MachineCodeAnalysis::MachineCodeAnalysis() +GCMachineCodeAnalysis::GCMachineCodeAnalysis() : MachineFunctionPass(ID) {} -const char *MachineCodeAnalysis::getPassName() const { - return "Analyze Machine Code For Garbage Collection"; -} - -void MachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { +void GCMachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); AU.setPreservesAll(); AU.addRequired<MachineModuleInfo>(); AU.addRequired<GCModuleInfo>(); } -MCSymbol *MachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - DebugLoc DL) const { +MCSymbol 
*GCMachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + DebugLoc DL) const { MCSymbol *Label = MBB.getParent()->getContext().CreateTempSymbol(); BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label); return Label; } -void MachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) { +void GCMachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) { // Find the return address (next instruction), too, so as to bracket the call // instruction. - MachineBasicBlock::iterator RAI = CI; - ++RAI; - + MachineBasicBlock::iterator RAI = CI; + ++RAI; + if (FI->getStrategy().needsSafePoint(GC::PreCall)) { MCSymbol* Label = InsertLabel(*CI->getParent(), CI, CI->getDebugLoc()); FI->addSafePoint(GC::PreCall, Label, CI->getDebugLoc()); } - + if (FI->getStrategy().needsSafePoint(GC::PostCall)) { MCSymbol* Label = InsertLabel(*CI->getParent(), RAI, CI->getDebugLoc()); FI->addSafePoint(GC::PostCall, Label, CI->getDebugLoc()); } } -void MachineCodeAnalysis::FindSafePoints(MachineFunction &MF) { +void GCMachineCodeAnalysis::FindSafePoints(MachineFunction &MF) { for (MachineFunction::iterator BBI = MF.begin(), BBE = MF.end(); BBI != BBE; ++BBI) for (MachineBasicBlock::iterator MI = BBI->begin(), ME = BBI->end(); MI != ME; ++MI) - if (MI->getDesc().isCall()) + if (MI->isCall()) VisitCallPoint(MI); } -void MachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) { +void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) { const TargetFrameLowering *TFI = TM->getFrameLowering(); assert(TFI && "TargetRegisterInfo not available!"); - + for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(), RE = FI->roots_end(); RI != RE; ++RI) RI->StackOffset = TFI->getFrameIndexOffset(MF, RI->Num); } -bool MachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) { +bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) { // Quick exit for functions that do not use GC. if (!MF.getFunction()->hasGC()) return false; - + FI = &getAnalysis<GCModuleInfo>().getFunctionInfo(*MF.getFunction()); if (!FI->getStrategy().needsSafePoints()) return false; - + TM = &MF.getTarget(); MMI = &getAnalysis<MachineModuleInfo>(); TII = TM->getInstrInfo(); - + // Find the size of the stack frame. FI->setFrameSize(MF.getFrameInfo()->getStackSize()); - + // Find all safe points. - FindSafePoints(MF); - + if (FI->getStrategy().customSafePoints()) { + FI->getStrategy().findCustomSafePoints(*FI, MF); + } else { + FindSafePoints(MF); + } + // Find the stack offsets for all roots. 
FindStackOffsets(MF); - + return false; } diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index ce7ed293daac..75ae5b9c2c27 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -62,6 +62,7 @@ STATISTIC(NumTriangleFRev, "Number of triangle (F/R) if-conversions performed"); STATISTIC(NumDiamonds, "Number of diamond if-conversions performed"); STATISTIC(NumIfConvBBs, "Number of if-converted blocks"); STATISTIC(NumDupBBs, "Number of duplicated blocks"); +STATISTIC(NumUnpred, "Number of true blocks of diamonds unpredicated"); namespace { class IfConverter : public MachineFunctionPass { @@ -169,7 +170,6 @@ namespace { } virtual bool runOnMachineFunction(MachineFunction &MF); - virtual const char *getPassName() const { return "If Converter"; } private: bool ReverseBranchCondition(BBInfo &BBI); @@ -195,7 +195,8 @@ namespace { void PredicateBlock(BBInfo &BBI, MachineBasicBlock::iterator E, SmallVectorImpl<MachineOperand> &Cond, - SmallSet<unsigned, 4> &Redefs); + SmallSet<unsigned, 4> &Redefs, + SmallSet<unsigned, 4> *LaterRedefs = 0); void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, SmallVectorImpl<MachineOperand> &Cond, SmallSet<unsigned, 4> &Redefs, @@ -251,12 +252,12 @@ namespace { char IfConverter::ID = 0; } +char &llvm::IfConverterID = IfConverter::ID; + INITIALIZE_PASS_BEGIN(IfConverter, "if-converter", "If Converter", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_END(IfConverter, "if-converter", "If Converter", false, false) -FunctionPass *llvm::createIfConverterPass() { return new IfConverter(); } - bool IfConverter::runOnMachineFunction(MachineFunction &MF) { TLI = MF.getTarget().getTargetLowering(); TII = MF.getTarget().getInstrInfo(); @@ -313,8 +314,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { bool RetVal = false; switch (Kind) { - default: assert(false && "Unexpected!"); - break; + default: llvm_unreachable("Unexpected!"); case ICSimple: case ICSimpleFalse: { bool isFalse = Kind == ICSimpleFalse; @@ -573,12 +573,12 @@ bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, // blocks, move the end iterators up past any branch instructions. 
while (TIE != TIB) { --TIE; - if (!TIE->getDesc().isBranch()) + if (!TIE->isBranch()) break; } while (FIE != FIB) { --FIE; - if (!FIE->getDesc().isBranch()) + if (!FIE->isBranch()) break; } @@ -651,12 +651,11 @@ void IfConverter::ScanInstructions(BBInfo &BBI) { if (I->isDebugValue()) continue; - const MCInstrDesc &MCID = I->getDesc(); - if (MCID.isNotDuplicable()) + if (I->isNotDuplicable()) BBI.CannotBeCopied = true; bool isPredicated = TII->isPredicated(I); - bool isCondBr = BBI.IsBrAnalyzable && MCID.isConditionalBranch(); + bool isCondBr = BBI.IsBrAnalyzable && I->isConditionalBranch(); if (!isCondBr) { if (!isPredicated) { @@ -963,7 +962,7 @@ static void InitPredRedefs(MachineBasicBlock *BB, SmallSet<unsigned,4> &Redefs, E = BB->livein_end(); I != E; ++I) { unsigned Reg = *I; Redefs.insert(Reg); - for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); *Subreg; ++Subreg) Redefs.insert(*Subreg); } @@ -984,7 +983,7 @@ static void UpdatePredRedefs(MachineInstr *MI, SmallSet<unsigned,4> &Redefs, Defs.push_back(Reg); else if (MO.isKill()) { Redefs.erase(Reg); - for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) + for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR) Redefs.erase(*SR); } } @@ -997,7 +996,7 @@ static void UpdatePredRedefs(MachineInstr *MI, SmallSet<unsigned,4> &Redefs, true/*IsImp*/,false/*IsKill*/)); } else { Redefs.insert(Reg); - for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) + for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR) Redefs.insert(*SR); } } @@ -1035,7 +1034,7 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { if (Kind == ICSimpleFalse) if (TII->ReverseBranchCondition(Cond)) - assert(false && "Unable to reverse branch condition!"); + llvm_unreachable("Unable to reverse branch condition!"); // Initialize liveins to the first BB. These are potentiall redefined by // predicated instructions. @@ -1108,7 +1107,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { if (Kind == ICTriangleFalse || Kind == ICTriangleFRev) if (TII->ReverseBranchCondition(Cond)) - assert(false && "Unable to reverse branch condition!"); + llvm_unreachable("Unable to reverse branch condition!"); if (Kind == ICTriangleRev || Kind == ICTriangleFRev) { if (ReverseBranchCondition(*CvtBBI)) { @@ -1155,7 +1154,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { SmallVector<MachineOperand, 4> RevCond(CvtBBI->BrCond.begin(), CvtBBI->BrCond.end()); if (TII->ReverseBranchCondition(RevCond)) - assert(false && "Unable to reverse branch condition!"); + llvm_unreachable("Unable to reverse branch condition!"); TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, NULL, RevCond, dl); BBI.BB->addSuccessor(CvtBBI->FalseBB); } @@ -1227,7 +1226,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, BBInfo *BBI2 = &FalseBBI; SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end()); if (TII->ReverseBranchCondition(RevCond)) - assert(false && "Unable to reverse branch condition!"); + llvm_unreachable("Unable to reverse branch condition!"); SmallVector<MachineOperand, 4> *Cond1 = &BBI.BrCond; SmallVector<MachineOperand, 4> *Cond2 = &RevCond; @@ -1281,7 +1280,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1); BBI2->BB->erase(BBI2->BB->begin(), DI2); - // Predicate the 'true' block after removing its branch. 
+ // Remove branch from 'true' block and remove duplicated instructions. BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB); DI1 = BBI1->BB->end(); for (unsigned i = 0; i != NumDups2; ) { @@ -1294,9 +1293,8 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, ++i; } BBI1->BB->erase(DI1, BBI1->BB->end()); - PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1, Redefs); - // Predicate the 'false' block. + // Remove 'false' block branch and find the last instruction to predicate. BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB); DI2 = BBI2->BB->end(); while (NumDups2 != 0) { @@ -1308,6 +1306,55 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, if (!DI2->isDebugValue()) --NumDups2; } + + // Remember which registers would later be defined by the false block. + // This allows us not to predicate instructions in the true block that would + // later be re-defined. That is, rather than + // subeq r0, r1, #1 + // addne r0, r1, #1 + // generate: + // sub r0, r1, #1 + // addne r0, r1, #1 + SmallSet<unsigned, 4> RedefsByFalse; + SmallSet<unsigned, 4> ExtUses; + if (TII->isProfitableToUnpredicate(*BBI1->BB, *BBI2->BB)) { + for (MachineBasicBlock::iterator FI = BBI2->BB->begin(); FI != DI2; ++FI) { + if (FI->isDebugValue()) + continue; + SmallVector<unsigned, 4> Defs; + for (unsigned i = 0, e = FI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = FI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (MO.isDef()) { + Defs.push_back(Reg); + } else if (!RedefsByFalse.count(Reg)) { + // These are defined before ctrl flow reach the 'false' instructions. + // They cannot be modified by the 'true' instructions. + ExtUses.insert(Reg); + for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR) + ExtUses.insert(*SR); + } + } + + for (unsigned i = 0, e = Defs.size(); i != e; ++i) { + unsigned Reg = Defs[i]; + if (!ExtUses.count(Reg)) { + RedefsByFalse.insert(Reg); + for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR) + RedefsByFalse.insert(*SR); + } + } + } + } + + // Predicate the 'true' block. + PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1, Redefs, &RedefsByFalse); + + // Predicate the 'false' block. PredicateBlock(*BBI2, DI2, *Cond2, Redefs); // Merge the true block into the entry of the diamond. @@ -1319,7 +1366,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, // fold the tail block in as well. Otherwise, unless it falls through to the // tail, add a unconditional branch to it. if (TailBB) { - BBInfo TailBBI = BBAnalysis[TailBB->getNumber()]; + BBInfo &TailBBI = BBAnalysis[TailBB->getNumber()]; bool CanMergeTail = !TailBBI.HasFallThrough; // There may still be a fall-through edge from BBI1 or BBI2 to TailBB; // check if there are any other predecessors besides those. @@ -1356,15 +1403,49 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, return true; } +static bool MaySpeculate(const MachineInstr *MI, + SmallSet<unsigned, 4> &LaterRedefs, + const TargetInstrInfo *TII) { + bool SawStore = true; + if (!MI->isSafeToMove(TII, 0, SawStore)) + return false; + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (MO.isDef() && !LaterRedefs.count(Reg)) + return false; + } + + return true; +} + /// PredicateBlock - Predicate instructions from the start of the block to the /// specified end with the specified condition. 
void IfConverter::PredicateBlock(BBInfo &BBI, MachineBasicBlock::iterator E, SmallVectorImpl<MachineOperand> &Cond, - SmallSet<unsigned, 4> &Redefs) { + SmallSet<unsigned, 4> &Redefs, + SmallSet<unsigned, 4> *LaterRedefs) { + bool AnyUnpred = false; + bool MaySpec = LaterRedefs != 0; for (MachineBasicBlock::iterator I = BBI.BB->begin(); I != E; ++I) { if (I->isDebugValue() || TII->isPredicated(I)) continue; + // It may be possible not to predicate an instruction if it's the 'true' + // side of a diamond and the 'false' side may re-define the instruction's + // defs. + if (MaySpec && MaySpeculate(I, *LaterRedefs, TII)) { + AnyUnpred = true; + continue; + } + // If any instruction is predicated, then every instruction after it must + // be predicated. + MaySpec = false; if (!TII->PredicateInstruction(I, Cond)) { #ifndef NDEBUG dbgs() << "Unable to predicate " << *I << "!\n"; @@ -1383,6 +1464,8 @@ void IfConverter::PredicateBlock(BBInfo &BBI, BBI.NonPredSize = 0; ++NumIfConvBBs; + if (AnyUnpred) + ++NumUnpred; } /// CopyAndPredicateBlock - Copy and predicate instructions from source BB to @@ -1395,9 +1478,8 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, for (MachineBasicBlock::iterator I = FromBBI.BB->begin(), E = FromBBI.BB->end(); I != E; ++I) { - const MCInstrDesc &MCID = I->getDesc(); // Do not copy the end of the block branches. - if (IgnoreBr && MCID.isBranch()) + if (IgnoreBr && I->isBranch()) break; MachineInstr *MI = MF.CloneMachineInstr(I); diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index 726af4696578..d5ea666e4a17 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -14,14 +14,15 @@ #define DEBUG_TYPE "regalloc" #include "Spiller.h" -#include "LiveRangeEdit.h" #include "VirtRegMap.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineLoopInfo.h" @@ -173,8 +174,7 @@ private: void reMaterializeAll(); bool coalesceStackAccess(MachineInstr *MI, unsigned Reg); - bool foldMemoryOperand(MachineBasicBlock::iterator MI, - const SmallVectorImpl<unsigned> &Ops, + bool foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> >, MachineInstr *LoadMI = 0); void insertReload(LiveInterval &NewLI, SlotIndex, MachineBasicBlock::iterator MI); @@ -578,7 +578,7 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI, if (unsigned SrcReg = isFullCopyOf(MI, Reg)) { if (isSibling(SrcReg)) { LiveInterval &SrcLI = LIS.getInterval(SrcReg); - LiveRange *SrcLR = SrcLI.getLiveRangeContaining(VNI->def.getUseIndex()); + LiveRange *SrcLR = SrcLI.getLiveRangeContaining(VNI->def.getRegSlot(true)); assert(SrcLR && "Copy from non-existing value"); // Check if this COPY kills its source. SVI->second.KillsSource = (SrcLR->end == VNI->def); @@ -644,16 +644,18 @@ void InlineSpiller::analyzeSiblingValues() { if (VNI->isUnused()) continue; MachineInstr *DefMI = 0; + if (!VNI->isPHIDef()) { + DefMI = LIS.getInstructionFromIndex(VNI->def); + assert(DefMI && "No defining instruction"); + } // Check possible sibling copies. 
- if (VNI->isPHIDef() || VNI->getCopy()) { + if (VNI->isPHIDef() || DefMI->isCopy()) { VNInfo *OrigVNI = OrigLI.getVNInfoAt(VNI->def); assert(OrigVNI && "Def outside original live range"); if (OrigVNI->def != VNI->def) DefMI = traceSiblingValue(Reg, VNI, OrigVNI); } - if (!DefMI && !VNI->isPHIDef()) - DefMI = LIS.getInstructionFromIndex(VNI->def); - if (DefMI && Edit->checkRematerializable(VNI, DefMI, TII, AA)) { + if (DefMI && Edit->checkRematerializable(VNI, DefMI, AA)) { DEBUG(dbgs() << "Value " << PrintReg(Reg) << ':' << VNI->id << '@' << VNI->def << " may remat from " << *DefMI); } @@ -665,8 +667,8 @@ void InlineSpiller::analyzeSiblingValues() { /// a spill at a better location. bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI) { SlotIndex Idx = LIS.getInstructionIndex(CopyMI); - VNInfo *VNI = SpillLI.getVNInfoAt(Idx.getDefIndex()); - assert(VNI && VNI->def == Idx.getDefIndex() && "Not defined by copy"); + VNInfo *VNI = SpillLI.getVNInfoAt(Idx.getRegSlot()); + assert(VNI && VNI->def == Idx.getRegSlot() && "Not defined by copy"); SibValueMap::iterator I = SibValues.find(VNI); if (I == SibValues.end()) return false; @@ -726,7 +728,6 @@ bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI) { MRI.getRegClass(SVI.SpillReg), &TRI); --MII; // Point to store instruction. LIS.InsertMachineInstrInMaps(MII); - VRM.addSpillSlotUse(StackSlot, MII); DEBUG(dbgs() << "\thoisted: " << SVI.SpillVNI->def << '\t' << *MII); ++NumSpills; @@ -760,7 +761,7 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) { // Find all spills and copies of VNI. for (MachineRegisterInfo::use_nodbg_iterator UI = MRI.use_nodbg_begin(Reg); MachineInstr *MI = UI.skipInstruction();) { - if (!MI->isCopy() && !MI->getDesc().mayStore()) + if (!MI->isCopy() && !MI->mayStore()) continue; SlotIndex Idx = LIS.getInstructionIndex(MI); if (LI->getVNInfoAt(Idx) != VNI) @@ -770,9 +771,9 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) { if (unsigned DstReg = isFullCopyOf(MI, Reg)) { if (isSibling(DstReg)) { LiveInterval &DstLI = LIS.getInterval(DstReg); - VNInfo *DstVNI = DstLI.getVNInfoAt(Idx.getDefIndex()); + VNInfo *DstVNI = DstLI.getVNInfoAt(Idx.getRegSlot()); assert(DstVNI && "Missing defined value"); - assert(DstVNI->def == Idx.getDefIndex() && "Wrong copy def slot"); + assert(DstVNI->def == Idx.getRegSlot() && "Wrong copy def slot"); WorkList.push_back(std::make_pair(&DstLI, DstVNI)); } continue; @@ -811,7 +812,7 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) { MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def); for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) { - VNInfo *PVNI = LI->getVNInfoAt(LIS.getMBBEndIdx(*PI).getPrevSlot()); + VNInfo *PVNI = LI->getVNInfoBefore(LIS.getMBBEndIdx(*PI)); if (PVNI) WorkList.push_back(std::make_pair(LI, PVNI)); } @@ -824,7 +825,7 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) { continue; LiveInterval &SnipLI = LIS.getInterval(MI->getOperand(1).getReg()); assert(isRegToSpill(SnipLI.reg) && "Unexpected register in copy"); - VNInfo *SnipVNI = SnipLI.getVNInfoAt(VNI->def.getUseIndex()); + VNInfo *SnipVNI = SnipLI.getVNInfoAt(VNI->def.getRegSlot(true)); assert(SnipVNI && "Snippet undefined before copy"); WorkList.push_back(std::make_pair(&SnipLI, SnipVNI)); } while (!WorkList.empty()); @@ -833,7 +834,7 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) { /// reMaterializeFor - Attempt 
to rematerialize before MI instead of reloading. bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineBasicBlock::iterator MI) { - SlotIndex UseIdx = LIS.getInstructionIndex(MI).getUseIndex(); + SlotIndex UseIdx = LIS.getInstructionIndex(MI).getRegSlot(true); VNInfo *ParentVNI = VirtReg.getVNInfoAt(UseIdx.getBaseIndex()); if (!ParentVNI) { @@ -855,7 +856,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, SibValueMap::const_iterator SibI = SibValues.find(ParentVNI); if (SibI != SibValues.end()) RM.OrigMI = SibI->second.DefMI; - if (!Edit->canRematerializeAt(RM, UseIdx, false, LIS)) { + if (!Edit->canRematerializeAt(RM, UseIdx, false)) { markValueUsed(&VirtReg, ParentVNI); DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << *MI); return false; @@ -863,42 +864,37 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, // If the instruction also writes VirtReg.reg, it had better not require the // same register for uses and defs. - bool Reads, Writes; - SmallVector<unsigned, 8> Ops; - tie(Reads, Writes) = MI->readsWritesVirtualRegister(VirtReg.reg, &Ops); - if (Writes) { - for (unsigned i = 0, e = Ops.size(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(Ops[i]); - if (MO.isUse() ? MI->isRegTiedToDefOperand(Ops[i]) : MO.getSubReg()) { - markValueUsed(&VirtReg, ParentVNI); - DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << *MI); - return false; - } - } + SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops; + MIBundleOperands::RegInfo RI = + MIBundleOperands(MI).analyzeVirtReg(VirtReg.reg, &Ops); + if (RI.Tied) { + markValueUsed(&VirtReg, ParentVNI); + DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << *MI); + return false; } // Before rematerializing into a register for a single instruction, try to // fold a load into the instruction. That avoids allocating a new register. - if (RM.OrigMI->getDesc().canFoldAsLoad() && - foldMemoryOperand(MI, Ops, RM.OrigMI)) { + if (RM.OrigMI->canFoldAsLoad() && + foldMemoryOperand(Ops, RM.OrigMI)) { Edit->markRematerialized(RM.ParentVNI); ++NumFoldedLoads; return true; } // Alocate a new register for the remat. - LiveInterval &NewLI = Edit->createFrom(Original, LIS, VRM); + LiveInterval &NewLI = Edit->createFrom(Original); NewLI.markNotSpillable(); // Finally we can rematerialize OrigMI before MI. SlotIndex DefIdx = Edit->rematerializeAt(*MI->getParent(), MI, NewLI.reg, RM, - LIS, TII, TRI); + TRI); DEBUG(dbgs() << "\tremat: " << DefIdx << '\t' << *LIS.getInstructionFromIndex(DefIdx)); // Replace operands for (unsigned i = 0, e = Ops.size(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(Ops[i]); + MachineOperand &MO = MI->getOperand(Ops[i].second); if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg) { MO.setReg(NewLI.reg); MO.setIsKill(); @@ -906,8 +902,8 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, } DEBUG(dbgs() << "\t " << UseIdx << '\t' << *MI); - VNInfo *DefVNI = NewLI.getNextValue(DefIdx, 0, LIS.getVNInfoAllocator()); - NewLI.addRange(LiveRange(DefIdx, UseIdx.getDefIndex(), DefVNI)); + VNInfo *DefVNI = NewLI.getNextValue(DefIdx, LIS.getVNInfoAllocator()); + NewLI.addRange(LiveRange(DefIdx, UseIdx.getRegSlot(), DefVNI)); DEBUG(dbgs() << "\tinterval: " << NewLI << '\n'); ++NumRemats; return true; @@ -917,7 +913,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, /// and trim the live ranges after. void InlineSpiller::reMaterializeAll() { // analyzeSiblingValues has already tested all relevant defining instructions. 
- if (!Edit->anyRematerializable(LIS, TII, AA)) + if (!Edit->anyRematerializable(AA)) return; UsedValues.clear(); @@ -929,7 +925,7 @@ void InlineSpiller::reMaterializeAll() { LiveInterval &LI = LIS.getInterval(Reg); for (MachineRegisterInfo::use_nodbg_iterator RI = MRI.use_nodbg_begin(Reg); - MachineInstr *MI = RI.skipInstruction();) + MachineInstr *MI = RI.skipBundle();) anyRemat |= reMaterializeFor(LI, MI); } if (!anyRemat) @@ -958,7 +954,7 @@ void InlineSpiller::reMaterializeAll() { if (DeadDefs.empty()) return; DEBUG(dbgs() << "Remat created " << DeadDefs.size() << " dead defs.\n"); - Edit->eliminateDeadDefs(DeadDefs, LIS, VRM, TII); + Edit->eliminateDeadDefs(DeadDefs, RegsToSpill); // Get rid of deleted and empty intervals. for (unsigned i = RegsToSpill.size(); i != 0; --i) { @@ -970,7 +966,7 @@ void InlineSpiller::reMaterializeAll() { LiveInterval &LI = LIS.getInterval(Reg); if (!LI.empty()) continue; - Edit->eraseVirtReg(Reg, LIS); + Edit->eraseVirtReg(Reg); RegsToSpill.erase(RegsToSpill.begin() + (i - 1)); } DEBUG(dbgs() << RegsToSpill.size() << " registers to spill after remat.\n"); @@ -1008,23 +1004,35 @@ bool InlineSpiller::coalesceStackAccess(MachineInstr *MI, unsigned Reg) { return true; } -/// foldMemoryOperand - Try folding stack slot references in Ops into MI. -/// @param MI Instruction using or defining the current register. -/// @param Ops Operand indices from readsWritesVirtualRegister(). +/// foldMemoryOperand - Try folding stack slot references in Ops into their +/// instructions. +/// +/// @param Ops Operand indices from analyzeVirtReg(). /// @param LoadMI Load instruction to use instead of stack slot when non-null. -/// @return True on success, and MI will be erased. -bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI, - const SmallVectorImpl<unsigned> &Ops, - MachineInstr *LoadMI) { +/// @return True on success. +bool InlineSpiller:: +foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops, + MachineInstr *LoadMI) { + if (Ops.empty()) + return false; + // Don't attempt folding in bundles. + MachineInstr *MI = Ops.front().first; + if (Ops.back().first != MI || MI->isBundled()) + return false; + bool WasCopy = MI->isCopy(); + unsigned ImpReg = 0; + // TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied // operands. SmallVector<unsigned, 8> FoldOps; for (unsigned i = 0, e = Ops.size(); i != e; ++i) { - unsigned Idx = Ops[i]; + unsigned Idx = Ops[i].second; MachineOperand &MO = MI->getOperand(Idx); - if (MO.isImplicit()) + if (MO.isImplicit()) { + ImpReg = MO.getReg(); continue; + } // FIXME: Teach targets to deal with subregs. if (MO.getSubReg()) return false; @@ -1042,13 +1050,24 @@ bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI, if (!FoldMI) return false; LIS.ReplaceMachineInstrInMaps(MI, FoldMI); - if (!LoadMI) - VRM.addSpillSlotUse(StackSlot, FoldMI); MI->eraseFromParent(); - DEBUG(dbgs() << "\tfolded: " << *FoldMI); + + // TII.foldMemoryOperand may have left some implicit operands on the + // instruction. Strip them. 
+ if (ImpReg) + for (unsigned i = FoldMI->getNumOperands(); i; --i) { + MachineOperand &MO = FoldMI->getOperand(i - 1); + if (!MO.isReg() || !MO.isImplicit()) + break; + if (MO.getReg() == ImpReg) + FoldMI->RemoveOperand(i - 1); + } + + DEBUG(dbgs() << "\tfolded: " << LIS.getInstructionIndex(FoldMI) << '\t' + << *FoldMI); if (!WasCopy) ++NumFolded; - else if (Ops.front() == 0) + else if (Ops.front().second == 0) ++NumSpills; else ++NumReloads; @@ -1063,11 +1082,9 @@ void InlineSpiller::insertReload(LiveInterval &NewLI, TII.loadRegFromStackSlot(MBB, MI, NewLI.reg, StackSlot, MRI.getRegClass(NewLI.reg), &TRI); --MI; // Point to load instruction. - SlotIndex LoadIdx = LIS.InsertMachineInstrInMaps(MI).getDefIndex(); - VRM.addSpillSlotUse(StackSlot, MI); + SlotIndex LoadIdx = LIS.InsertMachineInstrInMaps(MI).getRegSlot(); DEBUG(dbgs() << "\treload: " << LoadIdx << '\t' << *MI); - VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, 0, - LIS.getVNInfoAllocator()); + VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, LIS.getVNInfoAllocator()); NewLI.addRange(LiveRange(LoadIdx, Idx, LoadVNI)); ++NumReloads; } @@ -1079,10 +1096,9 @@ void InlineSpiller::insertSpill(LiveInterval &NewLI, const LiveInterval &OldLI, TII.storeRegToStackSlot(MBB, ++MI, NewLI.reg, true, StackSlot, MRI.getRegClass(NewLI.reg), &TRI); --MI; // Point to store instruction. - SlotIndex StoreIdx = LIS.InsertMachineInstrInMaps(MI).getDefIndex(); - VRM.addSpillSlotUse(StackSlot, MI); + SlotIndex StoreIdx = LIS.InsertMachineInstrInMaps(MI).getRegSlot(); DEBUG(dbgs() << "\tspilled: " << StoreIdx << '\t' << *MI); - VNInfo *StoreVNI = NewLI.getNextValue(Idx, 0, LIS.getVNInfoAllocator()); + VNInfo *StoreVNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator()); NewLI.addRange(LiveRange(Idx, StoreIdx, StoreVNI)); ++NumSpills; } @@ -1093,8 +1109,8 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { LiveInterval &OldLI = LIS.getInterval(Reg); // Iterate over instructions using Reg. - for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Reg); - MachineInstr *MI = RI.skipInstruction();) { + for (MachineRegisterInfo::reg_iterator RegI = MRI.reg_begin(Reg); + MachineInstr *MI = RegI.skipBundle();) { // Debug values are not allowed to affect codegen. if (MI->isDebugValue()) { @@ -1123,14 +1139,14 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { continue; // Analyze instruction. - bool Reads, Writes; - SmallVector<unsigned, 8> Ops; - tie(Reads, Writes) = MI->readsWritesVirtualRegister(Reg, &Ops); + SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops; + MIBundleOperands::RegInfo RI = + MIBundleOperands(MI).analyzeVirtReg(Reg, &Ops); // Find the slot index where this instruction reads and writes OldLI. // This is usually the def slot, except for tied early clobbers. - SlotIndex Idx = LIS.getInstructionIndex(MI).getDefIndex(); - if (VNInfo *VNI = OldLI.getVNInfoAt(Idx.getUseIndex())) + SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); + if (VNInfo *VNI = OldLI.getVNInfoAt(Idx.getRegSlot(true))) if (SlotIndex::isSameInstr(Idx, VNI->def)) Idx = VNI->def; @@ -1143,7 +1159,7 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { SnippetCopies.insert(MI); continue; } - if (Writes) { + if (RI.Writes) { // Hoist the spill of a sib-reg copy. if (hoistSpill(OldLI, MI)) { // This COPY is now dead, the value is already in the stack slot. @@ -1160,24 +1176,24 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { } // Attempt to fold memory ops. 
- if (foldMemoryOperand(MI, Ops)) + if (foldMemoryOperand(Ops)) continue; // Allocate interval around instruction. // FIXME: Infer regclass from instruction alone. - LiveInterval &NewLI = Edit->createFrom(Reg, LIS, VRM); + LiveInterval &NewLI = Edit->createFrom(Reg); NewLI.markNotSpillable(); - if (Reads) + if (RI.Reads) insertReload(NewLI, Idx, MI); // Rewrite instruction operands. bool hasLiveDef = false; for (unsigned i = 0, e = Ops.size(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(Ops[i]); + MachineOperand &MO = Ops[i].first->getOperand(Ops[i].second); MO.setReg(NewLI.reg); if (MO.isUse()) { - if (!MI->isRegTiedToDefOperand(Ops[i])) + if (!Ops[i].first->isRegTiedToDefOperand(Ops[i].second)) MO.setIsKill(); } else { if (!MO.isDead()) @@ -1187,15 +1203,15 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << *MI); // FIXME: Use a second vreg if instruction has no tied ops. - if (Writes) { - if (hasLiveDef) - insertSpill(NewLI, OldLI, Idx, MI); - else { - // This instruction defines a dead value. We don't need to spill it, - // but do create a live range for the dead value. - VNInfo *VNI = NewLI.getNextValue(Idx, 0, LIS.getVNInfoAllocator()); - NewLI.addRange(LiveRange(Idx, Idx.getNextSlot(), VNI)); - } + if (RI.Writes) { + if (hasLiveDef) + insertSpill(NewLI, OldLI, Idx, MI); + else { + // This instruction defines a dead value. We don't need to spill it, + // but do create a live range for the dead value. + VNInfo *VNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator()); + NewLI.addRange(LiveRange(Idx, Idx.getDeadSlot(), VNI)); + } } DEBUG(dbgs() << "\tinterval: " << NewLI << '\n'); @@ -1208,7 +1224,7 @@ void InlineSpiller::spillAll() { if (StackSlot == VirtRegMap::NO_STACK_SLOT) { StackSlot = VRM.assignVirt2StackSlot(Original); StackInt = &LSS.getOrCreateInterval(StackSlot, MRI.getRegClass(Original)); - StackInt->getNextValue(SlotIndex(), 0, LSS.getVNInfoAllocator()); + StackInt->getNextValue(SlotIndex(), LSS.getVNInfoAllocator()); } else StackInt = &LSS.getInterval(StackSlot); @@ -1228,7 +1244,7 @@ void InlineSpiller::spillAll() { // Hoisted spills may cause dead code. if (!DeadDefs.empty()) { DEBUG(dbgs() << "Eliminating " << DeadDefs.size() << " dead defs\n"); - Edit->eliminateDeadDefs(DeadDefs, LIS, VRM, TII); + Edit->eliminateDeadDefs(DeadDefs, RegsToSpill); } // Finally delete the SnippetCopies. @@ -1237,7 +1253,6 @@ void InlineSpiller::spillAll() { MachineInstr *MI = RI.skipInstruction();) { assert(SnippetCopies.count(MI) && "Remaining use wasn't a snippet copy"); // FIXME: Do this with a LiveRangeEdit callback. - VRM.RemoveMachineInstrFromMaps(MI); LIS.RemoveMachineInstrFromMaps(MI); MI->eraseFromParent(); } @@ -1245,7 +1260,7 @@ void InlineSpiller::spillAll() { // Delete all spilled registers. 
for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) - Edit->eraseVirtReg(RegsToSpill[i], LIS); + Edit->eraseVirtReg(RegsToSpill[i]); } void InlineSpiller::spill(LiveRangeEdit &edit) { @@ -1274,5 +1289,5 @@ void InlineSpiller::spill(LiveRangeEdit &edit) { if (!RegsToSpill.empty()) spillAll(); - Edit->calculateRegClassAndHint(MF, LIS, Loops); + Edit->calculateRegClassAndHint(MF, Loops); } diff --git a/lib/CodeGen/InterferenceCache.cpp b/lib/CodeGen/InterferenceCache.cpp index 29b47bd67ece..8368b58880a3 100644 --- a/lib/CodeGen/InterferenceCache.cpp +++ b/lib/CodeGen/InterferenceCache.cpp @@ -1,4 +1,4 @@ -//===-- InterferenceCache.h - Caching per-block interference ---*- C++ -*--===// +//===-- InterferenceCache.cpp - Caching per-block interference ---------*--===// // // The LLVM Compiler Infrastructure // @@ -15,6 +15,7 @@ #include "InterferenceCache.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" using namespace llvm; @@ -24,13 +25,14 @@ InterferenceCache::BlockInterference InterferenceCache::Cursor::NoInterference; void InterferenceCache::init(MachineFunction *mf, LiveIntervalUnion *liuarray, SlotIndexes *indexes, + LiveIntervals *lis, const TargetRegisterInfo *tri) { MF = mf; LIUArray = liuarray; TRI = tri; PhysRegEntries.assign(TRI->getNumRegs(), 0); for (unsigned i = 0; i != CacheEntries; ++i) - Entries[i].clear(mf, indexes); + Entries[i].clear(mf, indexes, lis); } InterferenceCache::Entry *InterferenceCache::get(unsigned PhysReg) { @@ -78,7 +80,7 @@ void InterferenceCache::Entry::reset(unsigned physReg, PhysReg = physReg; Blocks.resize(MF->getNumBlockIDs()); Aliases.clear(); - for (const unsigned *AS = TRI->getOverlaps(PhysReg); *AS; ++AS) { + for (const uint16_t *AS = TRI->getOverlaps(PhysReg); *AS; ++AS) { LiveIntervalUnion *LIU = LIUArray + *AS; Aliases.push_back(std::make_pair(LIU, LIU->getTag())); } @@ -94,7 +96,7 @@ void InterferenceCache::Entry::reset(unsigned physReg, bool InterferenceCache::Entry::valid(LiveIntervalUnion *LIUArray, const TargetRegisterInfo *TRI) { unsigned i = 0, e = Aliases.size(); - for (const unsigned *AS = TRI->getOverlaps(PhysReg); *AS; ++AS, ++i) { + for (const uint16_t *AS = TRI->getOverlaps(PhysReg); *AS; ++AS, ++i) { LiveIntervalUnion *LIU = LIUArray + *AS; if (i == e || Aliases[i].first != LIU) return false; @@ -121,6 +123,8 @@ void InterferenceCache::Entry::update(unsigned MBBNum) { MachineFunction::const_iterator MFI = MF->getBlockNumbered(MBBNum); BlockInterference *BI = &Blocks[MBBNum]; + ArrayRef<SlotIndex> RegMaskSlots; + ArrayRef<const uint32_t*> RegMaskBits; for (;;) { BI->Tag = Tag; BI->First = BI->Last = SlotIndex(); @@ -137,6 +141,18 @@ void InterferenceCache::Entry::update(unsigned MBBNum) { BI->First = StartI; } + // Also check for register mask interference. + RegMaskSlots = LIS->getRegMaskSlotsInBlock(MBBNum); + RegMaskBits = LIS->getRegMaskBitsInBlock(MBBNum); + SlotIndex Limit = BI->First.isValid() ? BI->First : Stop; + for (unsigned i = 0, e = RegMaskSlots.size(); + i != e && RegMaskSlots[i] < Limit; ++i) + if (MachineOperand::clobbersPhysReg(RegMaskBits[i], PhysReg)) { + // Register mask i clobbers PhysReg before the LIU interference. + BI->First = RegMaskSlots[i]; + break; + } + PrevPos = Stop; if (BI->First.isValid()) break; @@ -166,4 +182,15 @@ void InterferenceCache::Entry::update(unsigned MBBNum) { if (Backup) ++I; } + + // Also check for register mask interference. + SlotIndex Limit = BI->Last.isValid() ? 
BI->Last : Start; + for (unsigned i = RegMaskSlots.size(); + i && RegMaskSlots[i-1].getDeadSlot() > Limit; --i) + if (MachineOperand::clobbersPhysReg(RegMaskBits[i-1], PhysReg)) { + // Register mask i-1 clobbers PhysReg after the LIU interference. + // Model the regmask clobber as a dead def. + BI->Last = RegMaskSlots[i-1].getDeadSlot(); + break; + } } diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h index 4df0a9e5c393..485a325aa146 100644 --- a/lib/CodeGen/InterferenceCache.h +++ b/lib/CodeGen/InterferenceCache.h @@ -18,10 +18,11 @@ namespace llvm { +class LiveIntervals; + class InterferenceCache { const TargetRegisterInfo *TRI; LiveIntervalUnion *LIUArray; - SlotIndexes *Indexes; MachineFunction *MF; /// BlockInterference - information about the interference in a single basic @@ -52,6 +53,9 @@ class InterferenceCache { /// Indexes - Mapping block numbers to SlotIndex ranges. SlotIndexes *Indexes; + /// LIS - Used for accessing register mask interference maps. + LiveIntervals *LIS; + /// PrevPos - The previous position the iterators were moved to. SlotIndex PrevPos; @@ -71,13 +75,14 @@ class InterferenceCache { void update(unsigned MBBNum); public: - Entry() : PhysReg(0), Tag(0), RefCount(0), Indexes(0) {} + Entry() : PhysReg(0), Tag(0), RefCount(0), Indexes(0), LIS(0) {} - void clear(MachineFunction *mf, SlotIndexes *indexes) { + void clear(MachineFunction *mf, SlotIndexes *indexes, LiveIntervals *lis) { assert(!hasRefs() && "Cannot clear cache entry with references"); PhysReg = 0; MF = mf; Indexes = indexes; + LIS = lis; } unsigned getPhysReg() const { return PhysReg; } @@ -124,10 +129,10 @@ class InterferenceCache { Entry *get(unsigned PhysReg); public: - InterferenceCache() : TRI(0), LIUArray(0), Indexes(0), MF(0), RoundRobin(0) {} + InterferenceCache() : TRI(0), LIUArray(0), MF(0), RoundRobin(0) {} /// init - Prepare cache for a new function. - void init(MachineFunction*, LiveIntervalUnion*, SlotIndexes*, + void init(MachineFunction*, LiveIntervalUnion*, SlotIndexes*, LiveIntervals*, const TargetRegisterInfo *); /// getMaxCursors - Return the maximum number of concurrent cursors that can diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp index 0f92c2d06bdd..a9ca42f69b97 100644 --- a/lib/CodeGen/IntrinsicLowering.cpp +++ b/lib/CodeGen/IntrinsicLowering.cpp @@ -448,11 +448,6 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { case Intrinsic::dbg_declare: break; // Simply strip out debugging intrinsics - case Intrinsic::eh_exception: - case Intrinsic::eh_selector: - CI->replaceAllUsesWith(Constant::getNullValue(CI->getType())); - break; - case Intrinsic::eh_typeid_for: // Return something different to eh_selector. CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1)); diff --git a/lib/CodeGen/JITCodeEmitter.cpp b/lib/CodeGen/JITCodeEmitter.cpp new file mode 100644 index 000000000000..96a53892f6d3 --- /dev/null +++ b/lib/CodeGen/JITCodeEmitter.cpp @@ -0,0 +1,14 @@ +//===-- llvm/CodeGen/JITCodeEmitter.cpp - Code emission --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/JITCodeEmitter.h" + +using namespace llvm; + +void JITCodeEmitter::anchor() { } diff --git a/lib/CodeGen/LLVMBuild.txt b/lib/CodeGen/LLVMBuild.txt new file mode 100644 index 000000000000..fee0347ea659 --- /dev/null +++ b/lib/CodeGen/LLVMBuild.txt @@ -0,0 +1,25 @@ +;===- ./lib/CodeGen/LLVMBuild.txt ------------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[common] +subdirectories = AsmPrinter SelectionDAG + +[component_0] +type = Library +name = CodeGen +parent = Libraries +required_libraries = Analysis Core MC Scalar Support Target TransformUtils diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 187147a3e252..a1f479a4275f 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -11,82 +11,42 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/PassManager.h" -#include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/Verifier.h" -#include "llvm/Assembly/PrintModulePass.h" #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/GCStrategy.h" -#include "llvm/CodeGen/Passes.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/FormattedStream.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; -namespace llvm { - bool EnableFastISel; -} +// Enable or disable FastISel. Both options are needed, because +// FastISel is enabled by default with -fast, and we wish to be +// able to enable or disable fast-isel independently from -O0. 
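Aside, not part of the patch: the tri-state pattern described in the comment above (and used for the EnableFastISelOption declaration that follows) can be sketched in isolation. The option name and helper below are invented for illustration; only cl::opt, cl::boolOrDefault and the BOU_* values are real LLVM API, as used elsewhere in this file.

  #include "llvm/Support/CommandLine.h"
  using namespace llvm;

  // Hypothetical tri-state flag: leaving it unset means "derive the answer
  // from something else", here a default computed from the -O level.
  static cl::opt<cl::boolOrDefault>
  EnableWidgetOption("enable-widget", cl::Hidden,
                     cl::desc("Force the widget on or off"));

  static bool shouldEnableWidget(bool DefaultOn) {
    switch (EnableWidgetOption) {
    case cl::BOU_UNSET: return DefaultOn;  // user said nothing; use default
    case cl::BOU_TRUE:  return true;       // explicitly requested
    case cl::BOU_FALSE: return false;      // explicitly suppressed
    }
    return DefaultOn;
  }

A plain cl::opt<bool> could not distinguish "not given" from "explicitly off", which is exactly the distinction needed when -O0 is allowed to pick the default.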
+static cl::opt<cl::boolOrDefault> +EnableFastISelOption("fast-isel", cl::Hidden, + cl::desc("Enable the \"fast\" instruction selector")); -static cl::opt<bool> DisablePostRA("disable-post-ra", cl::Hidden, - cl::desc("Disable Post Regalloc")); -static cl::opt<bool> DisableBranchFold("disable-branch-fold", cl::Hidden, - cl::desc("Disable branch folding")); -static cl::opt<bool> DisableTailDuplicate("disable-tail-duplicate", cl::Hidden, - cl::desc("Disable tail duplication")); -static cl::opt<bool> DisableEarlyTailDup("disable-early-taildup", cl::Hidden, - cl::desc("Disable pre-register allocation tail duplication")); -static cl::opt<bool> DisableCodePlace("disable-code-place", cl::Hidden, - cl::desc("Disable code placement")); -static cl::opt<bool> DisableSSC("disable-ssc", cl::Hidden, - cl::desc("Disable Stack Slot Coloring")); -static cl::opt<bool> DisableMachineDCE("disable-machine-dce", cl::Hidden, - cl::desc("Disable Machine Dead Code Elimination")); -static cl::opt<bool> DisableMachineLICM("disable-machine-licm", cl::Hidden, - cl::desc("Disable Machine LICM")); -static cl::opt<bool> DisableMachineCSE("disable-machine-cse", cl::Hidden, - cl::desc("Disable Machine Common Subexpression Elimination")); -static cl::opt<bool> DisablePostRAMachineLICM("disable-postra-machine-licm", - cl::Hidden, - cl::desc("Disable Machine LICM")); -static cl::opt<bool> DisableMachineSink("disable-machine-sink", cl::Hidden, - cl::desc("Disable Machine Sinking")); -static cl::opt<bool> DisableLSR("disable-lsr", cl::Hidden, - cl::desc("Disable Loop Strength Reduction Pass")); -static cl::opt<bool> DisableCGP("disable-cgp", cl::Hidden, - cl::desc("Disable Codegen Prepare")); -static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden, - cl::desc("Print LLVM IR produced by the loop-reduce pass")); -static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden, - cl::desc("Print LLVM IR input to isel pass")); -static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden, - cl::desc("Dump garbage collector data")); static cl::opt<bool> ShowMCEncoding("show-mc-encoding", cl::Hidden, cl::desc("Show encoding in .s output")); static cl::opt<bool> ShowMCInst("show-mc-inst", cl::Hidden, cl::desc("Show instruction structure in .s output")); -static cl::opt<bool> EnableMCLogging("enable-mc-api-logging", cl::Hidden, - cl::desc("Enable MC API logging")); -static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden, - cl::desc("Verify generated machine code"), - cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL)); static cl::opt<cl::boolOrDefault> AsmVerbose("asm-verbose", cl::desc("Add comments to directives."), @@ -94,25 +54,20 @@ AsmVerbose("asm-verbose", cl::desc("Add comments to directives."), static bool getVerboseAsm() { switch (AsmVerbose) { - default: case cl::BOU_UNSET: return TargetMachine::getAsmVerbosityDefault(); case cl::BOU_TRUE: return true; case cl::BOU_FALSE: return false; } + llvm_unreachable("Invalid verbose asm state"); } -// Enable or disable FastISel. Both options are needed, because -// FastISel is enabled by default with -fast, and we wish to be -// able to enable or disable fast-isel independently from -O0. 
-static cl::opt<cl::boolOrDefault> -EnableFastISelOption("fast-isel", cl::Hidden, - cl::desc("Enable the \"fast\" instruction selector")); - LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM) - : TargetMachine(T, Triple, CPU, FS) { - CodeGenInfo = T.createMCCodeGenInfo(Triple, RM, CM); + TargetOptions Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : TargetMachine(T, Triple, CPU, FS, Options) { + CodeGenInfo = T.createMCCodeGenInfo(Triple, RM, CM, OL); AsmInfo = T.createMCAsmInfo(Triple); // TargetSelect.h moved to a different directory between LLVM 2.9 and 3.0, // and if the old one gets included then MCAsmInfo will be NULL and @@ -123,16 +78,88 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple, "and that InitializeAllTargetMCs() is being invoked!"); } +/// Turn exception handling constructs into something the code generators can +/// handle. +static void addPassesToHandleExceptions(TargetMachine *TM, + PassManagerBase &PM) { + switch (TM->getMCAsmInfo()->getExceptionHandlingType()) { + case ExceptionHandling::SjLj: + // SjLj piggy-backs on dwarf for this bit. The cleanups done apply to both + // Dwarf EH prepare needs to be run after SjLj prepare. Otherwise, + // catch info can get misplaced when a selector ends up more than one block + // removed from the parent invoke(s). This could happen when a landing + // pad is shared by multiple invokes and is also a target of a normal + // edge from elsewhere. + PM.add(createSjLjEHPreparePass(TM->getTargetLowering())); + // FALLTHROUGH + case ExceptionHandling::DwarfCFI: + case ExceptionHandling::ARM: + case ExceptionHandling::Win64: + PM.add(createDwarfEHPass(TM)); + break; + case ExceptionHandling::None: + PM.add(createLowerInvokePass(TM->getTargetLowering())); + + // The lower invoke pass may create unreachable code. Remove it. + PM.add(createUnreachableBlockEliminationPass()); + break; + } +} + +/// addPassesToX helper drives creation and initialization of TargetPassConfig. +static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM, + PassManagerBase &PM, + bool DisableVerify) { + // Targets may override createPassConfig to provide a target-specific sublass. + TargetPassConfig *PassConfig = TM->createPassConfig(PM); + + // Set PassConfig options provided by TargetMachine. + PassConfig->setDisableVerify(DisableVerify); + + PM.add(PassConfig); + + PassConfig->addIRPasses(); + + addPassesToHandleExceptions(TM, PM); + + PassConfig->addISelPrepare(); + + // Install a MachineModuleInfo class, which is an immutable pass that holds + // all the per-module stuff we're generating, including MCContext. + MachineModuleInfo *MMI = + new MachineModuleInfo(*TM->getMCAsmInfo(), *TM->getRegisterInfo(), + &TM->getTargetLowering()->getObjFileLowering()); + PM.add(MMI); + MCContext *Context = &MMI->getContext(); // Return the MCContext by-ref. + + // Set up a MachineFunction for the rest of CodeGen to work on. + PM.add(new MachineFunctionAnalysis(*TM)); + + // Enable FastISel with -fast, but allow that to be overridden. + if (EnableFastISelOption == cl::BOU_TRUE || + (TM->getOptLevel() == CodeGenOpt::None && + EnableFastISelOption != cl::BOU_FALSE)) + TM->setFastISel(true); + + // Ask the target for an isel. 
+ if (PassConfig->addInstSelector()) + return NULL; + + PassConfig->addMachinePasses(); + + PassConfig->setInitialized(); + + return Context; +} + bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, formatted_raw_ostream &Out, CodeGenFileType FileType, - CodeGenOpt::Level OptLevel, bool DisableVerify) { // Add common CodeGen passes. - MCContext *Context = 0; - if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Context)) + MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify); + if (!Context) return true; - assert(Context != 0 && "Failed to get MCContext"); if (hasMCSaveTempLabels()) Context->setAllowTemporaryLabels(false); @@ -142,10 +169,11 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, OwningPtr<MCStreamer> AsmStreamer; switch (FileType) { - default: return true; case CGFT_AssemblyFile: { MCInstPrinter *InstPrinter = - getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI, STI); + getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI, + *getInstrInfo(), + Context->getRegisterInfo(), STI); // Create a code emitter if asked to show the encoding. MCCodeEmitter *MCE = 0; @@ -160,6 +188,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, getVerboseAsm(), hasMCUseLoc(), hasMCUseCFI(), + hasMCUseDwarfDirectory(), InstPrinter, MCE, MAB, ShowMCInst); @@ -189,9 +218,6 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, break; } - if (EnableMCLogging) - AsmStreamer.reset(createLoggingStreamer(AsmStreamer.take(), errs())); - // Create the AsmPrinter, which takes ownership of AsmStreamer if successful. FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer); if (Printer == 0) @@ -214,14 +240,13 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, /// bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, JITCodeEmitter &JCE, - CodeGenOpt::Level OptLevel, bool DisableVerify) { // Add common CodeGen passes. - MCContext *Ctx = 0; - if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Ctx)) + MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify); + if (!Context) return true; - addCodeEmitter(PM, OptLevel, JCE); + addCodeEmitter(PM, JCE); PM.add(createGCInfoDeleter()); return false; // success! @@ -235,10 +260,10 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx, raw_ostream &Out, - CodeGenOpt::Level OptLevel, bool DisableVerify) { // Add common CodeGen passes. - if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Ctx)) + Ctx = addPassesToGenerateCode(this, PM, DisableVerify); + if (!Ctx) return true; if (hasMCSaveTempLabels()) @@ -247,7 +272,8 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, // Create the code emitter for the target if it exists. If not, .o file // emission fails. const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>(); - MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(),STI, *Ctx); + MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(),STI, + *Ctx); MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple()); if (MCE == 0 || MAB == 0) return true; @@ -271,227 +297,3 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, return false; // success! 
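Aside (illustrative, not from the patch): with the CodeGenOpt::Level parameter dropped from these entry points, a driver along the lines of llc now invokes them roughly as follows. The helper name is invented, the module and output stream come from the caller, and error handling is elided.

  #include "llvm/Module.h"
  #include "llvm/PassManager.h"
  #include "llvm/Support/FormattedStream.h"
  #include "llvm/Support/raw_ostream.h"
  #include "llvm/Target/TargetMachine.h"
  using namespace llvm;

  // Run the codegen pipeline over M and write an object file to OS.
  // Returns true on failure, mirroring addPassesToEmitFile's convention.
  static bool emitObjectFile(Module &M, TargetMachine &TM, raw_ostream &OS) {
    PassManager PM;
    formatted_raw_ostream FOS(OS);
    // The optimization level is no longer passed here; it now travels with
    // the TargetMachine (see the new CodeGenOpt::Level constructor argument).
    if (TM.addPassesToEmitFile(PM, FOS, TargetMachine::CGFT_ObjectFile,
                               /*DisableVerify=*/false))
      return true; // this target cannot emit object files
    PM.run(M);
    return false;
  }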
} - -static void printNoVerify(PassManagerBase &PM, const char *Banner) { - if (PrintMachineCode) - PM.add(createMachineFunctionPrinterPass(dbgs(), Banner)); -} - -static void printAndVerify(PassManagerBase &PM, - const char *Banner) { - if (PrintMachineCode) - PM.add(createMachineFunctionPrinterPass(dbgs(), Banner)); - - if (VerifyMachineCode) - PM.add(createMachineVerifierPass(Banner)); -} - -/// addCommonCodeGenPasses - Add standard LLVM codegen passes used for both -/// emitting to assembly files or machine code output. -/// -bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - bool DisableVerify, - MCContext *&OutContext) { - // Standard LLVM-Level Passes. - - // Basic AliasAnalysis support. - // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that - // BasicAliasAnalysis wins if they disagree. This is intended to help - // support "obvious" type-punning idioms. - PM.add(createTypeBasedAliasAnalysisPass()); - PM.add(createBasicAliasAnalysisPass()); - - // Before running any passes, run the verifier to determine if the input - // coming from the front-end and/or optimizer is valid. - if (!DisableVerify) - PM.add(createVerifierPass()); - - // Run loop strength reduction before anything else. - if (OptLevel != CodeGenOpt::None && !DisableLSR) { - PM.add(createLoopStrengthReducePass(getTargetLowering())); - if (PrintLSR) - PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs())); - } - - PM.add(createGCLoweringPass()); - - // Make sure that no unreachable blocks are instruction selected. - PM.add(createUnreachableBlockEliminationPass()); - - // Turn exception handling constructs into something the code generators can - // handle. - switch (getMCAsmInfo()->getExceptionHandlingType()) { - case ExceptionHandling::SjLj: - // SjLj piggy-backs on dwarf for this bit. The cleanups done apply to both - // Dwarf EH prepare needs to be run after SjLj prepare. Otherwise, - // catch info can get misplaced when a selector ends up more than one block - // removed from the parent invoke(s). This could happen when a landing - // pad is shared by multiple invokes and is also a target of a normal - // edge from elsewhere. - PM.add(createSjLjEHPass(getTargetLowering())); - // FALLTHROUGH - case ExceptionHandling::DwarfCFI: - case ExceptionHandling::ARM: - case ExceptionHandling::Win64: - PM.add(createDwarfEHPass(this)); - break; - case ExceptionHandling::None: - PM.add(createLowerInvokePass(getTargetLowering())); - - // The lower invoke pass may create unreachable code. Remove it. - PM.add(createUnreachableBlockEliminationPass()); - break; - } - - if (OptLevel != CodeGenOpt::None && !DisableCGP) - PM.add(createCodeGenPreparePass(getTargetLowering())); - - PM.add(createStackProtectorPass(getTargetLowering())); - - addPreISel(PM, OptLevel); - - if (PrintISelInput) - PM.add(createPrintFunctionPass("\n\n" - "*** Final LLVM Code input to ISel ***\n", - &dbgs())); - - // All passes which modify the LLVM IR are now complete; run the verifier - // to ensure that the IR is valid. - if (!DisableVerify) - PM.add(createVerifierPass()); - - // Standard Lower-Level Passes. - - // Install a MachineModuleInfo class, which is an immutable pass that holds - // all the per-module stuff we're generating, including MCContext. - MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo(), - *getRegisterInfo(), - &getTargetLowering()->getObjFileLowering()); - PM.add(MMI); - OutContext = &MMI->getContext(); // Return the MCContext specifically by-ref. 
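For context on the MachineModuleInfo / MCContext plumbing above (present in both the old and the new pipeline code): MMI is installed as an immutable pass so that later machine passes can reach the per-module MC state. A minimal, hypothetical consumer is sketched below; the pass is invented for illustration and its registration boilerplate is omitted.

  #include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/CodeGen/MachineFunctionPass.h"
  #include "llvm/CodeGen/MachineModuleInfo.h"
  #include "llvm/MC/MCContext.h"
  using namespace llvm;

  namespace {
  // Demonstration only: shows how a codegen pass reaches the MCContext that
  // the MachineModuleInfo immutable pass owns.
  struct MCContextUserDemo : public MachineFunctionPass {
    static char ID;
    MCContextUserDemo() : MachineFunctionPass(ID) {}

    virtual bool runOnMachineFunction(MachineFunction &MF) {
      MachineModuleInfo &MMI = MF.getMMI(); // installed by the code above
      MCContext &Ctx = MMI.getContext();
      (void)Ctx; // labels, sections, etc. for MF are created through Ctx
      return false; // nothing modified
    }
  };
  }
  char MCContextUserDemo::ID = 0;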
- - // Set up a MachineFunction for the rest of CodeGen to work on. - PM.add(new MachineFunctionAnalysis(*this, OptLevel)); - - // Enable FastISel with -fast, but allow that to be overridden. - if (EnableFastISelOption == cl::BOU_TRUE || - (OptLevel == CodeGenOpt::None && EnableFastISelOption != cl::BOU_FALSE)) - EnableFastISel = true; - - // Ask the target for an isel. - if (addInstSelector(PM, OptLevel)) - return true; - - // Print the instruction selected machine code... - printAndVerify(PM, "After Instruction Selection"); - - // Expand pseudo-instructions emitted by ISel. - PM.add(createExpandISelPseudosPass()); - - // Pre-ra tail duplication. - if (OptLevel != CodeGenOpt::None && !DisableEarlyTailDup) { - PM.add(createTailDuplicatePass(true)); - printAndVerify(PM, "After Pre-RegAlloc TailDuplicate"); - } - - // Optimize PHIs before DCE: removing dead PHI cycles may make more - // instructions dead. - if (OptLevel != CodeGenOpt::None) - PM.add(createOptimizePHIsPass()); - - // If the target requests it, assign local variables to stack slots relative - // to one another and simplify frame index references where possible. - PM.add(createLocalStackSlotAllocationPass()); - - if (OptLevel != CodeGenOpt::None) { - // With optimization, dead code should already be eliminated. However - // there is one known exception: lowered code for arguments that are only - // used by tail calls, where the tail calls reuse the incoming stack - // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll). - if (!DisableMachineDCE) - PM.add(createDeadMachineInstructionElimPass()); - printAndVerify(PM, "After codegen DCE pass"); - - if (!DisableMachineLICM) - PM.add(createMachineLICMPass()); - if (!DisableMachineCSE) - PM.add(createMachineCSEPass()); - if (!DisableMachineSink) - PM.add(createMachineSinkingPass()); - printAndVerify(PM, "After Machine LICM, CSE and Sinking passes"); - - PM.add(createPeepholeOptimizerPass()); - printAndVerify(PM, "After codegen peephole optimization pass"); - } - - // Run pre-ra passes. - if (addPreRegAlloc(PM, OptLevel)) - printAndVerify(PM, "After PreRegAlloc passes"); - - // Perform register allocation. - PM.add(createRegisterAllocator(OptLevel)); - printAndVerify(PM, "After Register Allocation"); - - // Perform stack slot coloring and post-ra machine LICM. - if (OptLevel != CodeGenOpt::None) { - // FIXME: Re-enable coloring with register when it's capable of adding - // kill markers. - if (!DisableSSC) - PM.add(createStackSlotColoringPass(false)); - - // Run post-ra machine LICM to hoist reloads / remats. - if (!DisablePostRAMachineLICM) - PM.add(createMachineLICMPass(false)); - - printAndVerify(PM, "After StackSlotColoring and postra Machine LICM"); - } - - // Run post-ra passes. - if (addPostRegAlloc(PM, OptLevel)) - printAndVerify(PM, "After PostRegAlloc passes"); - - PM.add(createExpandPostRAPseudosPass()); - printAndVerify(PM, "After ExpandPostRAPseudos"); - - // Insert prolog/epilog code. Eliminate abstract frame index references... - PM.add(createPrologEpilogCodeInserter()); - printAndVerify(PM, "After PrologEpilogCodeInserter"); - - // Run pre-sched2 passes. - if (addPreSched2(PM, OptLevel)) - printAndVerify(PM, "After PreSched2 passes"); - - // Second pass scheduler. - if (OptLevel != CodeGenOpt::None && !DisablePostRA) { - PM.add(createPostRAScheduler(OptLevel)); - printAndVerify(PM, "After PostRAScheduler"); - } - - // Branch folding must be run after regalloc and prolog/epilog insertion. 
- if (OptLevel != CodeGenOpt::None && !DisableBranchFold) { - PM.add(createBranchFoldingPass(getEnableTailMergeDefault())); - printNoVerify(PM, "After BranchFolding"); - } - - // Tail duplication. - if (OptLevel != CodeGenOpt::None && !DisableTailDuplicate) { - PM.add(createTailDuplicatePass(false)); - printNoVerify(PM, "After TailDuplicate"); - } - - PM.add(createGCMachineCodeAnalysisPass()); - - if (PrintGCInfo) - PM.add(createGCInfoPrinter(dbgs())); - - if (OptLevel != CodeGenOpt::None && !DisableCodePlace) { - PM.add(createCodePlacementOptPass()); - printNoVerify(PM, "After CodePlacementOpt"); - } - - if (addPreEmitPass(PM, OptLevel)) - printNoVerify(PM, "After PreEmit passes"); - - return false; -} diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp index 0eb009ddac29..deab05a412c9 100644 --- a/lib/CodeGen/LatencyPriorityQueue.cpp +++ b/lib/CodeGen/LatencyPriorityQueue.cpp @@ -46,7 +46,7 @@ bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const { // Finally, just to provide a stable ordering, use the node number as a // deciding factor. - return LHSNum < RHSNum; + return RHSNum < LHSNum; } @@ -84,11 +84,11 @@ void LatencyPriorityQueue::push(SUnit *SU) { } -// ScheduledNode - As nodes are scheduled, we look to see if there are any +// scheduledNode - As nodes are scheduled, we look to see if there are any // successor nodes that have a single unscheduled predecessor. If so, that // single predecessor has a higher priority, since scheduling it will make // the node available. -void LatencyPriorityQueue::ScheduledNode(SUnit *SU) { +void LatencyPriorityQueue::scheduledNode(SUnit *SU) { for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { AdjustPriorityOfUnscheduledPreds(I->getSUnit()); diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp index a12e1a36d113..f1abcbb1dd5c 100644 --- a/lib/CodeGen/LexicalScopes.cpp +++ b/lib/CodeGen/LexicalScopes.cpp @@ -311,6 +311,8 @@ bool LexicalScopes::dominates(DebugLoc DL, MachineBasicBlock *MBB) { return Result; } +void LexicalScope::anchor() { } + /// dump - Print data structures. void LexicalScope::dump() const { #ifndef NDEBUG diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index 3dfe4c0e8cfa..2187833031ee 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -226,7 +226,7 @@ public: LiveInterval *LI, const VNInfo *VNI, SmallVectorImpl<SlotIndex> *Kills, LiveIntervals &LIS, MachineDominatorTree &MDT, - UserValueScopes &UVS); + UserValueScopes &UVS); /// addDefsFromCopies - The value in LI/LocNo may be copies to other /// registers. Determine if any of the copies are available at the kill @@ -468,7 +468,7 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) { // DBG_VALUE has no slot index, use the previous instruction instead. SlotIndex Idx = MBBI == MBB->begin() ? LIS->getMBBStartIdx(MBB) : - LIS->getInstructionIndex(llvm::prior(MBBI)).getDefIndex(); + LIS->getInstructionIndex(llvm::prior(MBBI)).getRegSlot(); // Handle consecutive DBG_VALUE instructions with the same slot index. 
do { if (handleDebugValue(MBBI, Idx)) { @@ -486,7 +486,7 @@ void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, LiveInterval *LI, const VNInfo *VNI, SmallVectorImpl<SlotIndex> *Kills, LiveIntervals &LIS, MachineDominatorTree &MDT, - UserValueScopes &UVS) { + UserValueScopes &UVS) { SmallVector<SlotIndex, 16> Todo; Todo.push_back(Idx); do { @@ -575,15 +575,15 @@ UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo, // Is LocNo extended to reach this copy? If not, another def may be blocking // it, or we are looking at a wrong value of LI. SlotIndex Idx = LIS.getInstructionIndex(MI); - LocMap::iterator I = locInts.find(Idx.getUseIndex()); + LocMap::iterator I = locInts.find(Idx.getRegSlot(true)); if (!I.valid() || I.value() != LocNo) continue; if (!LIS.hasInterval(DstReg)) continue; LiveInterval *DstLI = &LIS.getInterval(DstReg); - const VNInfo *DstVNI = DstLI->getVNInfoAt(Idx.getDefIndex()); - assert(DstVNI && DstVNI->def == Idx.getDefIndex() && "Bad copy value"); + const VNInfo *DstVNI = DstLI->getVNInfoAt(Idx.getRegSlot()); + assert(DstVNI && DstVNI->def == Idx.getRegSlot() && "Bad copy value"); CopyValues.push_back(std::make_pair(DstLI, DstVNI)); } @@ -620,7 +620,7 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI, LiveIntervals &LIS, MachineDominatorTree &MDT, - UserValueScopes &UVS) { + UserValueScopes &UVS) { SmallVector<std::pair<SlotIndex, unsigned>, 16> Defs; // Collect all defs to be extended (Skipping undefs). @@ -841,7 +841,7 @@ bool UserValue::splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) { bool DidChange = false; // Split locations referring to OldReg. Iterate backwards so splitLocation can - // safely erase unuused locations. + // safely erase unused locations. for (unsigned i = locations.size(); i ; --i) { unsigned LocNo = i-1; const MachineOperand *Loc = &locations[LocNo]; @@ -889,8 +889,7 @@ UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI) { // index is no longer available. That means the user value is in a // non-existent sub-register, and %noreg is exactly what we want. Loc.substPhysReg(VRM.getPhys(VirtReg), TRI); - } else if (VRM.getStackSlot(VirtReg) != VirtRegMap::NO_STACK_SLOT && - VRM.isSpillSlotUsed(VRM.getStackSlot(VirtReg))) { + } else if (VRM.getStackSlot(VirtReg) != VirtRegMap::NO_STACK_SLOT) { // FIXME: Translate SubIdx to a stackslot offset. Loc = MachineOperand::CreateFI(VRM.getStackSlot(VirtReg)); } else { @@ -921,8 +920,8 @@ findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx, } // Don't insert anything after the first terminator, though. - return MI->getDesc().isTerminator() ? MBB->getFirstTerminator() : - llvm::next(MachineBasicBlock::iterator(MI)); + return MI->isTerminator() ? MBB->getFirstTerminator() : + llvm::next(MachineBasicBlock::iterator(MI)); } DebugLoc UserValue::findDebugLoc() { diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index b69945aea98f..ac18843ac30d 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -381,37 +381,40 @@ void LiveInterval::join(LiveInterval &Other, for (unsigned i = 0; i != NumVals; ++i) { unsigned LHSValID = LHSValNoAssignments[i]; if (i != LHSValID || - (NewVNInfo[LHSValID] && NewVNInfo[LHSValID] != getValNumInfo(i))) + (NewVNInfo[LHSValID] && NewVNInfo[LHSValID] != getValNumInfo(i))) { MustMapCurValNos = true; + break; + } } // If we have to apply a mapping to our base interval assignment, rewrite it // now. if (MustMapCurValNos) { // Map the first live range. 
+ iterator OutIt = begin(); OutIt->valno = NewVNInfo[LHSValNoAssignments[OutIt->valno->id]]; - ++OutIt; - for (iterator I = OutIt, E = end(); I != E; ++I) { - OutIt->valno = NewVNInfo[LHSValNoAssignments[I->valno->id]]; + for (iterator I = next(OutIt), E = end(); I != E; ++I) { + VNInfo* nextValNo = NewVNInfo[LHSValNoAssignments[I->valno->id]]; + assert(nextValNo != 0 && "Huh?"); // If this live range has the same value # as its immediate predecessor, // and if they are neighbors, remove one LiveRange. This happens when we - // have [0,3:0)[4,7:1) and map 0/1 onto the same value #. - if (OutIt->valno == (OutIt-1)->valno && (OutIt-1)->end == OutIt->start) { - (OutIt-1)->end = OutIt->end; + // have [0,4:0)[4,7:1) and map 0/1 onto the same value #. + if (OutIt->valno == nextValNo && OutIt->end == I->start) { + OutIt->end = I->end; } else { - if (I != OutIt) { + // Didn't merge. Move OutIt to the next interval, + ++OutIt; + OutIt->valno = nextValNo; + if (OutIt != I) { OutIt->start = I->start; OutIt->end = I->end; } - - // Didn't merge, on to the next one. - ++OutIt; } } - // If we merge some live ranges, chop off the end. + ++OutIt; ranges.erase(OutIt, end()); } @@ -639,8 +642,6 @@ void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const { OS << "-phidef"; if (vni->hasPHIKill()) OS << "-phikill"; - if (vni->hasRedefByEC()) - OS << "-ec"; } } } @@ -680,15 +681,14 @@ unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) { // Connect to values live out of predecessors. for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) - if (const VNInfo *PVNI = - LI->getVNInfoAt(LIS.getMBBEndIdx(*PI).getPrevSlot())) + if (const VNInfo *PVNI = LI->getVNInfoBefore(LIS.getMBBEndIdx(*PI))) EqClass.join(VNI->id, PVNI->id); } else { // Normal value defined by an instruction. Check for two-addr redef. // FIXME: This could be coincidental. Should we really check for a tied // operand constraint? // Note that VNI->def may be a use slot for an early clobber def. - if (const VNInfo *UVNI = LI->getVNInfoAt(VNI->def.getPrevSlot())) + if (const VNInfo *UVNI = LI->getVNInfoBefore(VNI->def)) EqClass.join(VNI->id, UVNI->id); } } @@ -716,7 +716,7 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[], continue; // DBG_VALUE instructions should have been eliminated earlier. SlotIndex Idx = LIS.getInstructionIndex(MI); - Idx = MO.isUse() ? 
Idx.getUseIndex() : Idx.getDefIndex(); + Idx = Idx.getRegSlot(MO.isUse()); const VNInfo *VNI = LI.getVNInfoAt(Idx); assert(VNI && "Interval not live at use."); MO.setReg(LIV[getEqClass(VNI)]->reg); diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index b1e202a273d3..3ade66097cbd 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -15,31 +15,22 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "liveintervals" +#define DEBUG_TYPE "regalloc" #include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "VirtRegMap.h" #include "llvm/Value.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveVariables.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/ProcessImplicitDefs.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include <algorithm> @@ -52,19 +43,14 @@ static cl::opt<bool> DisableReMat("disable-rematerialization", cl::init(false), cl::Hidden); STATISTIC(numIntervals , "Number of original intervals"); -STATISTIC(numFolds , "Number of loads/stores folded into instructions"); -STATISTIC(numSplits , "Number of intervals split"); char LiveIntervals::ID = 0; INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals", "Live Interval Analysis", false, false) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_DEPENDENCY(LiveVariables) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_DEPENDENCY(PHIElimination) -INITIALIZE_PASS_DEPENDENCY(TwoAddressInstructionPass) -INITIALIZE_PASS_DEPENDENCY(ProcessImplicitDefs) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(LiveIntervals, "liveintervals", "Live Interval Analysis", false, false) @@ -74,18 +60,8 @@ void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<AliasAnalysis>(); AU.addRequired<LiveVariables>(); AU.addPreserved<LiveVariables>(); - AU.addRequired<MachineLoopInfo>(); - AU.addPreserved<MachineLoopInfo>(); + AU.addPreservedID(MachineLoopInfoID); AU.addPreservedID(MachineDominatorsID); - - if (!StrongPHIElim) { - AU.addPreservedID(PHIEliminationID); - AU.addRequiredID(PHIEliminationID); - } - - AU.addRequiredID(TwoAddressInstructionPassID); - AU.addPreserved<ProcessImplicitDefs>(); - AU.addRequired<ProcessImplicitDefs>(); AU.addPreserved<SlotIndexes>(); AU.addRequiredTransitive<SlotIndexes>(); MachineFunctionPass::getAnalysisUsage(AU); @@ -98,14 +74,12 @@ void LiveIntervals::releaseMemory() { delete I->second; r2iMap_.clear(); + RegMaskSlots.clear(); + RegMaskBits.clear(); + RegMaskBlocks.clear(); // Release VNInfo memory regions, VNInfo objects don't need to be dtor'd. 
VNInfoAllocator.Reset(); - while (!CloneMIs.empty()) { - MachineInstr *MI = CloneMIs.back(); - CloneMIs.pop_back(); - mf_->DeleteMachineInstr(MI); - } } /// runOnMachineFunction - Register allocate the whole function @@ -120,6 +94,7 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { lv_ = &getAnalysis<LiveVariables>(); indexes_ = &getAnalysis<SlotIndexes>(); allocatableRegs_ = tri_->getAllocatableSet(fn); + reservedRegs_ = tri_->getReservedRegs(fn); computeIntervals(); @@ -132,10 +107,21 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { /// print - Implement the dump method. void LiveIntervals::print(raw_ostream &OS, const Module* ) const { OS << "********** INTERVALS **********\n"; - for (const_iterator I = begin(), E = end(); I != E; ++I) { - I->second->print(OS, tri_); - OS << "\n"; - } + + // Dump the physregs. + for (unsigned Reg = 1, RegE = tri_->getNumRegs(); Reg != RegE; ++Reg) + if (const LiveInterval *LI = r2iMap_.lookup(Reg)) { + LI->print(OS, tri_); + OS << '\n'; + } + + // Dump the virtregs. + for (unsigned Reg = 0, RegE = mri_->getNumVirtRegs(); Reg != RegE; ++Reg) + if (const LiveInterval *LI = + r2iMap_.lookup(TargetRegisterInfo::index2VirtReg(Reg))) { + LI->print(OS, tri_); + OS << '\n'; + } printInstrs(OS); } @@ -149,103 +135,6 @@ void LiveIntervals::dumpInstrs() const { printInstrs(dbgs()); } -bool LiveIntervals::conflictsWithPhysReg(const LiveInterval &li, - VirtRegMap &vrm, unsigned reg) { - // We don't handle fancy stuff crossing basic block boundaries - if (li.ranges.size() != 1) - return true; - const LiveRange &range = li.ranges.front(); - SlotIndex idx = range.start.getBaseIndex(); - SlotIndex end = range.end.getPrevSlot().getBaseIndex().getNextIndex(); - - // Skip deleted instructions - MachineInstr *firstMI = getInstructionFromIndex(idx); - while (!firstMI && idx != end) { - idx = idx.getNextIndex(); - firstMI = getInstructionFromIndex(idx); - } - if (!firstMI) - return false; - - // Find last instruction in range - SlotIndex lastIdx = end.getPrevIndex(); - MachineInstr *lastMI = getInstructionFromIndex(lastIdx); - while (!lastMI && lastIdx != idx) { - lastIdx = lastIdx.getPrevIndex(); - lastMI = getInstructionFromIndex(lastIdx); - } - if (!lastMI) - return false; - - // Range cannot cross basic block boundaries or terminators - MachineBasicBlock *MBB = firstMI->getParent(); - if (MBB != lastMI->getParent() || lastMI->getDesc().isTerminator()) - return true; - - MachineBasicBlock::const_iterator E = lastMI; - ++E; - for (MachineBasicBlock::const_iterator I = firstMI; I != E; ++I) { - const MachineInstr &MI = *I; - - // Allow copies to and from li.reg - if (MI.isCopy()) - if (MI.getOperand(0).getReg() == li.reg || - MI.getOperand(1).getReg() == li.reg) - continue; - - // Check for operands using reg - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - const MachineOperand& mop = MI.getOperand(i); - if (!mop.isReg()) - continue; - unsigned PhysReg = mop.getReg(); - if (PhysReg == 0 || PhysReg == li.reg) - continue; - if (TargetRegisterInfo::isVirtualRegister(PhysReg)) { - if (!vrm.hasPhys(PhysReg)) - continue; - PhysReg = vrm.getPhys(PhysReg); - } - if (PhysReg && tri_->regsOverlap(PhysReg, reg)) - return true; - } - } - - // No conflicts found. 
- return false; -} - -bool LiveIntervals::conflictsWithAliasRef(LiveInterval &li, unsigned Reg, - SmallPtrSet<MachineInstr*,32> &JoinedCopies) { - for (LiveInterval::Ranges::const_iterator - I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) { - for (SlotIndex index = I->start.getBaseIndex(), - end = I->end.getPrevSlot().getBaseIndex().getNextIndex(); - index != end; - index = index.getNextIndex()) { - MachineInstr *MI = getInstructionFromIndex(index); - if (!MI) - continue; // skip deleted instructions - - if (JoinedCopies.count(MI)) - continue; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand& MO = MI->getOperand(i); - if (!MO.isReg()) - continue; - unsigned PhysReg = MO.getReg(); - if (PhysReg == 0 || PhysReg == Reg || - TargetRegisterInfo::isVirtualRegister(PhysReg)) - continue; - if (tri_->regsOverlap(Reg, PhysReg)) - return true; - } - } - } - - return false; -} - static bool MultipleDefsBySameMI(const MachineInstr &MI, unsigned MOIdx) { unsigned Reg = MI.getOperand(MOIdx).getReg(); @@ -271,9 +160,9 @@ bool LiveIntervals::isPartialRedef(SlotIndex MIIdx, MachineOperand &MO, if (!MO.getSubReg() || MO.isEarlyClobber()) return false; - SlotIndex RedefIndex = MIIdx.getDefIndex(); + SlotIndex RedefIndex = MIIdx.getRegSlot(); const LiveRange *OldLR = - interval.getLiveRangeContaining(RedefIndex.getUseIndex()); + interval.getLiveRangeContaining(RedefIndex.getRegSlot(true)); MachineInstr *DefMI = getInstructionFromIndex(OldLR->valno->def); if (DefMI != 0) { return DefMI->findRegisterDefOperandIdx(interval.reg) != -1; @@ -296,34 +185,13 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, LiveVariables::VarInfo& vi = lv_->getVarInfo(interval.reg); if (interval.empty()) { // Get the Idx of the defining instructions. - SlotIndex defIndex = MIIdx.getDefIndex(); - // Earlyclobbers move back one, so that they overlap the live range - // of inputs. - if (MO.isEarlyClobber()) - defIndex = MIIdx.getUseIndex(); - - // Make sure the first definition is not a partial redefinition. Add an - // <imp-def> of the full register. - // FIXME: LiveIntervals shouldn't modify the code like this. Whoever - // created the machine instruction should annotate it with <undef> flags - // as needed. Then we can simply assert here. The REG_SEQUENCE lowering - // is the main suspect. - if (MO.getSubReg()) { - mi->addRegisterDefined(interval.reg); - // Mark all defs of interval.reg on this instruction as reading <undef>. - for (unsigned i = MOIdx, e = mi->getNumOperands(); i != e; ++i) { - MachineOperand &MO2 = mi->getOperand(i); - if (MO2.isReg() && MO2.getReg() == interval.reg && MO2.getSubReg()) - MO2.setIsUndef(); - } - } + SlotIndex defIndex = MIIdx.getRegSlot(MO.isEarlyClobber()); - MachineInstr *CopyMI = NULL; - if (mi->isCopyLike()) { - CopyMI = mi; - } + // Make sure the first definition is not a partial redefinition. + assert(!MO.readsReg() && "First def cannot also read virtual register " + "missing <undef> flag?"); - VNInfo *ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator); + VNInfo *ValNo = interval.getNextValue(defIndex, VNInfoAllocator); assert(ValNo->id == 0 && "First value in interval is not 0?"); // Loop over all of the blocks that the vreg is defined in. There are @@ -334,9 +202,9 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // FIXME: what about dead vars? 
SlotIndex killIdx; if (vi.Kills[0] != mi) - killIdx = getInstructionIndex(vi.Kills[0]).getDefIndex(); + killIdx = getInstructionIndex(vi.Kills[0]).getRegSlot(); else - killIdx = defIndex.getStoreIndex(); + killIdx = defIndex.getDeadSlot(); // If the kill happens after the definition, we have an intra-block // live range. @@ -384,14 +252,14 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, for (unsigned i = 0, e = vi.Kills.size(); i != e; ++i) { MachineInstr *Kill = vi.Kills[i]; SlotIndex Start = getMBBStartIdx(Kill->getParent()); - SlotIndex killIdx = getInstructionIndex(Kill).getDefIndex(); + SlotIndex killIdx = getInstructionIndex(Kill).getRegSlot(); // Create interval with one of a NEW value number. Note that this value // number isn't actually defined by an instruction, weird huh? :) if (PHIJoin) { assert(getInstructionFromIndex(Start) == 0 && "PHI def index points at actual instruction."); - ValNo = interval.getNextValue(Start, 0, VNInfoAllocator); + ValNo = interval.getNextValue(Start, VNInfoAllocator); ValNo->setIsPHIDef(true); } LiveRange LR(Start, killIdx, ValNo); @@ -422,14 +290,12 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // are actually two values in the live interval. Because of this we // need to take the LiveRegion that defines this register and split it // into two values. - SlotIndex RedefIndex = MIIdx.getDefIndex(); - if (MO.isEarlyClobber()) - RedefIndex = MIIdx.getUseIndex(); + SlotIndex RedefIndex = MIIdx.getRegSlot(MO.isEarlyClobber()); const LiveRange *OldLR = - interval.getLiveRangeContaining(RedefIndex.getUseIndex()); + interval.getLiveRangeContaining(RedefIndex.getRegSlot(true)); VNInfo *OldValNo = OldLR->valno; - SlotIndex DefIndex = OldValNo->def.getDefIndex(); + SlotIndex DefIndex = OldValNo->def.getRegSlot(); // Delete the previous value, which should be short and continuous, // because the 2-addr copy must be in the same MBB as the redef. @@ -440,12 +306,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, VNInfo *ValNo = interval.createValueCopy(OldValNo, VNInfoAllocator); // Value#0 is now defined by the 2-addr instruction. - OldValNo->def = RedefIndex; - OldValNo->setCopy(0); - - // A re-def may be a copy. e.g. %reg1030:6<def> = VMOVD %reg1026, ... - if (PartReDef && mi->isCopyLike()) - OldValNo->setCopy(&*mi); + OldValNo->def = RedefIndex; // Add the new live interval which replaces the range for the input copy. LiveRange LR(DefIndex, RedefIndex, ValNo); @@ -455,7 +316,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // If this redefinition is dead, we need to add a dummy unit live // range covering the def slot. if (MO.isDead()) - interval.addRange(LiveRange(RedefIndex, RedefIndex.getStoreIndex(), + interval.addRange(LiveRange(RedefIndex, RedefIndex.getDeadSlot(), OldValNo)); DEBUG({ @@ -467,15 +328,11 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // live until the end of the block. We've already taken care of the // rest of the live range. 
- SlotIndex defIndex = MIIdx.getDefIndex(); + SlotIndex defIndex = MIIdx.getRegSlot(); if (MO.isEarlyClobber()) - defIndex = MIIdx.getUseIndex(); + defIndex = MIIdx.getRegSlot(true); - VNInfo *ValNo; - MachineInstr *CopyMI = NULL; - if (mi->isCopyLike()) - CopyMI = mi; - ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator); + VNInfo *ValNo = interval.getNextValue(defIndex, VNInfoAllocator); SlotIndex killIndex = getMBBEndIdx(mbb); LiveRange LR(defIndex, killIndex, ValNo); @@ -490,21 +347,26 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, DEBUG(dbgs() << '\n'); } +static bool isRegLiveIntoSuccessor(const MachineBasicBlock *MBB, unsigned Reg) { + for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); + SI != SE; ++SI) { + const MachineBasicBlock* succ = *SI; + if (succ->isLiveIn(Reg)) + return true; + } + return false; +} + void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, MachineBasicBlock::iterator mi, SlotIndex MIIdx, MachineOperand& MO, - LiveInterval &interval, - MachineInstr *CopyMI) { - // A physical register cannot be live across basic block, so its - // lifetime must end somewhere in its defining basic block. + LiveInterval &interval) { DEBUG(dbgs() << "\t\tregister: " << PrintReg(interval.reg, tri_)); SlotIndex baseIndex = MIIdx; - SlotIndex start = baseIndex.getDefIndex(); - // Earlyclobbers move back one. - if (MO.isEarlyClobber()) - start = MIIdx.getUseIndex(); + SlotIndex start = baseIndex.getRegSlot(MO.isEarlyClobber()); SlotIndex end = start; // If it is not used after definition, it is considered dead at @@ -514,7 +376,7 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, // advance below compensates. if (MO.isDead()) { DEBUG(dbgs() << " dead"); - end = start.getStoreIndex(); + end = start.getDeadSlot(); goto exit; } @@ -531,21 +393,21 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, if (mi->killsRegister(interval.reg, tri_)) { DEBUG(dbgs() << " killed"); - end = baseIndex.getDefIndex(); + end = baseIndex.getRegSlot(); goto exit; } else { int DefIdx = mi->findRegisterDefOperandIdx(interval.reg,false,false,tri_); if (DefIdx != -1) { if (mi->isRegTiedToUseOperand(DefIdx)) { // Two-address instruction. - end = baseIndex.getDefIndex(); + end = baseIndex.getRegSlot(mi->getOperand(DefIdx).isEarlyClobber()); } else { // Another instruction redefines the register before it is ever read. // Then the register is essentially dead at the instruction that // defines it. Hence its interval is: // [defSlot(def), defSlot(def)+1) DEBUG(dbgs() << " dead"); - end = start.getStoreIndex(); + end = start.getDeadSlot(); } goto exit; } @@ -554,12 +416,19 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, baseIndex = baseIndex.getNextIndex(); } - // The only case we should have a dead physreg here without a killing or - // instruction where we know it's dead is if it is live-in to the function - // and never used. Another possible case is the implicit use of the - // physical register has been deleted by two-address pass. - end = start.getStoreIndex(); + // If we get here the register *should* be live out. + assert(!isAllocatable(interval.reg) && "Physregs shouldn't be live out!"); + // FIXME: We need saner rules for reserved regs. + if (isReserved(interval.reg)) { + end = start.getDeadSlot(); + } else { + // Unreserved, unallocable registers like EFLAGS can be live across basic + // block boundaries. 
+ assert(isRegLiveIntoSuccessor(MBB, interval.reg) && + "Unreserved reg not live-out?"); + end = getMBBEndIdx(MBB); + } exit: assert(start < end && "did not find end of interval?"); @@ -567,9 +436,7 @@ exit: VNInfo *ValNo = interval.getVNInfoAt(start); bool Extend = ValNo != 0; if (!Extend) - ValNo = interval.getNextValue(start, CopyMI, VNInfoAllocator); - if (Extend && MO.isEarlyClobber()) - ValNo->setHasRedefByEC(true); + ValNo = interval.getNextValue(start, VNInfoAllocator); LiveRange LR(start, end, ValNo); interval.addRange(LR); DEBUG(dbgs() << " +" << LR << '\n'); @@ -583,18 +450,20 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB, if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) handleVirtualRegisterDef(MBB, MI, MIIdx, MO, MOIdx, getOrCreateInterval(MO.getReg())); - else { - MachineInstr *CopyMI = NULL; - if (MI->isCopyLike()) - CopyMI = MI; + else handlePhysicalRegisterDef(MBB, MI, MIIdx, MO, - getOrCreateInterval(MO.getReg()), CopyMI); - } + getOrCreateInterval(MO.getReg())); } void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, SlotIndex MIIdx, - LiveInterval &interval, bool isAlias) { + LiveInterval &interval) { + assert(TargetRegisterInfo::isPhysicalRegister(interval.reg) && + "Only physical registers can be live in."); + assert((!isAllocatable(interval.reg) || MBB->getParent()->begin() || + MBB->isLandingPad()) && + "Allocatable live-ins only valid for entry blocks and landing pads."); + DEBUG(dbgs() << "\t\tlivein register: " << PrintReg(interval.reg, tri_)); // Look for kills, if it reaches a def before it's killed, then it shouldn't @@ -621,16 +490,16 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, while (mi != E) { if (mi->killsRegister(interval.reg, tri_)) { DEBUG(dbgs() << " killed"); - end = baseIndex.getDefIndex(); + end = baseIndex.getRegSlot(); SeenDefUse = true; break; - } else if (mi->definesRegister(interval.reg, tri_)) { + } else if (mi->modifiesRegister(interval.reg, tri_)) { // Another instruction redefines the register before it is ever read. // Then the register is essentially dead at the instruction that defines // it. Hence its interval is: // [defSlot(def), defSlot(def)+1) DEBUG(dbgs() << " dead"); - end = start.getStoreIndex(); + end = start.getDeadSlot(); SeenDefUse = true; break; } @@ -644,10 +513,16 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, // Live-in register might not be used at all. if (!SeenDefUse) { - if (isAlias) { + if (isAllocatable(interval.reg) || + !isRegLiveIntoSuccessor(MBB, interval.reg)) { + // Allocatable registers are never live through. + // Non-allocatable registers that aren't live into any successors also + // aren't live through. DEBUG(dbgs() << " dead"); - end = MIIdx.getStoreIndex(); + return; } else { + // If we get here the register is non-allocatable and live into some + // successor. We'll conservatively assume it's live-through. 
DEBUG(dbgs() << " live through"); end = getMBBEndIdx(MBB); } @@ -656,8 +531,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, SlotIndex defIdx = getMBBStartIdx(MBB); assert(getInstructionFromIndex(defIdx) == 0 && "PHI def index points at actual instruction."); - VNInfo *vni = - interval.getNextValue(defIdx, 0, VNInfoAllocator); + VNInfo *vni = interval.getNextValue(defIdx, VNInfoAllocator); vni->setIsPHIDef(true); LiveRange LR(start, end, vni); @@ -674,10 +548,14 @@ void LiveIntervals::computeIntervals() { << "********** Function: " << ((Value*)mf_->getFunction())->getName() << '\n'); + RegMaskBlocks.resize(mf_->getNumBlockIDs()); + SmallVector<unsigned, 8> UndefUses; for (MachineFunction::iterator MBBI = mf_->begin(), E = mf_->end(); MBBI != E; ++MBBI) { MachineBasicBlock *MBB = MBBI; + RegMaskBlocks[MBB->getNumber()].first = RegMaskSlots.size(); + if (MBB->empty()) continue; @@ -690,11 +568,6 @@ void LiveIntervals::computeIntervals() { for (MachineBasicBlock::livein_iterator LI = MBB->livein_begin(), LE = MBB->livein_end(); LI != LE; ++LI) { handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*LI)); - // Multiple live-ins can alias the same register. - for (const unsigned* AS = tri_->getSubRegisters(*LI); *AS; ++AS) - if (!hasInterval(*AS)) - handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*AS), - true); } // Skip over empty initial indices. @@ -706,10 +579,20 @@ void LiveIntervals::computeIntervals() { DEBUG(dbgs() << MIIndex << "\t" << *MI); if (MI->isDebugValue()) continue; + assert(indexes_->getInstructionFromIndex(MIIndex) == MI && + "Lost SlotIndex synchronization"); // Handle defs. for (int i = MI->getNumOperands() - 1; i >= 0; --i) { MachineOperand &MO = MI->getOperand(i); + + // Collect register masks. + if (MO.isRegMask()) { + RegMaskSlots.push_back(MIIndex.getRegSlot()); + RegMaskBits.push_back(MO.getRegMask()); + continue; + } + if (!MO.isReg() || !MO.getReg()) continue; @@ -723,6 +606,10 @@ void LiveIntervals::computeIntervals() { // Move to the next instr slot. MIIndex = indexes_->getNextNonNullIndex(MIIndex); } + + // Compute the number of register mask instructions in this block. + std::pair<unsigned, unsigned> &RMB = RegMaskBlocks[MBB->getNumber()]; + RMB.second = RegMaskSlots.size() - RMB.first;; } // Create empty intervals for registers defined by implicit_def's (except @@ -754,7 +641,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, SmallVectorImpl<MachineInstr*> *dead) { DEBUG(dbgs() << "Shrink: " << *li << '\n'); assert(TargetRegisterInfo::isVirtualRegister(li->reg) - && "Can't only shrink physical registers"); + && "Can only shrink virtual registers"); // Find all the values used, including PHI kills. SmallVector<std::pair<SlotIndex, VNInfo*>, 16> WorkList; @@ -766,8 +653,10 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, MachineInstr *UseMI = I.skipInstruction();) { if (UseMI->isDebugValue() || !UseMI->readsVirtualRegister(li->reg)) continue; - SlotIndex Idx = getInstructionIndex(UseMI).getUseIndex(); - VNInfo *VNI = li->getVNInfoAt(Idx); + SlotIndex Idx = getInstructionIndex(UseMI).getRegSlot(); + // Note: This intentionally picks up the wrong VNI in case of an EC redef. + // See below. + VNInfo *VNI = li->getVNInfoBefore(Idx); if (!VNI) { // This shouldn't happen: readsVirtualRegister returns true, but there is // no live value. 
It is likely caused by a target getting <undef> flags @@ -777,11 +666,12 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, << *li << '\n'); continue; } - if (VNI->def == Idx) { - // Special case: An early-clobber tied operand reads and writes the - // register one slot early. - Idx = Idx.getPrevSlot(); - VNI = li->getVNInfoAt(Idx); + // Special case: An early-clobber tied operand reads and writes the + // register one slot early. The getVNInfoBefore call above would have + // picked up the value defined by UseMI. Adjust the kill slot and value. + if (SlotIndex::isSameInstr(VNI->def, Idx)) { + Idx = VNI->def; + VNI = li->getVNInfoBefore(Idx); assert(VNI && "Early-clobber tied value not available"); } WorkList.push_back(std::make_pair(Idx, VNI)); @@ -794,14 +684,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, VNInfo *VNI = *I; if (VNI->isUnused()) continue; - NewLI.addRange(LiveRange(VNI->def, VNI->def.getNextSlot(), VNI)); - - // A use tied to an early-clobber def ends at the load slot and isn't caught - // above. Catch it here instead. This probably only ever happens for inline - // assembly. - if (VNI->def.isUse()) - if (VNInfo *UVNI = li->getVNInfoAt(VNI->def.getLoadIndex())) - WorkList.push_back(std::make_pair(VNI->def.getLoadIndex(), UVNI)); + NewLI.addRange(LiveRange(VNI->def, VNI->def.getDeadSlot(), VNI)); } // Keep track of the PHIs that are in use. @@ -812,11 +695,11 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, SlotIndex Idx = WorkList.back().first; VNInfo *VNI = WorkList.back().second; WorkList.pop_back(); - const MachineBasicBlock *MBB = getMBBFromIndex(Idx); + const MachineBasicBlock *MBB = getMBBFromIndex(Idx.getPrevSlot()); SlotIndex BlockStart = getMBBStartIdx(MBB); // Extend the live range for VNI to be live at Idx. - if (VNInfo *ExtVNI = NewLI.extendInBlock(BlockStart, Idx.getNextSlot())) { + if (VNInfo *ExtVNI = NewLI.extendInBlock(BlockStart, Idx)) { (void)ExtVNI; assert(ExtVNI == VNI && "Unexpected existing value number"); // Is this a PHIDef we haven't seen before? @@ -827,9 +710,9 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, PE = MBB->pred_end(); PI != PE; ++PI) { if (!LiveOut.insert(*PI)) continue; - SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot(); + SlotIndex Stop = getMBBEndIdx(*PI); // A predecessor is not required to have a live-out value for a PHI. - if (VNInfo *PVNI = li->getVNInfoAt(Stop)) + if (VNInfo *PVNI = li->getVNInfoBefore(Stop)) WorkList.push_back(std::make_pair(Stop, PVNI)); } continue; @@ -837,15 +720,16 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, // VNI is live-in to MBB. DEBUG(dbgs() << " live-in at " << BlockStart << '\n'); - NewLI.addRange(LiveRange(BlockStart, Idx.getNextSlot(), VNI)); + NewLI.addRange(LiveRange(BlockStart, Idx, VNI)); // Make sure VNI is live-out from the predecessors. 
for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) { if (!LiveOut.insert(*PI)) continue; - SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot(); - assert(li->getVNInfoAt(Stop) == VNI && "Wrong value out of predecessor"); + SlotIndex Stop = getMBBEndIdx(*PI); + assert(li->getVNInfoBefore(Stop) == VNI && + "Wrong value out of predecessor"); WorkList.push_back(std::make_pair(Stop, VNI)); } } @@ -859,7 +743,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, continue; LiveInterval::iterator LII = NewLI.FindLiveRangeContaining(VNI->def); assert(LII != NewLI.end() && "Missing live range for PHI"); - if (LII->end != VNI->def.getNextSlot()) + if (LII->end != VNI->def.getDeadSlot()) continue; if (VNI->isPHIDef()) { // This is a dead PHI. Remove it. @@ -890,28 +774,6 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, // Register allocator hooks. // -MachineBasicBlock::iterator -LiveIntervals::getLastSplitPoint(const LiveInterval &li, - MachineBasicBlock *mbb) const { - const MachineBasicBlock *lpad = mbb->getLandingPadSuccessor(); - - // If li is not live into a landing pad, we can insert spill code before the - // first terminator. - if (!lpad || !isLiveInToMBB(li, lpad)) - return mbb->getFirstTerminator(); - - // When there is a landing pad, spill code must go before the call instruction - // that can throw. - MachineBasicBlock::iterator I = mbb->end(), B = mbb->begin(); - while (I != B) { - --I; - if (I->getDesc().isCall()) - return I; - } - // The block contains no calls that can throw, so use the first terminator. - return mbb->getFirstTerminator(); -} - void LiveIntervals::addKillFlags() { for (iterator I = begin(), E = end(); I != E; ++I) { unsigned Reg = I->first; @@ -924,8 +786,8 @@ void LiveIntervals::addKillFlags() { // Every instruction that kills Reg corresponds to a live range end point. for (LiveInterval::iterator RI = LI->begin(), RE = LI->end(); RI != RE; ++RI) { - // A LOAD index indicates an MBB edge. - if (RI->end.isLoad()) + // A block index indicates an MBB edge. + if (RI->end.isBlock()) continue; MachineInstr *MI = getInstructionFromIndex(RI->end); if (!MI) @@ -949,16 +811,10 @@ unsigned LiveIntervals::getReMatImplicitUse(const LiveInterval &li, if (Reg == 0 || Reg == li.reg) continue; - if (TargetRegisterInfo::isPhysicalRegister(Reg) && - !allocatableRegs_[Reg]) + if (TargetRegisterInfo::isPhysicalRegister(Reg) && !isAllocatable(Reg)) continue; - // FIXME: For now, only remat MI with at most one register operand. - assert(!RegOp && - "Can't rematerialize instruction with multiple register operand!"); RegOp = MO.getReg(); -#ifndef NDEBUG - break; -#endif + break; // Found vreg operand - leave the loop. } return RegOp; } @@ -1011,14 +867,6 @@ LiveIntervals::isReMaterializable(const LiveInterval &li, return true; } -/// isReMaterializable - Returns true if the definition MI of the specified -/// val# of the specified interval is re-materializable. -bool LiveIntervals::isReMaterializable(const LiveInterval &li, - const VNInfo *ValNo, MachineInstr *MI) { - bool Dummy2; - return isReMaterializable(li, ValNo, MI, 0, Dummy2); -} - /// isReMaterializable - Returns true if every definition of MI of every /// val# of the specified interval is re-materializable. bool @@ -1044,1141 +892,653 @@ LiveIntervals::isReMaterializable(const LiveInterval &li, return true; } -/// FilterFoldedOps - Filter out two-address use operands. Return -/// true if it finds any issue with the operands that ought to prevent -/// folding. 
-static bool FilterFoldedOps(MachineInstr *MI,
- SmallVector<unsigned, 2> &Ops,
- unsigned &MRInfo,
- SmallVector<unsigned, 2> &FoldOps) {
- MRInfo = 0;
- for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
- unsigned OpIdx = Ops[i];
- MachineOperand &MO = MI->getOperand(OpIdx);
- // FIXME: fold subreg use.
- if (MO.getSubReg())
- return true;
- if (MO.isDef())
- MRInfo |= (unsigned)VirtRegMap::isMod;
- else {
- // Filter out two-address use operand(s).
- if (MI->isRegTiedToDefOperand(OpIdx)) {
- MRInfo = VirtRegMap::isModRef;
- continue;
- }
- MRInfo |= (unsigned)VirtRegMap::isRef;
- }
- FoldOps.push_back(OpIdx);
- }
- return false;
+MachineBasicBlock*
+LiveIntervals::intervalIsInOneMBB(const LiveInterval &LI) const {
+ // A local live range must be fully contained inside the block, meaning it is
+ // defined and killed at instructions, not at block boundaries. It is not
+ // live in or out of any block.
+ //
+ // It is technically possible to have a PHI-defined live range identical to a
+ // single block, but we are going to return false in that case.
+
+ SlotIndex Start = LI.beginIndex();
+ if (Start.isBlock())
+ return NULL;
+
+ SlotIndex Stop = LI.endIndex();
+ if (Stop.isBlock())
+ return NULL;
+
+ // getMBBFromIndex doesn't need to search the MBB table when both indexes
+ // belong to proper instructions.
+ MachineBasicBlock *MBB1 = indexes_->getMBBFromIndex(Start);
+ MachineBasicBlock *MBB2 = indexes_->getMBBFromIndex(Stop);
+ return MBB1 == MBB2 ? MBB1 : NULL;
 }

+float
+LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) {
+ // Limit the loop depth ridiculousness.
+ if (loopDepth > 200)
+ loopDepth = 200;

-/// tryFoldMemoryOperand - Attempts to fold either a spill / restore from
-/// slot / to reg or any rematerialized load into ith operand of specified
-/// MI. If it is successul, MI is updated with the newly created MI and
-/// returns true.
-bool LiveIntervals::tryFoldMemoryOperand(MachineInstr* &MI,
- VirtRegMap &vrm, MachineInstr *DefMI,
- SlotIndex InstrIdx,
- SmallVector<unsigned, 2> &Ops,
- bool isSS, int Slot, unsigned Reg) {
- // If it is an implicit def instruction, just delete it.
- if (MI->isImplicitDef()) {
- RemoveMachineInstrFromMaps(MI);
- vrm.RemoveMachineInstrFromMaps(MI);
- MI->eraseFromParent();
- ++numFolds;
- return true;
- }
+ // The loop depth is used to roughly estimate the number of times the
+ // instruction is executed. Something like 10^d is simple, but will quickly
+ // overflow a float. This expression behaves like 10^d for small d, but is
+ // more tempered for large d. At d=200 we get 6.7e33 which leaves a bit of
+ // headroom before overflow.
+ // By the way, powf() might be unavailable here. For consistency, we use
+ // pow(double, double) instead.
+ float lc = std::pow(1 + (100.0 / (loopDepth + 10)), (double)loopDepth);

- // Filter the list of operand indexes that are to be folded. Abort if
- // any operand will prevent folding.
- unsigned MRInfo = 0;
- SmallVector<unsigned, 2> FoldOps;
- if (FilterFoldedOps(MI, Ops, MRInfo, FoldOps))
- return false;
+ return (isDef + isUse) * lc;
+}

- // The only time it's safe to fold into a two address instruction is when
- // it's folding reload and spill from / into a spill stack slot.
- if (DefMI && (MRInfo & VirtRegMap::isMod))
- return false;
+LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg,
+ MachineInstr* startInst) {
+ LiveInterval& Interval = getOrCreateInterval(reg);
+ VNInfo* VN = Interval.getNextValue(
+ SlotIndex(getInstructionIndex(startInst).getRegSlot()),
+ getVNInfoAllocator());
+ VN->setHasPHIKill(true);
+ LiveRange LR(
+ SlotIndex(getInstructionIndex(startInst).getRegSlot()),
+ getMBBEndIdx(startInst->getParent()), VN);
+ Interval.addRange(LR);

- MachineInstr *fmi = isSS ? tii_->foldMemoryOperand(MI, FoldOps, Slot)
- : tii_->foldMemoryOperand(MI, FoldOps, DefMI);
- if (fmi) {
- // Remember this instruction uses the spill slot.
- if (isSS) vrm.addSpillSlotUse(Slot, fmi);
-
- // Attempt to fold the memory reference into the instruction. If
- // we can do this, we don't need to insert spill code.
- if (isSS && !mf_->getFrameInfo()->isImmutableObjectIndex(Slot))
- vrm.virtFolded(Reg, MI, fmi, (VirtRegMap::ModRef)MRInfo);
- vrm.transferSpillPts(MI, fmi);
- vrm.transferRestorePts(MI, fmi);
- vrm.transferEmergencySpills(MI, fmi);
- ReplaceMachineInstrInMaps(MI, fmi);
- MI->eraseFromParent();
- MI = fmi;
- ++numFolds;
- return true;
- }
- return false;
+ return LR;
 }

-/// canFoldMemoryOperand - Returns true if the specified load / store
-/// folding is possible.
-bool LiveIntervals::canFoldMemoryOperand(MachineInstr *MI,
- SmallVector<unsigned, 2> &Ops,
- bool ReMat) const {
- // Filter the list of operand indexes that are to be folded. Abort if
- // any operand will prevent folding.
- unsigned MRInfo = 0;
- SmallVector<unsigned, 2> FoldOps;
- if (FilterFoldedOps(MI, Ops, MRInfo, FoldOps))
+
+//===----------------------------------------------------------------------===//
+// Register mask functions
+//===----------------------------------------------------------------------===//
+
+bool LiveIntervals::checkRegMaskInterference(LiveInterval &LI,
+ BitVector &UsableRegs) {
+ if (LI.empty())
 return false;
+ LiveInterval::iterator LiveI = LI.begin(), LiveE = LI.end();
+
+ // Use smaller arrays for local live ranges.
+ ArrayRef<SlotIndex> Slots;
+ ArrayRef<const uint32_t*> Bits;
+ if (MachineBasicBlock *MBB = intervalIsInOneMBB(LI)) {
+ Slots = getRegMaskSlotsInBlock(MBB->getNumber());
+ Bits = getRegMaskBitsInBlock(MBB->getNumber());
+ } else {
+ Slots = getRegMaskSlots();
+ Bits = getRegMaskBits();
+ }

- // It's only legal to remat for a use, not a def.
- if (ReMat && (MRInfo & VirtRegMap::isMod))
+ // We are going to enumerate all the register mask slots contained in LI.
+ // Start with a binary search of RegMaskSlots to find a starting point.
+ ArrayRef<SlotIndex>::iterator SlotI =
+ std::lower_bound(Slots.begin(), Slots.end(), LiveI->start);
+ ArrayRef<SlotIndex>::iterator SlotE = Slots.end();
+
+ // No slots in range, LI begins after the last call.
+ if (SlotI == SlotE)
 return false;

- return tii_->canFoldMemoryOperand(MI, FoldOps);
+ bool Found = false;
+ for (;;) {
+ assert(*SlotI >= LiveI->start);
+ // Loop over all slots overlapping this segment.
+ while (*SlotI < LiveI->end) {
+ // *SlotI overlaps LI. Collect mask bits.
+ if (!Found) {
+ // This is the first overlap. Initialize UsableRegs to all ones.
+ UsableRegs.clear();
+ UsableRegs.resize(tri_->getNumRegs(), true);
+ Found = true;
+ }
+ // Remove usable registers clobbered by this mask.
+ UsableRegs.clearBitsNotInMask(Bits[SlotI-Slots.begin()]);
+ if (++SlotI == SlotE)
+ return Found;
+ }
+ // *SlotI is beyond the current LI segment.
+ LiveI = LI.advanceTo(LiveI, *SlotI);
+ if (LiveI == LiveE)
+ return Found;
+ // Advance SlotI until it overlaps.
+ while (*SlotI < LiveI->start)
+ if (++SlotI == SlotE)
+ return Found;
+ }
}

-bool LiveIntervals::intervalIsInOneMBB(const LiveInterval &li) const {
- LiveInterval::Ranges::const_iterator itr = li.ranges.begin();
+//===----------------------------------------------------------------------===//
+// IntervalUpdate class.
+//===----------------------------------------------------------------------===//

- MachineBasicBlock *mbb = indexes_->getMBBCoveringRange(itr->start, itr->end);
+// HMEditor is a toolkit used by handleMove to trim or extend live intervals.
+class LiveIntervals::HMEditor {
+private:
+ LiveIntervals& LIS;
+ const MachineRegisterInfo& MRI;
+ const TargetRegisterInfo& TRI;
+ SlotIndex NewIdx;
+
+ typedef std::pair<LiveInterval*, LiveRange*> IntRangePair;
+ typedef DenseSet<IntRangePair> RangeSet;
+
+ struct RegRanges {
+ LiveRange* Use;
+ LiveRange* EC;
+ LiveRange* Dead;
+ LiveRange* Def;
+ RegRanges() : Use(0), EC(0), Dead(0), Def(0) {}
+ };
+ typedef DenseMap<unsigned, RegRanges> BundleRanges;
+
+public:
+ HMEditor(LiveIntervals& LIS, const MachineRegisterInfo& MRI,
+ const TargetRegisterInfo& TRI, SlotIndex NewIdx)
+ : LIS(LIS), MRI(MRI), TRI(TRI), NewIdx(NewIdx) {}
+
+ // Update intervals for all operands of MI from OldIdx to NewIdx.
+ // This assumes that MI used to be at OldIdx, and now resides at
+ // NewIdx.
+ void moveAllRangesFrom(MachineInstr* MI, SlotIndex OldIdx) {
+ assert(NewIdx != OldIdx && "No-op move? That's a bit strange.");
+
+ // Collect the operands.
+ RangeSet Entering, Internal, Exiting;
+ bool hasRegMaskOp = false;
+ collectRanges(MI, Entering, Internal, Exiting, hasRegMaskOp, OldIdx);
+
+ // To keep the LiveRanges valid within an interval, move the ranges closest
+ // to the destination first. This prevents ranges from overlapping, so that
+ // APIs like removeRange still work.
+ if (NewIdx < OldIdx) {
+ moveAllEnteringFrom(OldIdx, Entering);
+ moveAllInternalFrom(OldIdx, Internal);
+ moveAllExitingFrom(OldIdx, Exiting);
+ }
+ else {
+ moveAllExitingFrom(OldIdx, Exiting);
+ moveAllInternalFrom(OldIdx, Internal);
+ moveAllEnteringFrom(OldIdx, Entering);
+ }

- if (mbb == 0)
- return false;
+ if (hasRegMaskOp)
+ updateRegMaskSlots(OldIdx);

- for (++itr; itr != li.ranges.end(); ++itr) {
- MachineBasicBlock *mbb2 =
- indexes_->getMBBCoveringRange(itr->start, itr->end);
+#ifndef NDEBUG
+ LIValidator validator;
+ std::for_each(Entering.begin(), Entering.end(), validator);
+ std::for_each(Internal.begin(), Internal.end(), validator);
+ std::for_each(Exiting.begin(), Exiting.end(), validator);
+ assert(validator.rangesOk() && "moveAllOperandsFrom broke liveness.");
+#endif

- if (mbb2 != mbb)
- return false;
+ }

- return true;
-}
+ // Update intervals for all operands of MI to refer to BundleStart's
+ // SlotIndex.
+ void moveAllRangesInto(MachineInstr* MI, MachineInstr* BundleStart) {
+ if (MI == BundleStart)
+ return; // Bundling instr with itself - nothing to do.
+
+ SlotIndex OldIdx = LIS.getSlotIndexes()->getInstructionIndex(MI);
+ assert(LIS.getSlotIndexes()->getInstructionFromIndex(OldIdx) == MI &&
+ "SlotIndex <-> Instruction mapping broken for MI");
+
+ // Collect all ranges already in the bundle.
+ MachineBasicBlock::instr_iterator BII(BundleStart); + RangeSet Entering, Internal, Exiting; + bool hasRegMaskOp = false; + collectRanges(BII, Entering, Internal, Exiting, hasRegMaskOp, NewIdx); + assert(!hasRegMaskOp && "Can't have RegMask operand in bundle."); + for (++BII; &*BII == MI || BII->isInsideBundle(); ++BII) { + if (&*BII == MI) + continue; + collectRanges(BII, Entering, Internal, Exiting, hasRegMaskOp, NewIdx); + assert(!hasRegMaskOp && "Can't have RegMask operand in bundle."); + } -/// rewriteImplicitOps - Rewrite implicit use operands of MI (i.e. uses of -/// interval on to-be re-materialized operands of MI) with new register. -void LiveIntervals::rewriteImplicitOps(const LiveInterval &li, - MachineInstr *MI, unsigned NewVReg, - VirtRegMap &vrm) { - // There is an implicit use. That means one of the other operand is - // being remat'ed and the remat'ed instruction has li.reg as an - // use operand. Make sure we rewrite that as well. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) - continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) - continue; - if (!vrm.isReMaterialized(Reg)) - continue; - MachineInstr *ReMatMI = vrm.getReMaterializedMI(Reg); - MachineOperand *UseMO = ReMatMI->findRegisterUseOperand(li.reg); - if (UseMO) - UseMO->setReg(NewVReg); - } -} + BundleRanges BR = createBundleRanges(Entering, Internal, Exiting); -/// rewriteInstructionForSpills, rewriteInstructionsForSpills - Helper functions -/// for addIntervalsForSpills to rewrite uses / defs for the given live range. -bool LiveIntervals:: -rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, - bool TrySplit, SlotIndex index, SlotIndex end, - MachineInstr *MI, - MachineInstr *ReMatOrigDefMI, MachineInstr *ReMatDefMI, - unsigned Slot, int LdSlot, - bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete, - VirtRegMap &vrm, - const TargetRegisterClass* rc, - SmallVector<int, 4> &ReMatIds, - const MachineLoopInfo *loopInfo, - unsigned &NewVReg, unsigned ImpUse, bool &HasDef, bool &HasUse, - DenseMap<unsigned,unsigned> &MBBVRegsMap, - std::vector<LiveInterval*> &NewLIs) { - bool CanFold = false; - RestartInstruction: - for (unsigned i = 0; i != MI->getNumOperands(); ++i) { - MachineOperand& mop = MI->getOperand(i); - if (!mop.isReg()) - continue; - unsigned Reg = mop.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) - continue; - if (Reg != li.reg) - continue; + collectRanges(MI, Entering, Internal, Exiting, hasRegMaskOp, OldIdx); + assert(!hasRegMaskOp && "Can't have RegMask operand in bundle."); - bool TryFold = !DefIsReMat; - bool FoldSS = true; // Default behavior unless it's a remat. - int FoldSlot = Slot; - if (DefIsReMat) { - // If this is the rematerializable definition MI itself and - // all of its uses are rematerialized, simply delete it. - if (MI == ReMatOrigDefMI && CanDelete) { - DEBUG(dbgs() << "\t\t\t\tErasing re-materializable def: " - << *MI << '\n'); - RemoveMachineInstrFromMaps(MI); - vrm.RemoveMachineInstrFromMaps(MI); - MI->eraseFromParent(); - break; - } + DEBUG(dbgs() << "Entering: " << Entering.size() << "\n"); + DEBUG(dbgs() << "Internal: " << Internal.size() << "\n"); + DEBUG(dbgs() << "Exiting: " << Exiting.size() << "\n"); - // If def for this use can't be rematerialized, then try folding. - // If def is rematerializable and it's a load, also try folding. 
- TryFold = !ReMatDefMI || (ReMatDefMI && (MI == ReMatOrigDefMI || isLoad)); - if (isLoad) { - // Try fold loads (from stack slot, constant pool, etc.) into uses. - FoldSS = isLoadSS; - FoldSlot = LdSlot; - } - } + moveAllEnteringFromInto(OldIdx, Entering, BR); + moveAllInternalFromInto(OldIdx, Internal, BR); + moveAllExitingFromInto(OldIdx, Exiting, BR); - // Scan all of the operands of this instruction rewriting operands - // to use NewVReg instead of li.reg as appropriate. We do this for - // two reasons: - // - // 1. If the instr reads the same spilled vreg multiple times, we - // want to reuse the NewVReg. - // 2. If the instr is a two-addr instruction, we are required to - // keep the src/dst regs pinned. - // - // Keep track of whether we replace a use and/or def so that we can - // create the spill interval with the appropriate range. - SmallVector<unsigned, 2> Ops; - tie(HasUse, HasDef) = MI->readsWritesVirtualRegister(Reg, &Ops); - - // Create a new virtual register for the spill interval. - // Create the new register now so we can map the fold instruction - // to the new register so when it is unfolded we get the correct - // answer. - bool CreatedNewVReg = false; - if (NewVReg == 0) { - NewVReg = mri_->createVirtualRegister(rc); - vrm.grow(); - CreatedNewVReg = true; - - // The new virtual register should get the same allocation hints as the - // old one. - std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(Reg); - if (Hint.first || Hint.second) - mri_->setRegAllocationHint(NewVReg, Hint.first, Hint.second); - } - if (!TryFold) - CanFold = false; - else { - // Do not fold load / store here if we are splitting. We'll find an - // optimal point to insert a load / store later. - if (!TrySplit) { - if (tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index, - Ops, FoldSS, FoldSlot, NewVReg)) { - // Folding the load/store can completely change the instruction in - // unpredictable ways, rescan it from the beginning. - - if (FoldSS) { - // We need to give the new vreg the same stack slot as the - // spilled interval. - vrm.assignVirt2StackSlot(NewVReg, FoldSlot); - } - - HasUse = false; - HasDef = false; - CanFold = false; - if (isNotInMIMap(MI)) - break; - goto RestartInstruction; - } - } else { - // We'll try to fold it later if it's profitable. - CanFold = canFoldMemoryOperand(MI, Ops, DefIsReMat); - } - } +#ifndef NDEBUG + LIValidator validator; + std::for_each(Entering.begin(), Entering.end(), validator); + std::for_each(Internal.begin(), Internal.end(), validator); + std::for_each(Exiting.begin(), Exiting.end(), validator); + assert(validator.rangesOk() && "moveAllOperandsInto broke liveness."); +#endif + } - mop.setReg(NewVReg); - if (mop.isImplicit()) - rewriteImplicitOps(li, MI, NewVReg, vrm); - - // Reuse NewVReg for other reads. - bool HasEarlyClobber = false; - for (unsigned j = 0, e = Ops.size(); j != e; ++j) { - MachineOperand &mopj = MI->getOperand(Ops[j]); - mopj.setReg(NewVReg); - if (mopj.isImplicit()) - rewriteImplicitOps(li, MI, NewVReg, vrm); - if (mopj.isEarlyClobber()) - HasEarlyClobber = true; - } +private: - if (CreatedNewVReg) { - if (DefIsReMat) { - vrm.setVirtIsReMaterialized(NewVReg, ReMatDefMI); - if (ReMatIds[VNI->id] == VirtRegMap::MAX_STACK_SLOT) { - // Each valnum may have its own remat id. 
- ReMatIds[VNI->id] = vrm.assignVirtReMatId(NewVReg);
- } else {
- vrm.assignVirtReMatId(NewVReg, ReMatIds[VNI->id]);
- }
- if (!CanDelete || (HasUse && HasDef)) {
- // If this is a two-addr instruction then its use operands are
- // rematerializable but its def is not. It should be assigned a
- // stack slot.
- vrm.assignVirt2StackSlot(NewVReg, Slot);
- }
- } else {
- vrm.assignVirt2StackSlot(NewVReg, Slot);
+#ifndef NDEBUG
+ class LIValidator {
+ private:
+ DenseSet<const LiveInterval*> Checked, Bogus;
+ public:
+ void operator()(const IntRangePair& P) {
+ const LiveInterval* LI = P.first;
+ if (Checked.count(LI))
+ return;
+ Checked.insert(LI);
+ if (LI->empty())
+ return;
+ SlotIndex LastEnd = LI->begin()->start;
+ for (LiveInterval::const_iterator LRI = LI->begin(), LRE = LI->end();
+ LRI != LRE; ++LRI) {
+ const LiveRange& LR = *LRI;
+ if (LastEnd > LR.start || LR.start >= LR.end)
+ Bogus.insert(LI);
+ LastEnd = LR.end;
 }
- } else if (HasUse && HasDef &&
- vrm.getStackSlot(NewVReg) == VirtRegMap::NO_STACK_SLOT) {
- // If this interval hasn't been assigned a stack slot (because earlier
- // def is a deleted remat def), do it now.
- assert(Slot != VirtRegMap::NO_STACK_SLOT);
- vrm.assignVirt2StackSlot(NewVReg, Slot);
 }

- // Re-matting an instruction with virtual register use. Add the
- // register as an implicit use on the use MI.
- if (DefIsReMat && ImpUse)
- MI->addOperand(MachineOperand::CreateReg(ImpUse, false, true));
-
- // Create a new register interval for this spill / remat.
- LiveInterval &nI = getOrCreateInterval(NewVReg);
- if (CreatedNewVReg) {
- NewLIs.push_back(&nI);
- MBBVRegsMap.insert(std::make_pair(MI->getParent()->getNumber(), NewVReg));
- if (TrySplit)
- vrm.setIsSplitFromReg(NewVReg, li.reg);
+ bool rangesOk() const {
+ return Bogus.empty();
 }
+ };
+#endif

- if (HasUse) {
- if (CreatedNewVReg) {
- LiveRange LR(index.getLoadIndex(), index.getDefIndex(),
- nI.getNextValue(SlotIndex(), 0, VNInfoAllocator));
- DEBUG(dbgs() << " +" << LR);
- nI.addRange(LR);
- } else {
- // Extend the split live interval to this def / use.
- SlotIndex End = index.getDefIndex();
- LiveRange LR(nI.ranges[nI.ranges.size()-1].end, End,
- nI.getValNumInfo(nI.getNumValNums()-1));
- DEBUG(dbgs() << " +" << LR);
- nI.addRange(LR);
+ // Collect IntRangePairs for all operands of MI that may need fixing.
+ // Treats MI's index as OldIdx (regardless of what it is in SlotIndexes'
+ // maps).
+ void collectRanges(MachineInstr* MI, RangeSet& Entering, RangeSet& Internal,
+ RangeSet& Exiting, bool& hasRegMaskOp, SlotIndex OldIdx) {
+ hasRegMaskOp = false;
+ for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end();
+ MOI != MOE; ++MOI) {
+ const MachineOperand& MO = *MOI;
+
+ if (MO.isRegMask()) {
+ hasRegMaskOp = true;
+ continue;
 }
- }
- if (HasDef) {
- // An early clobber starts at the use slot, except for an early clobber
- // tied to a use operand (yes, that is a thing).
- LiveRange LR(HasEarlyClobber && !HasUse ?
- index.getUseIndex() : index.getDefIndex(), - index.getStoreIndex(), - nI.getNextValue(SlotIndex(), 0, VNInfoAllocator)); - DEBUG(dbgs() << " +" << LR); - nI.addRange(LR); - } - DEBUG({ - dbgs() << "\t\t\t\tAdded new interval: "; - nI.print(dbgs(), tri_); - dbgs() << '\n'; - }); - } - return CanFold; -} -bool LiveIntervals::anyKillInMBBAfterIdx(const LiveInterval &li, - const VNInfo *VNI, - MachineBasicBlock *MBB, - SlotIndex Idx) const { - return li.killedInRange(Idx.getNextSlot(), getMBBEndIdx(MBB)); -} + if (!MO.isReg() || MO.getReg() == 0) + continue; -/// RewriteInfo - Keep track of machine instrs that will be rewritten -/// during spilling. -namespace { - struct RewriteInfo { - SlotIndex Index; - MachineInstr *MI; - RewriteInfo(SlotIndex i, MachineInstr *mi) : Index(i), MI(mi) {} - }; + unsigned Reg = MO.getReg(); - struct RewriteInfoCompare { - bool operator()(const RewriteInfo &LHS, const RewriteInfo &RHS) const { - return LHS.Index < RHS.Index; - } - }; -} + // TODO: Currently we're skipping uses that are reserved or have no + // interval, but we're not updating their kills. This should be + // fixed. + if (!LIS.hasInterval(Reg) || + (TargetRegisterInfo::isPhysicalRegister(Reg) && LIS.isReserved(Reg))) + continue; -void LiveIntervals:: -rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, - LiveInterval::Ranges::const_iterator &I, - MachineInstr *ReMatOrigDefMI, MachineInstr *ReMatDefMI, - unsigned Slot, int LdSlot, - bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete, - VirtRegMap &vrm, - const TargetRegisterClass* rc, - SmallVector<int, 4> &ReMatIds, - const MachineLoopInfo *loopInfo, - BitVector &SpillMBBs, - DenseMap<unsigned, std::vector<SRInfo> > &SpillIdxes, - BitVector &RestoreMBBs, - DenseMap<unsigned, std::vector<SRInfo> > &RestoreIdxes, - DenseMap<unsigned,unsigned> &MBBVRegsMap, - std::vector<LiveInterval*> &NewLIs) { - bool AllCanFold = true; - unsigned NewVReg = 0; - SlotIndex start = I->start.getBaseIndex(); - SlotIndex end = I->end.getPrevSlot().getBaseIndex().getNextIndex(); - - // First collect all the def / use in this live range that will be rewritten. - // Make sure they are sorted according to instruction index. - std::vector<RewriteInfo> RewriteMIs; - for (MachineRegisterInfo::reg_iterator ri = mri_->reg_begin(li.reg), - re = mri_->reg_end(); ri != re; ) { - MachineInstr *MI = &*ri; - MachineOperand &O = ri.getOperand(); - ++ri; - if (MI->isDebugValue()) { - // Modify DBG_VALUE now that the value is in a spill slot. - if (Slot != VirtRegMap::MAX_STACK_SLOT || isLoadSS) { - uint64_t Offset = MI->getOperand(1).getImm(); - const MDNode *MDPtr = MI->getOperand(2).getMetadata(); - DebugLoc DL = MI->getDebugLoc(); - int FI = isLoadSS ? 
LdSlot : (int)Slot; - if (MachineInstr *NewDV = tii_->emitFrameIndexDebugValue(*mf_, FI, - Offset, MDPtr, DL)) { - DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI); - ReplaceMachineInstrInMaps(MI, NewDV); - MachineBasicBlock *MBB = MI->getParent(); - MBB->insert(MBB->erase(MI), NewDV); - continue; + LiveInterval* LI = &LIS.getInterval(Reg); + + if (MO.readsReg()) { + LiveRange* LR = LI->getLiveRangeContaining(OldIdx); + if (LR != 0) + Entering.insert(std::make_pair(LI, LR)); + } + if (MO.isDef()) { + if (MO.isEarlyClobber()) { + LiveRange* LR = LI->getLiveRangeContaining(OldIdx.getRegSlot(true)); + assert(LR != 0 && "No EC range?"); + if (LR->end > OldIdx.getDeadSlot()) + Exiting.insert(std::make_pair(LI, LR)); + else + Internal.insert(std::make_pair(LI, LR)); + } else if (MO.isDead()) { + LiveRange* LR = LI->getLiveRangeContaining(OldIdx.getRegSlot()); + assert(LR != 0 && "No dead-def range?"); + Internal.insert(std::make_pair(LI, LR)); + } else { + LiveRange* LR = LI->getLiveRangeContaining(OldIdx.getDeadSlot()); + assert(LR && LR->end > OldIdx.getDeadSlot() && + "Non-dead-def should have live range exiting."); + Exiting.insert(std::make_pair(LI, LR)); } } - - DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI); - RemoveMachineInstrFromMaps(MI); - vrm.RemoveMachineInstrFromMaps(MI); - MI->eraseFromParent(); - continue; } - assert(!(O.isImplicit() && O.isUse()) && - "Spilling register that's used as implicit use?"); - SlotIndex index = getInstructionIndex(MI); - if (index < start || index >= end) - continue; - - if (O.isUndef()) - // Must be defined by an implicit def. It should not be spilled. Note, - // this is for correctness reason. e.g. - // 8 %reg1024<def> = IMPLICIT_DEF - // 12 %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2 - // The live range [12, 14) are not part of the r1024 live interval since - // it's defined by an implicit def. It will not conflicts with live - // interval of r1025. Now suppose both registers are spilled, you can - // easily see a situation where both registers are reloaded before - // the INSERT_SUBREG and both target registers that would overlap. - continue; - RewriteMIs.push_back(RewriteInfo(index, MI)); } - std::sort(RewriteMIs.begin(), RewriteMIs.end(), RewriteInfoCompare()); - - unsigned ImpUse = DefIsReMat ? getReMatImplicitUse(li, ReMatDefMI) : 0; - // Now rewrite the defs and uses. - for (unsigned i = 0, e = RewriteMIs.size(); i != e; ) { - RewriteInfo &rwi = RewriteMIs[i]; - ++i; - SlotIndex index = rwi.Index; - MachineInstr *MI = rwi.MI; - // If MI def and/or use the same register multiple times, then there - // are multiple entries. - while (i != e && RewriteMIs[i].MI == MI) { - assert(RewriteMIs[i].Index == index); - ++i; - } - MachineBasicBlock *MBB = MI->getParent(); - if (ImpUse && MI != ReMatDefMI) { - // Re-matting an instruction with virtual register use. Prevent interval - // from being spilled. - getInterval(ImpUse).markNotSpillable(); - } + // Collect IntRangePairs for all operands of MI that may need fixing. 
+ void collectRangesInBundle(MachineInstr* MI, RangeSet& Entering, + RangeSet& Exiting, SlotIndex MIStartIdx, + SlotIndex MIEndIdx) { + for (MachineInstr::mop_iterator MOI = MI->operands_begin(), + MOE = MI->operands_end(); + MOI != MOE; ++MOI) { + const MachineOperand& MO = *MOI; + assert(!MO.isRegMask() && "Can't have RegMasks in bundles."); + if (!MO.isReg() || MO.getReg() == 0) + continue; - unsigned MBBId = MBB->getNumber(); - unsigned ThisVReg = 0; - if (TrySplit) { - DenseMap<unsigned,unsigned>::iterator NVI = MBBVRegsMap.find(MBBId); - if (NVI != MBBVRegsMap.end()) { - ThisVReg = NVI->second; - // One common case: - // x = use - // ... - // ... - // def = ... - // = use - // It's better to start a new interval to avoid artificially - // extend the new interval. - if (MI->readsWritesVirtualRegister(li.reg) == - std::make_pair(false,true)) { - MBBVRegsMap.erase(MBB->getNumber()); - ThisVReg = 0; - } - } - } + unsigned Reg = MO.getReg(); + + // TODO: Currently we're skipping uses that are reserved or have no + // interval, but we're not updating their kills. This should be + // fixed. + if (!LIS.hasInterval(Reg) || + (TargetRegisterInfo::isPhysicalRegister(Reg) && LIS.isReserved(Reg))) + continue; - bool IsNew = ThisVReg == 0; - if (IsNew) { - // This ends the previous live interval. If all of its def / use - // can be folded, give it a low spill weight. - if (NewVReg && TrySplit && AllCanFold) { - LiveInterval &nI = getOrCreateInterval(NewVReg); - nI.weight /= 10.0F; + LiveInterval* LI = &LIS.getInterval(Reg); + + if (MO.readsReg()) { + LiveRange* LR = LI->getLiveRangeContaining(MIStartIdx); + if (LR != 0) + Entering.insert(std::make_pair(LI, LR)); + } + if (MO.isDef()) { + assert(!MO.isEarlyClobber() && "Early clobbers not allowed in bundles."); + assert(!MO.isDead() && "Dead-defs not allowed in bundles."); + LiveRange* LR = LI->getLiveRangeContaining(MIEndIdx.getDeadSlot()); + assert(LR != 0 && "Internal ranges not allowed in bundles."); + Exiting.insert(std::make_pair(LI, LR)); } - AllCanFold = true; } - NewVReg = ThisVReg; - - bool HasDef = false; - bool HasUse = false; - bool CanFold = rewriteInstructionForSpills(li, I->valno, TrySplit, - index, end, MI, ReMatOrigDefMI, ReMatDefMI, - Slot, LdSlot, isLoad, isLoadSS, DefIsReMat, - CanDelete, vrm, rc, ReMatIds, loopInfo, NewVReg, - ImpUse, HasDef, HasUse, MBBVRegsMap, NewLIs); - if (!HasDef && !HasUse) - continue; + } - AllCanFold &= CanFold; + BundleRanges createBundleRanges(RangeSet& Entering, RangeSet& Internal, RangeSet& Exiting) { + BundleRanges BR; - // Update weight of spill interval. - LiveInterval &nI = getOrCreateInterval(NewVReg); - if (!TrySplit) { - // The spill weight is now infinity as it cannot be spilled again. - nI.markNotSpillable(); - continue; + for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end(); + EI != EE; ++EI) { + LiveInterval* LI = EI->first; + LiveRange* LR = EI->second; + BR[LI->reg].Use = LR; } - // Keep track of the last def and first use in each MBB. - if (HasDef) { - if (MI != ReMatOrigDefMI || !CanDelete) { - bool HasKill = false; - if (!HasUse) - HasKill = anyKillInMBBAfterIdx(li, I->valno, MBB, index.getDefIndex()); - else { - // If this is a two-address code, then this index starts a new VNInfo. 
- const VNInfo *VNI = li.findDefinedVNInfoForRegInt(index.getDefIndex()); - if (VNI) - HasKill = anyKillInMBBAfterIdx(li, VNI, MBB, index.getDefIndex()); - } - DenseMap<unsigned, std::vector<SRInfo> >::iterator SII = - SpillIdxes.find(MBBId); - if (!HasKill) { - if (SII == SpillIdxes.end()) { - std::vector<SRInfo> S; - S.push_back(SRInfo(index, NewVReg, true)); - SpillIdxes.insert(std::make_pair(MBBId, S)); - } else if (SII->second.back().vreg != NewVReg) { - SII->second.push_back(SRInfo(index, NewVReg, true)); - } else if (index > SII->second.back().index) { - // If there is an earlier def and this is a two-address - // instruction, then it's not possible to fold the store (which - // would also fold the load). - SRInfo &Info = SII->second.back(); - Info.index = index; - Info.canFold = !HasUse; - } - SpillMBBs.set(MBBId); - } else if (SII != SpillIdxes.end() && - SII->second.back().vreg == NewVReg && - index > SII->second.back().index) { - // There is an earlier def that's not killed (must be two-address). - // The spill is no longer needed. - SII->second.pop_back(); - if (SII->second.empty()) { - SpillIdxes.erase(MBBId); - SpillMBBs.reset(MBBId); - } - } + for (RangeSet::iterator II = Internal.begin(), IE = Internal.end(); + II != IE; ++II) { + LiveInterval* LI = II->first; + LiveRange* LR = II->second; + if (LR->end.isDead()) { + BR[LI->reg].Dead = LR; + } else { + BR[LI->reg].EC = LR; } } - if (HasUse) { - DenseMap<unsigned, std::vector<SRInfo> >::iterator SII = - SpillIdxes.find(MBBId); - if (SII != SpillIdxes.end() && - SII->second.back().vreg == NewVReg && - index > SII->second.back().index) - // Use(s) following the last def, it's not safe to fold the spill. - SII->second.back().canFold = false; - DenseMap<unsigned, std::vector<SRInfo> >::iterator RII = - RestoreIdxes.find(MBBId); - if (RII != RestoreIdxes.end() && RII->second.back().vreg == NewVReg) - // If we are splitting live intervals, only fold if it's the first - // use and there isn't another use later in the MBB. - RII->second.back().canFold = false; - else if (IsNew) { - // Only need a reload if there isn't an earlier def / use. - if (RII == RestoreIdxes.end()) { - std::vector<SRInfo> Infos; - Infos.push_back(SRInfo(index, NewVReg, true)); - RestoreIdxes.insert(std::make_pair(MBBId, Infos)); - } else { - RII->second.push_back(SRInfo(index, NewVReg, true)); - } - RestoreMBBs.set(MBBId); - } + for (RangeSet::iterator EI = Exiting.begin(), EE = Exiting.end(); + EI != EE; ++EI) { + LiveInterval* LI = EI->first; + LiveRange* LR = EI->second; + BR[LI->reg].Def = LR; } - // Update spill weight. - unsigned loopDepth = loopInfo->getLoopDepth(MBB); - nI.weight += getSpillWeight(HasDef, HasUse, loopDepth); + return BR; } - if (NewVReg && TrySplit && AllCanFold) { - // If all of its def / use can be folded, give it a low spill weight. - LiveInterval &nI = getOrCreateInterval(NewVReg); - nI.weight /= 10.0F; + void moveKillFlags(unsigned reg, SlotIndex OldIdx, SlotIndex newKillIdx) { + MachineInstr* OldKillMI = LIS.getInstructionFromIndex(OldIdx); + if (!OldKillMI->killsRegister(reg)) + return; // Bail out if we don't have kill flags on the old register. 
+ MachineInstr* NewKillMI = LIS.getInstructionFromIndex(newKillIdx); + assert(OldKillMI->killsRegister(reg) && "Old 'kill' instr isn't a kill."); + assert(!NewKillMI->killsRegister(reg) && "New kill instr is already a kill."); + OldKillMI->clearRegisterKills(reg, &TRI); + NewKillMI->addRegisterKilled(reg, &TRI); } -} -bool LiveIntervals::alsoFoldARestore(int Id, SlotIndex index, - unsigned vr, BitVector &RestoreMBBs, - DenseMap<unsigned,std::vector<SRInfo> > &RestoreIdxes) { - if (!RestoreMBBs[Id]) - return false; - std::vector<SRInfo> &Restores = RestoreIdxes[Id]; - for (unsigned i = 0, e = Restores.size(); i != e; ++i) - if (Restores[i].index == index && - Restores[i].vreg == vr && - Restores[i].canFold) - return true; - return false; -} - -void LiveIntervals::eraseRestoreInfo(int Id, SlotIndex index, - unsigned vr, BitVector &RestoreMBBs, - DenseMap<unsigned,std::vector<SRInfo> > &RestoreIdxes) { - if (!RestoreMBBs[Id]) - return; - std::vector<SRInfo> &Restores = RestoreIdxes[Id]; - for (unsigned i = 0, e = Restores.size(); i != e; ++i) - if (Restores[i].index == index && Restores[i].vreg) - Restores[i].index = SlotIndex(); -} + void updateRegMaskSlots(SlotIndex OldIdx) { + SmallVectorImpl<SlotIndex>::iterator RI = + std::lower_bound(LIS.RegMaskSlots.begin(), LIS.RegMaskSlots.end(), + OldIdx); + assert(*RI == OldIdx && "No RegMask at OldIdx."); + *RI = NewIdx; + assert(*prior(RI) < *RI && *RI < *next(RI) && + "RegSlots out of order. Did you move one call across another?"); + } -/// handleSpilledImpDefs - Remove IMPLICIT_DEF instructions which are being -/// spilled and create empty intervals for their uses. -void -LiveIntervals::handleSpilledImpDefs(const LiveInterval &li, VirtRegMap &vrm, - const TargetRegisterClass* rc, - std::vector<LiveInterval*> &NewLIs) { - for (MachineRegisterInfo::reg_iterator ri = mri_->reg_begin(li.reg), - re = mri_->reg_end(); ri != re; ) { - MachineOperand &O = ri.getOperand(); - MachineInstr *MI = &*ri; - ++ri; - if (MI->isDebugValue()) { - // Remove debug info for now. - O.setReg(0U); - DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI); - continue; - } - if (O.isDef()) { - assert(MI->isImplicitDef() && - "Register def was not rewritten?"); - RemoveMachineInstrFromMaps(MI); - vrm.RemoveMachineInstrFromMaps(MI); - MI->eraseFromParent(); - } else { - // This must be an use of an implicit_def so it's not part of the live - // interval. Create a new empty live interval for it. - // FIXME: Can we simply erase some of the instructions? e.g. Stores? - unsigned NewVReg = mri_->createVirtualRegister(rc); - vrm.grow(); - vrm.setIsImplicitlyDefined(NewVReg); - NewLIs.push_back(&getOrCreateInterval(NewVReg)); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.getReg() == li.reg) { - MO.setReg(NewVReg); - MO.setIsUndef(); - } - } + // Return the last use of reg between NewIdx and OldIdx. + SlotIndex findLastUseBefore(unsigned Reg, SlotIndex OldIdx) { + SlotIndex LastUse = NewIdx; + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI.use_nodbg_begin(Reg), + UE = MRI.use_nodbg_end(); + UI != UE; UI.skipInstruction()) { + const MachineInstr* MI = &*UI; + SlotIndex InstSlot = LIS.getSlotIndexes()->getInstructionIndex(MI); + if (InstSlot > LastUse && InstSlot < OldIdx) + LastUse = InstSlot; } + return LastUse; } -} - -float -LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) { - // Limit the loop depth ridiculousness. 
- if (loopDepth > 200) - loopDepth = 200; - - // The loop depth is used to roughly estimate the number of times the - // instruction is executed. Something like 10^d is simple, but will quickly - // overflow a float. This expression behaves like 10^d for small d, but is - // more tempered for large d. At d=200 we get 6.7e33 which leaves a bit of - // headroom before overflow. - // By the way, powf() might be unavailable here. For consistency, - // We may take pow(double,double). - float lc = std::pow(1 + (100.0 / (loopDepth + 10)), (double)loopDepth); - - return (isDef + isUse) * lc; -} -static void normalizeSpillWeights(std::vector<LiveInterval*> &NewLIs) { - for (unsigned i = 0, e = NewLIs.size(); i != e; ++i) - NewLIs[i]->weight = - normalizeSpillWeight(NewLIs[i]->weight, NewLIs[i]->getSize()); -} + void moveEnteringUpFrom(SlotIndex OldIdx, IntRangePair& P) { + LiveInterval* LI = P.first; + LiveRange* LR = P.second; + bool LiveThrough = LR->end > OldIdx.getRegSlot(); + if (LiveThrough) + return; + SlotIndex LastUse = findLastUseBefore(LI->reg, OldIdx); + if (LastUse != NewIdx) + moveKillFlags(LI->reg, NewIdx, LastUse); + LR->end = LastUse.getRegSlot(); + } -std::vector<LiveInterval*> LiveIntervals:: -addIntervalsForSpills(const LiveInterval &li, - const SmallVectorImpl<LiveInterval*> *SpillIs, - const MachineLoopInfo *loopInfo, VirtRegMap &vrm) { - assert(li.isSpillable() && "attempt to spill already spilled interval!"); - - DEBUG({ - dbgs() << "\t\t\t\tadding intervals for spills for interval: "; - li.print(dbgs(), tri_); - dbgs() << '\n'; - }); - - // Each bit specify whether a spill is required in the MBB. - BitVector SpillMBBs(mf_->getNumBlockIDs()); - DenseMap<unsigned, std::vector<SRInfo> > SpillIdxes; - BitVector RestoreMBBs(mf_->getNumBlockIDs()); - DenseMap<unsigned, std::vector<SRInfo> > RestoreIdxes; - DenseMap<unsigned,unsigned> MBBVRegsMap; - std::vector<LiveInterval*> NewLIs; - const TargetRegisterClass* rc = mri_->getRegClass(li.reg); - - unsigned NumValNums = li.getNumValNums(); - SmallVector<MachineInstr*, 4> ReMatDefs; - ReMatDefs.resize(NumValNums, NULL); - SmallVector<MachineInstr*, 4> ReMatOrigDefs; - ReMatOrigDefs.resize(NumValNums, NULL); - SmallVector<int, 4> ReMatIds; - ReMatIds.resize(NumValNums, VirtRegMap::MAX_STACK_SLOT); - BitVector ReMatDelete(NumValNums); - unsigned Slot = VirtRegMap::MAX_STACK_SLOT; - - // Spilling a split live interval. It cannot be split any further. Also, - // it's also guaranteed to be a single val# / range interval. - if (vrm.getPreSplitReg(li.reg)) { - vrm.setIsSplitFromReg(li.reg, 0); - // Unset the split kill marker on the last use. - SlotIndex KillIdx = vrm.getKillPoint(li.reg); - if (KillIdx != SlotIndex()) { - MachineInstr *KillMI = getInstructionFromIndex(KillIdx); - assert(KillMI && "Last use disappeared?"); - int KillOp = KillMI->findRegisterUseOperandIdx(li.reg, true); - assert(KillOp != -1 && "Last use disappeared?"); - KillMI->getOperand(KillOp).setIsKill(false); - } - vrm.removeKillPoint(li.reg); - bool DefIsReMat = vrm.isReMaterialized(li.reg); - Slot = vrm.getStackSlot(li.reg); - assert(Slot != VirtRegMap::MAX_STACK_SLOT); - MachineInstr *ReMatDefMI = DefIsReMat ? 
- vrm.getReMaterializedMI(li.reg) : NULL; - int LdSlot = 0; - bool isLoadSS = DefIsReMat && tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot); - bool isLoad = isLoadSS || - (DefIsReMat && (ReMatDefMI->getDesc().canFoldAsLoad())); - bool IsFirstRange = true; - for (LiveInterval::Ranges::const_iterator - I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) { - // If this is a split live interval with multiple ranges, it means there - // are two-address instructions that re-defined the value. Only the - // first def can be rematerialized! - if (IsFirstRange) { - // Note ReMatOrigDefMI has already been deleted. - rewriteInstructionsForSpills(li, false, I, NULL, ReMatDefMI, - Slot, LdSlot, isLoad, isLoadSS, DefIsReMat, - false, vrm, rc, ReMatIds, loopInfo, - SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes, - MBBVRegsMap, NewLIs); - } else { - rewriteInstructionsForSpills(li, false, I, NULL, 0, - Slot, 0, false, false, false, - false, vrm, rc, ReMatIds, loopInfo, - SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes, - MBBVRegsMap, NewLIs); + void moveEnteringDownFrom(SlotIndex OldIdx, IntRangePair& P) { + LiveInterval* LI = P.first; + LiveRange* LR = P.second; + // Extend the LiveRange if NewIdx is past the end. + if (NewIdx > LR->end) { + // Move kill flags if OldIdx was not originally the end + // (otherwise LR->end points to an invalid slot). + if (LR->end.getRegSlot() != OldIdx.getRegSlot()) { + assert(LR->end > OldIdx && "LiveRange does not cover original slot"); + moveKillFlags(LI->reg, LR->end, NewIdx); } - IsFirstRange = false; + LR->end = NewIdx.getRegSlot(); } - - handleSpilledImpDefs(li, vrm, rc, NewLIs); - normalizeSpillWeights(NewLIs); - return NewLIs; } - bool TrySplit = !intervalIsInOneMBB(li); - if (TrySplit) - ++numSplits; - bool NeedStackSlot = false; - for (LiveInterval::const_vni_iterator i = li.vni_begin(), e = li.vni_end(); - i != e; ++i) { - const VNInfo *VNI = *i; - unsigned VN = VNI->id; - if (VNI->isUnused()) - continue; // Dead val#. - // Is the def for the val# rematerializable? - MachineInstr *ReMatDefMI = getInstructionFromIndex(VNI->def); - bool dummy; - if (ReMatDefMI && isReMaterializable(li, VNI, ReMatDefMI, SpillIs, dummy)) { - // Remember how to remat the def of this val#. - ReMatOrigDefs[VN] = ReMatDefMI; - // Original def may be modified so we have to make a copy here. - MachineInstr *Clone = mf_->CloneMachineInstr(ReMatDefMI); - CloneMIs.push_back(Clone); - ReMatDefs[VN] = Clone; - - bool CanDelete = true; - if (VNI->hasPHIKill()) { - // A kill is a phi node, not all of its uses can be rematerialized. - // It must not be deleted. - CanDelete = false; - // Need a stack slot if there is any live range where uses cannot be - // rematerialized. - NeedStackSlot = true; - } - if (CanDelete) - ReMatDelete.set(VN); + void moveAllEnteringFrom(SlotIndex OldIdx, RangeSet& Entering) { + bool GoingUp = NewIdx < OldIdx; + + if (GoingUp) { + for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end(); + EI != EE; ++EI) + moveEnteringUpFrom(OldIdx, *EI); } else { - // Need a stack slot if there is any live range where uses cannot be - // rematerialized. - NeedStackSlot = true; + for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end(); + EI != EE; ++EI) + moveEnteringDownFrom(OldIdx, *EI); } } - // One stack slot per live interval. 
- if (NeedStackSlot && vrm.getPreSplitReg(li.reg) == 0) { - if (vrm.getStackSlot(li.reg) == VirtRegMap::NO_STACK_SLOT) - Slot = vrm.assignVirt2StackSlot(li.reg); - - // This case only occurs when the prealloc splitter has already assigned - // a stack slot to this vreg. - else - Slot = vrm.getStackSlot(li.reg); + void moveInternalFrom(SlotIndex OldIdx, IntRangePair& P) { + LiveInterval* LI = P.first; + LiveRange* LR = P.second; + assert(OldIdx < LR->start && LR->start < OldIdx.getDeadSlot() && + LR->end <= OldIdx.getDeadSlot() && + "Range should be internal to OldIdx."); + LiveRange Tmp(*LR); + Tmp.start = NewIdx.getRegSlot(LR->start.isEarlyClobber()); + Tmp.valno->def = Tmp.start; + Tmp.end = LR->end.isDead() ? NewIdx.getDeadSlot() : NewIdx.getRegSlot(); + LI->removeRange(*LR); + LI->addRange(Tmp); } - // Create new intervals and rewrite defs and uses. - for (LiveInterval::Ranges::const_iterator - I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) { - MachineInstr *ReMatDefMI = ReMatDefs[I->valno->id]; - MachineInstr *ReMatOrigDefMI = ReMatOrigDefs[I->valno->id]; - bool DefIsReMat = ReMatDefMI != NULL; - bool CanDelete = ReMatDelete[I->valno->id]; - int LdSlot = 0; - bool isLoadSS = DefIsReMat && tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot); - bool isLoad = isLoadSS || - (DefIsReMat && ReMatDefMI->getDesc().canFoldAsLoad()); - rewriteInstructionsForSpills(li, TrySplit, I, ReMatOrigDefMI, ReMatDefMI, - Slot, LdSlot, isLoad, isLoadSS, DefIsReMat, - CanDelete, vrm, rc, ReMatIds, loopInfo, - SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes, - MBBVRegsMap, NewLIs); + void moveAllInternalFrom(SlotIndex OldIdx, RangeSet& Internal) { + for (RangeSet::iterator II = Internal.begin(), IE = Internal.end(); + II != IE; ++II) + moveInternalFrom(OldIdx, *II); } - // Insert spills / restores if we are splitting. - if (!TrySplit) { - handleSpilledImpDefs(li, vrm, rc, NewLIs); - normalizeSpillWeights(NewLIs); - return NewLIs; + void moveExitingFrom(SlotIndex OldIdx, IntRangePair& P) { + LiveRange* LR = P.second; + assert(OldIdx < LR->start && LR->start < OldIdx.getDeadSlot() && + "Range should start in OldIdx."); + assert(LR->end > OldIdx.getDeadSlot() && "Range should exit OldIdx."); + SlotIndex NewStart = NewIdx.getRegSlot(LR->start.isEarlyClobber()); + LR->start = NewStart; + LR->valno->def = NewStart; } - SmallPtrSet<LiveInterval*, 4> AddedKill; - SmallVector<unsigned, 2> Ops; - if (NeedStackSlot) { - int Id = SpillMBBs.find_first(); - while (Id != -1) { - std::vector<SRInfo> &spills = SpillIdxes[Id]; - for (unsigned i = 0, e = spills.size(); i != e; ++i) { - SlotIndex index = spills[i].index; - unsigned VReg = spills[i].vreg; - LiveInterval &nI = getOrCreateInterval(VReg); - bool isReMat = vrm.isReMaterialized(VReg); - MachineInstr *MI = getInstructionFromIndex(index); - bool CanFold = false; - bool FoundUse = false; - Ops.clear(); - if (spills[i].canFold) { - CanFold = true; - for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) { - MachineOperand &MO = MI->getOperand(j); - if (!MO.isReg() || MO.getReg() != VReg) - continue; - - Ops.push_back(j); - if (MO.isDef()) - continue; - if (isReMat || - (!FoundUse && !alsoFoldARestore(Id, index, VReg, - RestoreMBBs, RestoreIdxes))) { - // MI has two-address uses of the same register. If the use - // isn't the first and only use in the BB, then we can't fold - // it. FIXME: Move this to rewriteInstructionsForSpills. - CanFold = false; - break; - } - FoundUse = true; - } - } - // Fold the store into the def if possible. 
- bool Folded = false; - if (CanFold && !Ops.empty()) { - if (tryFoldMemoryOperand(MI, vrm, NULL, index, Ops, true, Slot,VReg)){ - Folded = true; - if (FoundUse) { - // Also folded uses, do not issue a load. - eraseRestoreInfo(Id, index, VReg, RestoreMBBs, RestoreIdxes); - nI.removeRange(index.getLoadIndex(), index.getDefIndex()); - } - nI.removeRange(index.getDefIndex(), index.getStoreIndex()); - } - } - - // Otherwise tell the spiller to issue a spill. - if (!Folded) { - LiveRange *LR = &nI.ranges[nI.ranges.size()-1]; - bool isKill = LR->end == index.getStoreIndex(); - if (!MI->registerDefIsDead(nI.reg)) - // No need to spill a dead def. - vrm.addSpillPoint(VReg, isKill, MI); - if (isKill) - AddedKill.insert(&nI); - } - } - Id = SpillMBBs.find_next(Id); - } + void moveAllExitingFrom(SlotIndex OldIdx, RangeSet& Exiting) { + for (RangeSet::iterator EI = Exiting.begin(), EE = Exiting.end(); + EI != EE; ++EI) + moveExitingFrom(OldIdx, *EI); } - int Id = RestoreMBBs.find_first(); - while (Id != -1) { - std::vector<SRInfo> &restores = RestoreIdxes[Id]; - for (unsigned i = 0, e = restores.size(); i != e; ++i) { - SlotIndex index = restores[i].index; - if (index == SlotIndex()) - continue; - unsigned VReg = restores[i].vreg; - LiveInterval &nI = getOrCreateInterval(VReg); - bool isReMat = vrm.isReMaterialized(VReg); - MachineInstr *MI = getInstructionFromIndex(index); - bool CanFold = false; - Ops.clear(); - if (restores[i].canFold) { - CanFold = true; - for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) { - MachineOperand &MO = MI->getOperand(j); - if (!MO.isReg() || MO.getReg() != VReg) - continue; - - if (MO.isDef()) { - // If this restore were to be folded, it would have been folded - // already. - CanFold = false; - break; - } - Ops.push_back(j); - } - } + void moveEnteringUpFromInto(SlotIndex OldIdx, IntRangePair& P, + BundleRanges& BR) { + LiveInterval* LI = P.first; + LiveRange* LR = P.second; + bool LiveThrough = LR->end > OldIdx.getRegSlot(); + if (LiveThrough) { + assert((LR->start < NewIdx || BR[LI->reg].Def == LR) && + "Def in bundle should be def range."); + assert((BR[LI->reg].Use == 0 || BR[LI->reg].Use == LR) && + "If bundle has use for this reg it should be LR."); + BR[LI->reg].Use = LR; + return; + } - // Fold the load into the use if possible. - bool Folded = false; - if (CanFold && !Ops.empty()) { - if (!isReMat) - Folded = tryFoldMemoryOperand(MI, vrm, NULL,index,Ops,true,Slot,VReg); - else { - MachineInstr *ReMatDefMI = vrm.getReMaterializedMI(VReg); - int LdSlot = 0; - bool isLoadSS = tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot); - // If the rematerializable def is a load, also try to fold it. - if (isLoadSS || ReMatDefMI->getDesc().canFoldAsLoad()) - Folded = tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index, - Ops, isLoadSS, LdSlot, VReg); - if (!Folded) { - unsigned ImpUse = getReMatImplicitUse(li, ReMatDefMI); - if (ImpUse) { - // Re-matting an instruction with virtual register use. Add the - // register as an implicit use on the use MI and mark the register - // interval as unspillable. - LiveInterval &ImpLi = getInterval(ImpUse); - ImpLi.markNotSpillable(); - MI->addOperand(MachineOperand::CreateReg(ImpUse, false, true)); - } - } - } - } - // If folding is not possible / failed, then tell the spiller to issue a - // load / rematerialization for us. 
- if (Folded) - nI.removeRange(index.getLoadIndex(), index.getDefIndex()); - else - vrm.addRestorePoint(VReg, MI); + SlotIndex LastUse = findLastUseBefore(LI->reg, OldIdx); + moveKillFlags(LI->reg, OldIdx, LastUse); + + if (LR->start < NewIdx) { + // Becoming a new entering range. + assert(BR[LI->reg].Dead == 0 && BR[LI->reg].Def == 0 && + "Bundle shouldn't be re-defining reg mid-range."); + assert((BR[LI->reg].Use == 0 || BR[LI->reg].Use == LR) && + "Bundle shouldn't have different use range for same reg."); + LR->end = LastUse.getRegSlot(); + BR[LI->reg].Use = LR; + } else { + // Becoming a new Dead-def. + assert(LR->start == NewIdx.getRegSlot(LR->start.isEarlyClobber()) && + "Live range starting at unexpected slot."); + assert(BR[LI->reg].Def == LR && "Reg should have def range."); + assert(BR[LI->reg].Dead == 0 && + "Can't have def and dead def of same reg in a bundle."); + LR->end = LastUse.getDeadSlot(); + BR[LI->reg].Dead = BR[LI->reg].Def; + BR[LI->reg].Def = 0; } - Id = RestoreMBBs.find_next(Id); } - // Finalize intervals: add kills, finalize spill weights, and filter out - // dead intervals. - std::vector<LiveInterval*> RetNewLIs; - for (unsigned i = 0, e = NewLIs.size(); i != e; ++i) { - LiveInterval *LI = NewLIs[i]; - if (!LI->empty()) { - if (!AddedKill.count(LI)) { - LiveRange *LR = &LI->ranges[LI->ranges.size()-1]; - SlotIndex LastUseIdx = LR->end.getBaseIndex(); - MachineInstr *LastUse = getInstructionFromIndex(LastUseIdx); - int UseIdx = LastUse->findRegisterUseOperandIdx(LI->reg, false); - assert(UseIdx != -1); - if (!LastUse->isRegTiedToDefOperand(UseIdx)) { - LastUse->getOperand(UseIdx).setIsKill(); - vrm.addKillPoint(LI->reg, LastUseIdx); - } - } - RetNewLIs.push_back(LI); + void moveEnteringDownFromInto(SlotIndex OldIdx, IntRangePair& P, + BundleRanges& BR) { + LiveInterval* LI = P.first; + LiveRange* LR = P.second; + if (NewIdx > LR->end) { + // Range extended to bundle. Add to bundle uses. + // Note: Currently adds kill flags to bundle start. + assert(BR[LI->reg].Use == 0 && + "Bundle already has use range for reg."); + moveKillFlags(LI->reg, LR->end, NewIdx); + LR->end = NewIdx.getRegSlot(); + BR[LI->reg].Use = LR; + } else { + assert(BR[LI->reg].Use != 0 && + "Bundle should already have a use range for reg."); } } - handleSpilledImpDefs(li, vrm, rc, RetNewLIs); - normalizeSpillWeights(RetNewLIs); - return RetNewLIs; -} - -/// hasAllocatableSuperReg - Return true if the specified physical register has -/// any super register that's allocatable. -bool LiveIntervals::hasAllocatableSuperReg(unsigned Reg) const { - for (const unsigned* AS = tri_->getSuperRegisters(Reg); *AS; ++AS) - if (allocatableRegs_[*AS] && hasInterval(*AS)) - return true; - return false; -} + void moveAllEnteringFromInto(SlotIndex OldIdx, RangeSet& Entering, + BundleRanges& BR) { + bool GoingUp = NewIdx < OldIdx; -/// getRepresentativeReg - Find the largest super register of the specified -/// physical register. -unsigned LiveIntervals::getRepresentativeReg(unsigned Reg) const { - // Find the largest super-register that is allocatable. 
- unsigned BestReg = Reg; - for (const unsigned* AS = tri_->getSuperRegisters(Reg); *AS; ++AS) { - unsigned SuperReg = *AS; - if (!hasAllocatableSuperReg(SuperReg) && hasInterval(SuperReg)) { - BestReg = SuperReg; - break; + if (GoingUp) { + for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end(); + EI != EE; ++EI) + moveEnteringUpFromInto(OldIdx, *EI, BR); + } else { + for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end(); + EI != EE; ++EI) + moveEnteringDownFromInto(OldIdx, *EI, BR); } } - return BestReg; -} -/// getNumConflictsWithPhysReg - Return the number of uses and defs of the -/// specified interval that conflicts with the specified physical register. -unsigned LiveIntervals::getNumConflictsWithPhysReg(const LiveInterval &li, - unsigned PhysReg) const { - unsigned NumConflicts = 0; - const LiveInterval &pli = getInterval(getRepresentativeReg(PhysReg)); - for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li.reg), - E = mri_->reg_end(); I != E; ++I) { - MachineOperand &O = I.getOperand(); - MachineInstr *MI = O.getParent(); - if (MI->isDebugValue()) - continue; - SlotIndex Index = getInstructionIndex(MI); - if (pli.liveAt(Index)) - ++NumConflicts; + void moveInternalFromInto(SlotIndex OldIdx, IntRangePair& P, + BundleRanges& BR) { + // TODO: Sane rules for moving ranges into bundles. } - return NumConflicts; -} -/// spillPhysRegAroundRegDefsUses - Spill the specified physical register -/// around all defs and uses of the specified interval. Return true if it -/// was able to cut its interval. -bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li, - unsigned PhysReg, VirtRegMap &vrm) { - unsigned SpillReg = getRepresentativeReg(PhysReg); - - DEBUG(dbgs() << "spillPhysRegAroundRegDefsUses " << tri_->getName(PhysReg) - << " represented by " << tri_->getName(SpillReg) << '\n'); - - for (const unsigned *AS = tri_->getAliasSet(PhysReg); *AS; ++AS) - // If there are registers which alias PhysReg, but which are not a - // sub-register of the chosen representative super register. Assert - // since we can't handle it yet. 
- assert(*AS == SpillReg || !allocatableRegs_[*AS] || !hasInterval(*AS) || - tri_->isSuperRegister(*AS, SpillReg)); - - bool Cut = false; - SmallVector<unsigned, 4> PRegs; - if (hasInterval(SpillReg)) - PRegs.push_back(SpillReg); - for (const unsigned *SR = tri_->getSubRegisters(SpillReg); *SR; ++SR) - if (hasInterval(*SR)) - PRegs.push_back(*SR); - - DEBUG({ - dbgs() << "Trying to spill:"; - for (unsigned i = 0, e = PRegs.size(); i != e; ++i) - dbgs() << ' ' << tri_->getName(PRegs[i]); - dbgs() << '\n'; - }); - - SmallPtrSet<MachineInstr*, 8> SeenMIs; - for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li.reg), - E = mri_->reg_end(); I != E; ++I) { - MachineOperand &O = I.getOperand(); - MachineInstr *MI = O.getParent(); - if (MI->isDebugValue() || SeenMIs.count(MI)) - continue; - SeenMIs.insert(MI); - SlotIndex Index = getInstructionIndex(MI); - bool LiveReg = false; - for (unsigned i = 0, e = PRegs.size(); i != e; ++i) { - unsigned PReg = PRegs[i]; - LiveInterval &pli = getInterval(PReg); - if (!pli.liveAt(Index)) - continue; - LiveReg = true; - SlotIndex StartIdx = Index.getLoadIndex(); - SlotIndex EndIdx = Index.getNextIndex().getBaseIndex(); - if (!pli.isInOneLiveRange(StartIdx, EndIdx)) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "Ran out of registers during register allocation!"; - if (MI->isInlineAsm()) { - Msg << "\nPlease check your inline asm statement for invalid " - << "constraints:\n"; - MI->print(Msg, tm_); - } - report_fatal_error(Msg.str()); + void moveAllInternalFromInto(SlotIndex OldIdx, RangeSet& Internal, + BundleRanges& BR) { + for (RangeSet::iterator II = Internal.begin(), IE = Internal.end(); + II != IE; ++II) + moveInternalFromInto(OldIdx, *II, BR); + } + + void moveExitingFromInto(SlotIndex OldIdx, IntRangePair& P, + BundleRanges& BR) { + LiveInterval* LI = P.first; + LiveRange* LR = P.second; + + assert(LR->start.isRegister() && + "Don't know how to merge exiting ECs into bundles yet."); + + if (LR->end > NewIdx.getDeadSlot()) { + // This range is becoming an exiting range on the bundle. + // If there was an old dead-def of this reg, delete it. + if (BR[LI->reg].Dead != 0) { + LI->removeRange(*BR[LI->reg].Dead); + BR[LI->reg].Dead = 0; + } + assert(BR[LI->reg].Def == 0 && + "Can't have two defs for the same variable exiting a bundle."); + LR->start = NewIdx.getRegSlot(); + LR->valno->def = LR->start; + BR[LI->reg].Def = LR; + } else { + // This range is becoming internal to the bundle. + assert(LR->end == NewIdx.getRegSlot() && + "Can't bundle def whose kill is before the bundle"); + if (BR[LI->reg].Dead || BR[LI->reg].Def) { + // Already have a def for this. Just delete range. + LI->removeRange(*LR); + } else { + // Make range dead, record. + LR->end = NewIdx.getDeadSlot(); + BR[LI->reg].Dead = LR; + assert(BR[LI->reg].Use == LR && + "Range becoming dead should currently be use."); } - pli.removeRange(StartIdx, EndIdx); - LiveReg = true; + // In both cases the range is no longer a use on the bundle. 
+ BR[LI->reg].Use = 0; } - if (!LiveReg) - continue; - DEBUG(dbgs() << "Emergency spill around " << Index << '\t' << *MI); - vrm.addEmergencySpill(SpillReg, MI); - Cut = true; } - return Cut; -} -LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg, - MachineInstr* startInst) { - LiveInterval& Interval = getOrCreateInterval(reg); - VNInfo* VN = Interval.getNextValue( - SlotIndex(getInstructionIndex(startInst).getDefIndex()), - startInst, getVNInfoAllocator()); - VN->setHasPHIKill(true); - LiveRange LR( - SlotIndex(getInstructionIndex(startInst).getDefIndex()), - getMBBEndIdx(startInst->getParent()), VN); - Interval.addRange(LR); + void moveAllExitingFromInto(SlotIndex OldIdx, RangeSet& Exiting, + BundleRanges& BR) { + for (RangeSet::iterator EI = Exiting.begin(), EE = Exiting.end(); + EI != EE; ++EI) + moveExitingFromInto(OldIdx, *EI, BR); + } - return LR; +}; + +void LiveIntervals::handleMove(MachineInstr* MI) { + SlotIndex OldIndex = indexes_->getInstructionIndex(MI); + indexes_->removeMachineInstrFromMaps(MI); + SlotIndex NewIndex = MI->isInsideBundle() ? + indexes_->getInstructionIndex(MI) : + indexes_->insertMachineInstrInMaps(MI); + assert(getMBBStartIdx(MI->getParent()) <= OldIndex && + OldIndex < getMBBEndIdx(MI->getParent()) && + "Cannot handle moves across basic block boundaries."); + assert(!MI->isBundled() && "Can't handle bundled instructions yet."); + + HMEditor HME(*this, *mri_, *tri_, NewIndex); + HME.moveAllRangesFrom(MI, OldIndex); } +void LiveIntervals::handleMoveIntoBundle(MachineInstr* MI, MachineInstr* BundleStart) { + SlotIndex NewIndex = indexes_->getInstructionIndex(BundleStart); + HMEditor HME(*this, *mri_, *tri_, NewIndex); + HME.moveAllRangesInto(MI, BundleStart); +} diff --git a/lib/CodeGen/LiveIntervalUnion.cpp b/lib/CodeGen/LiveIntervalUnion.cpp index 110fe1e62024..60a68806c55e 100644 --- a/lib/CodeGen/LiveIntervalUnion.cpp +++ b/lib/CodeGen/LiveIntervalUnion.cpp @@ -21,6 +21,8 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" +#include <algorithm> + using namespace llvm; diff --git a/lib/CodeGen/LiveIntervalUnion.h b/lib/CodeGen/LiveIntervalUnion.h index 5d64d285f39a..dbf5ac122d5d 100644 --- a/lib/CodeGen/LiveIntervalUnion.h +++ b/lib/CodeGen/LiveIntervalUnion.h @@ -20,8 +20,6 @@ #include "llvm/ADT/IntervalMap.h" #include "llvm/CodeGen/LiveInterval.h" -#include <algorithm> - namespace llvm { class MachineLoopRange; diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp index a7d5af5198e5..d8ab7918ae25 100644 --- a/lib/CodeGen/LiveRangeCalc.cpp +++ b/lib/CodeGen/LiveRangeCalc.cpp @@ -65,7 +65,7 @@ void LiveRangeCalc::extend(LiveInterval *LI, assert(DomTree && "Missing dominator tree"); MachineBasicBlock *KillMBB = Indexes->getMBBFromIndex(Kill.getPrevSlot()); - assert(Kill && "No MBB at Kill"); + assert(KillMBB && "No MBB at Kill"); // Is there a def in the same MBB we can extend? if (LI->extendInBlock(Indexes->getMBBStartIdx(KillMBB), Kill)) @@ -237,7 +237,7 @@ void LiveRangeCalc::updateSSA(SlotIndexes *Indexes, assert(Alloc && "Need VNInfo allocator to create PHI-defs"); SlotIndex Start, End; tie(Start, End) = Indexes->getMBBRange(MBB); - VNInfo *VNI = I->LI->getNextValue(Start, 0, *Alloc); + VNInfo *VNI = I->LI->getNextValue(Start, *Alloc); VNI->setIsPHIDef(true); I->Value = VNI; // This block is done, we know the final value. 
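The LiveIntervalAnalysis.cpp changes above drop the old spilling helpers and add LiveIntervals::handleMove() and handleMoveIntoBundle(), which let a pass reorder an already-indexed instruction and have its live ranges repaired in place. A minimal sketch of the intended calling pattern, assuming a caller that already holds a LiveIntervals reference and keeps the instruction inside its parent block; the wrapper name moveAndUpdate and its boilerplate are illustrative and not part of the patch:

#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
using namespace llvm;

// Reorder the instruction at I in front of InsertPt within its own basic
// block, then let LiveIntervals refresh the slot index and affected ranges.
// handleMove() asserts if the instruction is bundled or the move crosses a
// basic-block boundary.
static void moveAndUpdate(LiveIntervals &LIS, MachineBasicBlock::iterator I,
                          MachineBasicBlock::iterator InsertPt) {
  MachineInstr *MI = &*I;                 // instruction being moved
  MachineBasicBlock *MBB = MI->getParent();
  MBB->splice(InsertPt, MBB, I);          // reorder first ...
  LIS.handleMove(MI);                     // ... then repair live ranges
}

The reorder has to happen before the call, because handleMove() derives the new slot index from the instruction's current position; handleMoveIntoBundle() follows the same pattern but takes the bundle's start instruction and reuses its index rather than allocating a new one.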
diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp index b23f85165360..695f53631e1b 100644 --- a/lib/CodeGen/LiveRangeEdit.cpp +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -1,4 +1,4 @@ -//===--- LiveRangeEdit.cpp - Basic tools for editing a register live range --===// +//===-- LiveRangeEdit.cpp - Basic tools for editing a register live range -===// // // The LLVM Compiler Infrastructure // @@ -12,12 +12,12 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "regalloc" -#include "LiveRangeEdit.h" #include "VirtRegMap.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Support/Debug.h" @@ -29,13 +29,14 @@ STATISTIC(NumDCEDeleted, "Number of instructions deleted by DCE"); STATISTIC(NumDCEFoldedLoads, "Number of single use loads folded after DCE"); STATISTIC(NumFracRanges, "Number of live ranges fractured by DCE"); -LiveInterval &LiveRangeEdit::createFrom(unsigned OldReg, - LiveIntervals &LIS, - VirtRegMap &VRM) { - MachineRegisterInfo &MRI = VRM.getRegInfo(); +void LiveRangeEdit::Delegate::anchor() { } + +LiveInterval &LiveRangeEdit::createFrom(unsigned OldReg) { unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); - VRM.grow(); - VRM.setIsSplitFromReg(VReg, VRM.getOriginal(OldReg)); + if (VRM) { + VRM->grow(); + VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg)); + } LiveInterval &LI = LIS.getOrCreateInterval(VReg); newRegs_.push_back(&LI); return LI; @@ -43,37 +44,32 @@ LiveInterval &LiveRangeEdit::createFrom(unsigned OldReg, bool LiveRangeEdit::checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI, - const TargetInstrInfo &tii, AliasAnalysis *aa) { assert(DefMI && "Missing instruction"); scannedRemattable_ = true; - if (!tii.isTriviallyReMaterializable(DefMI, aa)) + if (!TII.isTriviallyReMaterializable(DefMI, aa)) return false; remattable_.insert(VNI); return true; } -void LiveRangeEdit::scanRemattable(LiveIntervals &lis, - const TargetInstrInfo &tii, - AliasAnalysis *aa) { +void LiveRangeEdit::scanRemattable(AliasAnalysis *aa) { for (LiveInterval::vni_iterator I = parent_.vni_begin(), E = parent_.vni_end(); I != E; ++I) { VNInfo *VNI = *I; if (VNI->isUnused()) continue; - MachineInstr *DefMI = lis.getInstructionFromIndex(VNI->def); + MachineInstr *DefMI = LIS.getInstructionFromIndex(VNI->def); if (!DefMI) continue; - checkRematerializable(VNI, DefMI, tii, aa); + checkRematerializable(VNI, DefMI, aa); } scannedRemattable_ = true; } -bool LiveRangeEdit::anyRematerializable(LiveIntervals &lis, - const TargetInstrInfo &tii, - AliasAnalysis *aa) { +bool LiveRangeEdit::anyRematerializable(AliasAnalysis *aa) { if (!scannedRemattable_) - scanRemattable(lis, tii, aa); + scanRemattable(aa); return !remattable_.empty(); } @@ -81,24 +77,18 @@ bool LiveRangeEdit::anyRematerializable(LiveIntervals &lis, /// OrigIdx are also available with the same value at UseIdx. 
bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx, - SlotIndex UseIdx, - LiveIntervals &lis) { - OrigIdx = OrigIdx.getUseIndex(); - UseIdx = UseIdx.getUseIndex(); + SlotIndex UseIdx) { + OrigIdx = OrigIdx.getRegSlot(true); + UseIdx = UseIdx.getRegSlot(true); for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = OrigMI->getOperand(i); if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue; // Reserved registers are OK. - if (MO.isUndef() || !lis.hasInterval(MO.getReg())) + if (MO.isUndef() || !LIS.hasInterval(MO.getReg())) continue; - // We cannot depend on virtual registers in uselessRegs_. - if (uselessRegs_) - for (unsigned ui = 0, ue = uselessRegs_->size(); ui != ue; ++ui) - if ((*uselessRegs_)[ui]->reg == MO.getReg()) - return false; - LiveInterval &li = lis.getInterval(MO.getReg()); + LiveInterval &li = LIS.getInterval(MO.getReg()); const VNInfo *OVNI = li.getVNInfoAt(OrigIdx); if (!OVNI) continue; @@ -110,8 +100,7 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI, bool LiveRangeEdit::canRematerializeAt(Remat &RM, SlotIndex UseIdx, - bool cheapAsAMove, - LiveIntervals &lis) { + bool cheapAsAMove) { assert(scannedRemattable_ && "Call anyRematerializable first"); // Use scanRemattable info. @@ -121,19 +110,19 @@ bool LiveRangeEdit::canRematerializeAt(Remat &RM, // No defining instruction provided. SlotIndex DefIdx; if (RM.OrigMI) - DefIdx = lis.getInstructionIndex(RM.OrigMI); + DefIdx = LIS.getInstructionIndex(RM.OrigMI); else { DefIdx = RM.ParentVNI->def; - RM.OrigMI = lis.getInstructionFromIndex(DefIdx); + RM.OrigMI = LIS.getInstructionFromIndex(DefIdx); assert(RM.OrigMI && "No defining instruction for remattable value"); } // If only cheap remats were requested, bail out early. - if (cheapAsAMove && !RM.OrigMI->getDesc().isAsCheapAsAMove()) + if (cheapAsAMove && !RM.OrigMI->isAsCheapAsAMove()) return false; // Verify that all used registers are available with the same values. - if (!allUsesAvailableAt(RM.OrigMI, DefIdx, UseIdx, lis)) + if (!allUsesAvailableAt(RM.OrigMI, DefIdx, UseIdx)) return false; return true; @@ -143,27 +132,22 @@ SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, const Remat &RM, - LiveIntervals &lis, - const TargetInstrInfo &tii, const TargetRegisterInfo &tri, bool Late) { assert(RM.OrigMI && "Invalid remat"); - tii.reMaterialize(MBB, MI, DestReg, 0, RM.OrigMI, tri); + TII.reMaterialize(MBB, MI, DestReg, 0, RM.OrigMI, tri); rematted_.insert(RM.ParentVNI); - return lis.getSlotIndexes()->insertMachineInstrInMaps(--MI, Late) - .getDefIndex(); + return LIS.getSlotIndexes()->insertMachineInstrInMaps(--MI, Late) + .getRegSlot(); } -void LiveRangeEdit::eraseVirtReg(unsigned Reg, LiveIntervals &LIS) { +void LiveRangeEdit::eraseVirtReg(unsigned Reg) { if (delegate_ && delegate_->LRE_CanEraseVirtReg(Reg)) LIS.removeInterval(Reg); } bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, - SmallVectorImpl<MachineInstr*> &Dead, - MachineRegisterInfo &MRI, - LiveIntervals &LIS, - const TargetInstrInfo &TII) { + SmallVectorImpl<MachineInstr*> &Dead) { MachineInstr *DefMI = 0, *UseMI = 0; // Check that there is a single def and a single use. 
@@ -174,7 +158,7 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, if (MO.isDef()) { if (DefMI && DefMI != MI) return false; - if (!MI->getDesc().canFoldAsLoad()) + if (!MI->canFoldAsLoad()) return false; DefMI = MI; } else if (!MO.isUndef()) { @@ -209,19 +193,17 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, } void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, - LiveIntervals &LIS, VirtRegMap &VRM, - const TargetInstrInfo &TII) { + ArrayRef<unsigned> RegsBeingSpilled) { SetVector<LiveInterval*, SmallVector<LiveInterval*, 8>, SmallPtrSet<LiveInterval*, 8> > ToShrink; - MachineRegisterInfo &MRI = VRM.getRegInfo(); for (;;) { // Erase all dead defs. while (!Dead.empty()) { MachineInstr *MI = Dead.pop_back_val(); assert(MI->allDefsAreDead() && "Def isn't really dead"); - SlotIndex Idx = LIS.getInstructionIndex(MI).getDefIndex(); + SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); // Never delete inline asm. if (MI->isInlineAsm()) { @@ -265,7 +247,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, LI.removeValNo(VNI); if (LI.empty()) { ToShrink.remove(&LI); - eraseVirtReg(Reg, LIS); + eraseVirtReg(Reg); } } } @@ -284,12 +266,26 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, // Shrink just one live interval. Then delete new dead defs. LiveInterval *LI = ToShrink.back(); ToShrink.pop_back(); - if (foldAsLoad(LI, Dead, MRI, LIS, TII)) + if (foldAsLoad(LI, Dead)) continue; if (delegate_) delegate_->LRE_WillShrinkVirtReg(LI->reg); if (!LIS.shrinkToUses(LI, &Dead)) continue; + + // Don't create new intervals for a register being spilled. + // The new intervals would have to be spilled anyway so its not worth it. + // Also they currently aren't spilled so creating them and not spilling + // them results in incorrect code. + bool BeingSpilled = false; + for (unsigned i = 0, e = RegsBeingSpilled.size(); i != e; ++i) { + if (LI->reg == RegsBeingSpilled[i]) { + BeingSpilled = true; + break; + } + } + + if (BeingSpilled) continue; // LI may have been separated, create new intervals. LI->RenumberValues(LIS); @@ -298,16 +294,16 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, if (NumComp <= 1) continue; ++NumFracRanges; - bool IsOriginal = VRM.getOriginal(LI->reg) == LI->reg; + bool IsOriginal = VRM && VRM->getOriginal(LI->reg) == LI->reg; DEBUG(dbgs() << NumComp << " components: " << *LI << '\n'); SmallVector<LiveInterval*, 8> Dups(1, LI); for (unsigned i = 1; i != NumComp; ++i) { - Dups.push_back(&createFrom(LI->reg, LIS, VRM)); + Dups.push_back(&createFrom(LI->reg)); // If LI is an original interval that hasn't been split yet, make the new // intervals their own originals instead of referring to LI. The original // interval must contain all the split products, and LI doesn't. 
if (IsOriginal) - VRM.setIsSplitFromReg(Dups.back()->reg, 0); + VRM->setIsSplitFromReg(Dups.back()->reg, 0); if (delegate_) delegate_->LRE_DidCloneVirtReg(Dups.back()->reg, LI->reg); } @@ -316,10 +312,8 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, } void LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF, - LiveIntervals &LIS, const MachineLoopInfo &Loops) { VirtRegAuxInfo VRAI(MF, LIS, Loops); - MachineRegisterInfo &MRI = MF.getRegInfo(); for (iterator I = begin(), E = end(); I != E; ++I) { LiveInterval &LI = **I; if (MRI.recomputeRegClass(LI.reg, MF.getTarget())) diff --git a/lib/CodeGen/LiveRangeEdit.h b/lib/CodeGen/LiveRangeEdit.h deleted file mode 100644 index 9b0a671ea9e5..000000000000 --- a/lib/CodeGen/LiveRangeEdit.h +++ /dev/null @@ -1,206 +0,0 @@ -//===---- LiveRangeEdit.h - Basic tools for split and spill -----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// The LiveRangeEdit class represents changes done to a virtual register when it -// is spilled or split. -// -// The parent register is never changed. Instead, a number of new virtual -// registers are created and added to the newRegs vector. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_LIVERANGEEDIT_H -#define LLVM_CODEGEN_LIVERANGEEDIT_H - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/CodeGen/LiveInterval.h" - -namespace llvm { - -class AliasAnalysis; -class LiveIntervals; -class MachineLoopInfo; -class MachineRegisterInfo; -class VirtRegMap; - -class LiveRangeEdit { -public: - /// Callback methods for LiveRangeEdit owners. - struct Delegate { - /// Called immediately before erasing a dead machine instruction. - virtual void LRE_WillEraseInstruction(MachineInstr *MI) {} - - /// Called when a virtual register is no longer used. Return false to defer - /// its deletion from LiveIntervals. - virtual bool LRE_CanEraseVirtReg(unsigned) { return true; } - - /// Called before shrinking the live range of a virtual register. - virtual void LRE_WillShrinkVirtReg(unsigned) {} - - /// Called after cloning a virtual register. - /// This is used for new registers representing connected components of Old. - virtual void LRE_DidCloneVirtReg(unsigned New, unsigned Old) {} - - virtual ~Delegate() {} - }; - -private: - LiveInterval &parent_; - SmallVectorImpl<LiveInterval*> &newRegs_; - Delegate *const delegate_; - const SmallVectorImpl<LiveInterval*> *uselessRegs_; - - /// firstNew_ - Index of the first register added to newRegs_. - const unsigned firstNew_; - - /// scannedRemattable_ - true when remattable values have been identified. - bool scannedRemattable_; - - /// remattable_ - Values defined by remattable instructions as identified by - /// tii.isTriviallyReMaterializable(). - SmallPtrSet<const VNInfo*,4> remattable_; - - /// rematted_ - Values that were actually rematted, and so need to have their - /// live range trimmed or entirely removed. - SmallPtrSet<const VNInfo*,4> rematted_; - - /// scanRemattable - Identify the parent_ values that may rematerialize. 
- void scanRemattable(LiveIntervals &lis, - const TargetInstrInfo &tii, - AliasAnalysis *aa); - - /// allUsesAvailableAt - Return true if all registers used by OrigMI at - /// OrigIdx are also available with the same value at UseIdx. - bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx, - SlotIndex UseIdx, LiveIntervals &lis); - - /// foldAsLoad - If LI has a single use and a single def that can be folded as - /// a load, eliminate the register by folding the def into the use. - bool foldAsLoad(LiveInterval *LI, SmallVectorImpl<MachineInstr*> &Dead, - MachineRegisterInfo&, LiveIntervals&, const TargetInstrInfo&); - -public: - /// Create a LiveRangeEdit for breaking down parent into smaller pieces. - /// @param parent The register being spilled or split. - /// @param newRegs List to receive any new registers created. This needn't be - /// empty initially, any existing registers are ignored. - /// @param uselessRegs List of registers that can't be used when - /// rematerializing values because they are about to be removed. - LiveRangeEdit(LiveInterval &parent, - SmallVectorImpl<LiveInterval*> &newRegs, - Delegate *delegate = 0, - const SmallVectorImpl<LiveInterval*> *uselessRegs = 0) - : parent_(parent), newRegs_(newRegs), - delegate_(delegate), - uselessRegs_(uselessRegs), - firstNew_(newRegs.size()), - scannedRemattable_(false) {} - - LiveInterval &getParent() const { return parent_; } - unsigned getReg() const { return parent_.reg; } - - /// Iterator for accessing the new registers added by this edit. - typedef SmallVectorImpl<LiveInterval*>::const_iterator iterator; - iterator begin() const { return newRegs_.begin()+firstNew_; } - iterator end() const { return newRegs_.end(); } - unsigned size() const { return newRegs_.size()-firstNew_; } - bool empty() const { return size() == 0; } - LiveInterval *get(unsigned idx) const { return newRegs_[idx+firstNew_]; } - - ArrayRef<LiveInterval*> regs() const { - return makeArrayRef(newRegs_).slice(firstNew_); - } - - /// FIXME: Temporary accessors until we can get rid of - /// LiveIntervals::AddIntervalsForSpills - SmallVectorImpl<LiveInterval*> *getNewVRegs() { return &newRegs_; } - const SmallVectorImpl<LiveInterval*> *getUselessVRegs() { - return uselessRegs_; - } - - /// createFrom - Create a new virtual register based on OldReg. - LiveInterval &createFrom(unsigned OldReg, LiveIntervals&, VirtRegMap&); - - /// create - Create a new register with the same class and original slot as - /// parent. - LiveInterval &create(LiveIntervals &LIS, VirtRegMap &VRM) { - return createFrom(getReg(), LIS, VRM); - } - - /// anyRematerializable - Return true if any parent values may be - /// rematerializable. - /// This function must be called before any rematerialization is attempted. - bool anyRematerializable(LiveIntervals&, const TargetInstrInfo&, - AliasAnalysis*); - - /// checkRematerializable - Manually add VNI to the list of rematerializable - /// values if DefMI may be rematerializable. - bool checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI, - const TargetInstrInfo&, AliasAnalysis*); - - /// Remat - Information needed to rematerialize at a specific location. - struct Remat { - VNInfo *ParentVNI; // parent_'s value at the remat location. - MachineInstr *OrigMI; // Instruction defining ParentVNI. - explicit Remat(VNInfo *ParentVNI) : ParentVNI(ParentVNI), OrigMI(0) {} - }; - - /// canRematerializeAt - Determine if ParentVNI can be rematerialized at - /// UseIdx. It is assumed that parent_.getVNINfoAt(UseIdx) == ParentVNI. 
- /// When cheapAsAMove is set, only cheap remats are allowed. - bool canRematerializeAt(Remat &RM, - SlotIndex UseIdx, - bool cheapAsAMove, - LiveIntervals &lis); - - /// rematerializeAt - Rematerialize RM.ParentVNI into DestReg by inserting an - /// instruction into MBB before MI. The new instruction is mapped, but - /// liveness is not updated. - /// Return the SlotIndex of the new instruction. - SlotIndex rematerializeAt(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, - const Remat &RM, - LiveIntervals&, - const TargetInstrInfo&, - const TargetRegisterInfo&, - bool Late = false); - - /// markRematerialized - explicitly mark a value as rematerialized after doing - /// it manually. - void markRematerialized(const VNInfo *ParentVNI) { - rematted_.insert(ParentVNI); - } - - /// didRematerialize - Return true if ParentVNI was rematerialized anywhere. - bool didRematerialize(const VNInfo *ParentVNI) const { - return rematted_.count(ParentVNI); - } - - /// eraseVirtReg - Notify the delegate that Reg is no longer in use, and try - /// to erase it from LIS. - void eraseVirtReg(unsigned Reg, LiveIntervals &LIS); - - /// eliminateDeadDefs - Try to delete machine instructions that are now dead - /// (allDefsAreDead returns true). This may cause live intervals to be trimmed - /// and further dead efs to be eliminated. - void eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, - LiveIntervals&, VirtRegMap&, - const TargetInstrInfo&); - - /// calculateRegClassAndHint - Recompute register class and hint for each new - /// register. - void calculateRegClassAndHint(MachineFunction&, LiveIntervals&, - const MachineLoopInfo&); -}; - -} - -#endif diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index 2ca90f9f05c0..5a0d97d132dd 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -14,7 +14,7 @@ // the instruction, but are never used after the instruction (i.e., they are // killed). // -// This class computes live variables using are sparse implementation based on +// This class computes live variables using a sparse implementation based on // the machine code SSA form. This class computes live variable information for // each virtual and _register allocatable_ physical register in a function. It // uses the dominance properties of SSA form to efficiently compute live @@ -33,6 +33,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" @@ -41,6 +42,7 @@ using namespace llvm; char LiveVariables::ID = 0; +char &llvm::LiveVariablesID = LiveVariables::ID; INITIALIZE_PASS_BEGIN(LiveVariables, "livevars", "Live Variable Analysis", false, false) INITIALIZE_PASS_DEPENDENCY(UnreachableMachineBlockElim) @@ -90,7 +92,7 @@ void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo, MachineBasicBlock *MBB, std::vector<MachineBasicBlock*> &WorkList) { unsigned BBNum = MBB->getNumber(); - + // Check to see if this basic block is one of the killing blocks. If so, // remove it. 
for (unsigned i = 0, e = VRInfo.Kills.size(); i != e; ++i) @@ -98,7 +100,7 @@ void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo, VRInfo.Kills.erase(VRInfo.Kills.begin()+i); // Erase entry break; } - + if (MBB == DefBlock) return; // Terminate recursion if (VRInfo.AliveBlocks.test(BBNum)) @@ -107,6 +109,7 @@ void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo, // Mark the variable known alive in this bb VRInfo.AliveBlocks.set(BBNum); + assert(MBB != &MF->front() && "Can't find reaching def for virtreg"); WorkList.insert(WorkList.end(), MBB->pred_rbegin(), MBB->pred_rend()); } @@ -130,7 +133,6 @@ void LiveVariables::HandleVirtRegUse(unsigned reg, MachineBasicBlock *MBB, unsigned BBNum = MBB->getNumber(); VarInfo& VRInfo = getVarInfo(reg); - VRInfo.NumUses++; // Check to see if this basic block is already a kill block. if (!VRInfo.Kills.empty() && VRInfo.Kills.back()->getParent() == MBB) { @@ -190,7 +192,7 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg, unsigned LastDefReg = 0; unsigned LastDefDist = 0; MachineInstr *LastDef = NULL; - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) { MachineInstr *Def = PhysRegDef[SubReg]; if (!Def) @@ -214,7 +216,7 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg, unsigned DefReg = MO.getReg(); if (TRI->isSubRegister(Reg, DefReg)) { PartDefRegs.insert(DefReg); - for (const unsigned *SubRegs = TRI->getSubRegisters(DefReg); + for (const uint16_t *SubRegs = TRI->getSubRegisters(DefReg); unsigned SubReg = *SubRegs; ++SubRegs) PartDefRegs.insert(SubReg); } @@ -245,7 +247,7 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) { true/*IsImp*/)); PhysRegDef[Reg] = LastPartialDef; SmallSet<unsigned, 8> Processed; - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) { if (Processed.count(SubReg)) continue; @@ -257,20 +259,19 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) { false/*IsDef*/, true/*IsImp*/)); PhysRegDef[SubReg] = LastPartialDef; - for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) + for (const uint16_t *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) Processed.insert(*SS); } } - } - else if (LastDef && !PhysRegUse[Reg] && - !LastDef->findRegisterDefOperand(Reg)) + } else if (LastDef && !PhysRegUse[Reg] && + !LastDef->findRegisterDefOperand(Reg)) // Last def defines the super register, add an implicit def of reg. - LastDef->addOperand(MachineOperand::CreateReg(Reg, - true/*IsDef*/, true/*IsImp*/)); + LastDef->addOperand(MachineOperand::CreateReg(Reg, true/*IsDef*/, + true/*IsImp*/)); // Remember this use. PhysRegUse[Reg] = MI; - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) PhysRegUse[SubReg] = MI; } @@ -286,7 +287,7 @@ MachineInstr *LiveVariables::FindLastRefOrPartRef(unsigned Reg) { MachineInstr *LastRefOrPartRef = LastUse ? 
LastUse : LastDef; unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef]; unsigned LastPartDefDist = 0; - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) { MachineInstr *Def = PhysRegDef[SubReg]; if (Def && Def != LastDef) { @@ -331,11 +332,11 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { // Or whole register is defined, but only partly used. // AX<dead> = AL<imp-def> // = AL<kill> - // AX = + // AX = MachineInstr *LastPartDef = 0; unsigned LastPartDefDist = 0; SmallSet<unsigned, 8> PartUses; - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) { MachineInstr *Def = PhysRegDef[SubReg]; if (Def && Def != LastDef) { @@ -350,7 +351,7 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { } if (MachineInstr *Use = PhysRegUse[SubReg]) { PartUses.insert(SubReg); - for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) + for (const uint16_t *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) PartUses.insert(*SS); unsigned Dist = DistanceMap[Use]; if (Dist > LastRefOrPartRefDist) { @@ -366,7 +367,7 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { // EAX<dead> = op AL<imp-def> // That is, EAX def is dead but AL def extends pass it. PhysRegDef[Reg]->addRegisterDead(Reg, TRI, true); - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) { if (!PartUses.count(SubReg)) continue; @@ -387,11 +388,11 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { else { LastRefOrPartRef->addRegisterKilled(SubReg, TRI, true); PhysRegUse[SubReg] = LastRefOrPartRef; - for (const unsigned *SSRegs = TRI->getSubRegisters(SubReg); + for (const uint16_t *SSRegs = TRI->getSubRegisters(SubReg); unsigned SSReg = *SSRegs; ++SSRegs) PhysRegUse[SSReg] = LastRefOrPartRef; } - for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) + for (const uint16_t *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) PartUses.erase(*SS); } } else if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI) { @@ -419,16 +420,37 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { return true; } +void LiveVariables::HandleRegMask(const MachineOperand &MO) { + // Call HandlePhysRegKill() for all live registers clobbered by Mask. + // Clobbered registers are always dead, sp there is no need to use + // HandlePhysRegDef(). + for (unsigned Reg = 1, NumRegs = TRI->getNumRegs(); Reg != NumRegs; ++Reg) { + // Skip dead regs. + if (!PhysRegDef[Reg] && !PhysRegUse[Reg]) + continue; + // Skip mask-preserved regs. + if (!MO.clobbersPhysReg(Reg)) + continue; + // Kill the largest clobbered super-register. + // This avoids needless implicit operands. + unsigned Super = Reg; + for (const uint16_t *SR = TRI->getSuperRegisters(Reg); *SR; ++SR) + if ((PhysRegDef[*SR] || PhysRegUse[*SR]) && MO.clobbersPhysReg(*SR)) + Super = *SR; + HandlePhysRegKill(Super, 0); + } +} + void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI, SmallVector<unsigned, 4> &Defs) { // What parts of the register are previously defined? 
SmallSet<unsigned, 32> Live; if (PhysRegDef[Reg] || PhysRegUse[Reg]) { Live.insert(Reg); - for (const unsigned *SS = TRI->getSubRegisters(Reg); *SS; ++SS) + for (const uint16_t *SS = TRI->getSubRegisters(Reg); *SS; ++SS) Live.insert(*SS); } else { - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) { // If a register isn't itself defined, but all parts that make up of it // are defined, then consider it also defined. @@ -440,7 +462,7 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI, continue; if (PhysRegDef[SubReg] || PhysRegUse[SubReg]) { Live.insert(SubReg); - for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) + for (const uint16_t *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) Live.insert(*SS); } } @@ -450,7 +472,7 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI, // is referenced. HandlePhysRegKill(Reg, MI); // Only some of the sub-registers are used. - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) { if (!Live.count(SubReg)) // Skip if this sub-register isn't defined. @@ -469,7 +491,7 @@ void LiveVariables::UpdatePhysRegDefs(MachineInstr *MI, Defs.pop_back(); PhysRegDef[Reg] = MI; PhysRegUse[Reg] = NULL; - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) { PhysRegDef[SubReg] = MI; PhysRegUse[SubReg] = NULL; @@ -492,6 +514,12 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0); PHIJoins.clear(); + // FIXME: LiveIntervals will be updated to remove its dependence on + // LiveVariables to improve compilation time and eliminate bizarre pass + // dependencies. Until then, we can't change much in -O0. + if (!MRI->isSSA()) + report_fatal_error("regalloc=... not currently supported with -O0"); + analyzePHINodes(mf); // Calculate live variable information in depth first order on the CFG of the @@ -536,8 +564,13 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { // Clear kill and dead markers. LV will recompute them. SmallVector<unsigned, 4> UseRegs; SmallVector<unsigned, 4> DefRegs; + SmallVector<unsigned, 1> RegMasks; for (unsigned i = 0; i != NumOperandsToProcess; ++i) { MachineOperand &MO = MI->getOperand(i); + if (MO.isRegMask()) { + RegMasks.push_back(i); + continue; + } if (!MO.isReg() || MO.getReg() == 0) continue; unsigned MOReg = MO.getReg(); @@ -559,6 +592,10 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { HandlePhysRegUse(MOReg, MI); } + // Process all masked registers. (Call clobbers). + for (unsigned i = 0, e = RegMasks.size(); i != e; ++i) + HandleRegMask(MI->getOperand(RegMasks[i])); + // Process all defs. for (unsigned i = 0, e = DefRegs.size(); i != e; ++i) { unsigned MOReg = DefRegs[i]; @@ -590,8 +627,8 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { // them. The tail callee need not take the same registers as input // that it produces as output, and there are dependencies for its input // registers elsewhere. 
- if (!MBB->empty() && MBB->back().getDesc().isReturn() - && !MBB->back().getDesc().isCall()) { + if (!MBB->empty() && MBB->back().isReturn() + && !MBB->back().isCall()) { MachineInstr *Ret = &MBB->back(); for (MachineRegisterInfo::liveout_iterator @@ -607,10 +644,27 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { } } + // MachineCSE may CSE instructions which write to non-allocatable physical + // registers across MBBs. Remember if any reserved register is liveout. + SmallSet<unsigned, 4> LiveOuts; + for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) { + MachineBasicBlock *SuccMBB = *SI; + if (SuccMBB->isLandingPad()) + continue; + for (MachineBasicBlock::livein_iterator LI = SuccMBB->livein_begin(), + LE = SuccMBB->livein_end(); LI != LE; ++LI) { + unsigned LReg = *LI; + if (!TRI->isInAllocatableClass(LReg)) + // Ignore other live-ins, e.g. those that are live into landing pads. + LiveOuts.insert(LReg); + } + } + // Loop over PhysRegDef / PhysRegUse, killing any registers that are // available at the end of the basic block. for (unsigned i = 0; i != NumRegs; ++i) - if (PhysRegDef[i] || PhysRegUse[i]) + if ((PhysRegDef[i] || PhysRegUse[i]) && !LiveOuts.count(i)) HandlePhysRegDef(i, 0, Defs); std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0); @@ -754,7 +808,7 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB, const unsigned NumNew = BB->getNumber(); // All registers used by PHI nodes in SuccBB must be live through BB. - for (MachineBasicBlock::const_iterator BBI = SuccBB->begin(), + for (MachineBasicBlock::iterator BBI = SuccBB->begin(), BBE = SuccBB->end(); BBI != BBE && BBI->isPHI(); ++BBI) for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) if (BBI->getOperand(i+1).getMBB() == BB) diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp index 1318d6212497..238bf52dfed7 100644 --- a/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -71,19 +71,15 @@ namespace { AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); } - const char *getPassName() const { - return "Local Stack Slot Allocation"; - } private: }; } // end anonymous namespace char LocalStackSlotPass::ID = 0; - -FunctionPass *llvm::createLocalStackSlotAllocationPass() { - return new LocalStackSlotPass(); -} +char &llvm::LocalStackSlotAllocationID = LocalStackSlotPass::ID; +INITIALIZE_PASS(LocalStackSlotPass, "localstackalloc", + "Local Stack Slot Allocation", false, false) bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) { MachineFrameInfo *MFI = MF.getFrameInfo(); diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 4c5fe4c480a6..6c8a1072697c 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -73,7 +73,8 @@ void ilist_traits<MachineBasicBlock>::addNodeToList(MachineBasicBlock *N) { // Make sure the instructions have their operands in the reginfo lists. MachineRegisterInfo &RegInfo = MF.getRegInfo(); - for (MachineBasicBlock::iterator I = N->begin(), E = N->end(); I != E; ++I) + for (MachineBasicBlock::instr_iterator + I = N->instr_begin(), E = N->instr_end(); I != E; ++I) I->AddRegOperandsToUseLists(RegInfo); LeakDetector::removeGarbageObject(N); @@ -120,8 +121,8 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) { /// lists. 
void ilist_traits<MachineInstr>:: transferNodesFromList(ilist_traits<MachineInstr> &fromList, - MachineBasicBlock::iterator first, - MachineBasicBlock::iterator last) { + ilist_iterator<MachineInstr> first, + ilist_iterator<MachineInstr> last) { assert(Parent->getParent() == fromList.Parent->getParent() && "MachineInstr parent mismatch!"); @@ -140,33 +141,75 @@ void ilist_traits<MachineInstr>::deleteNode(MachineInstr* MI) { } MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() { - iterator I = begin(); - while (I != end() && I->isPHI()) + instr_iterator I = instr_begin(), E = instr_end(); + while (I != E && I->isPHI()) ++I; + assert(!I->isInsideBundle() && "First non-phi MI cannot be inside a bundle!"); return I; } MachineBasicBlock::iterator MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) { - while (I != end() && (I->isPHI() || I->isLabel() || I->isDebugValue())) + iterator E = end(); + while (I != E && (I->isPHI() || I->isLabel() || I->isDebugValue())) ++I; + // FIXME: This needs to change if we wish to bundle labels / dbg_values + // inside the bundle. + assert(!I->isInsideBundle() && + "First non-phi / non-label instruction is inside a bundle!"); return I; } MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() { - iterator I = end(); - while (I != begin() && ((--I)->getDesc().isTerminator() || I->isDebugValue())) + iterator B = begin(), E = end(), I = E; + while (I != B && ((--I)->isTerminator() || I->isDebugValue())) ; /*noop */ - while (I != end() && !I->getDesc().isTerminator()) + while (I != E && !I->isTerminator()) + ++I; + return I; +} + +MachineBasicBlock::const_iterator +MachineBasicBlock::getFirstTerminator() const { + const_iterator B = begin(), E = end(), I = E; + while (I != B && ((--I)->isTerminator() || I->isDebugValue())) + ; /*noop */ + while (I != E && !I->isTerminator()) + ++I; + return I; +} + +MachineBasicBlock::instr_iterator MachineBasicBlock::getFirstInstrTerminator() { + instr_iterator B = instr_begin(), E = instr_end(), I = E; + while (I != B && ((--I)->isTerminator() || I->isDebugValue())) + ; /*noop */ + while (I != E && !I->isTerminator()) ++I; return I; } MachineBasicBlock::iterator MachineBasicBlock::getLastNonDebugInstr() { - iterator B = begin(), I = end(); + // Skip over end-of-block dbg_value instructions. + instr_iterator B = instr_begin(), I = instr_end(); + while (I != B) { + --I; + // Return instruction that starts a bundle. + if (I->isDebugValue() || I->isInsideBundle()) + continue; + return I; + } + // The block is all debug values. + return end(); +} + +MachineBasicBlock::const_iterator +MachineBasicBlock::getLastNonDebugInstr() const { + // Skip over end-of-block dbg_value instructions. + const_instr_iterator B = instr_begin(), I = instr_end(); while (I != B) { --I; - if (I->isDebugValue()) + // Return instruction that starts a bundle. + if (I->isDebugValue() || I->isInsideBundle()) continue; return I; } @@ -195,6 +238,18 @@ StringRef MachineBasicBlock::getName() const { return "(null)"; } +/// Return a hopefully unique identifier for this block. 
+std::string MachineBasicBlock::getFullName() const { + std::string Name; + if (getParent()) + Name = (getParent()->getFunction()->getName() + ":").str(); + if (getBasicBlock()) + Name += getBasicBlock()->getName(); + else + Name += (Twine("BB") + Twine(getNumber())).str(); + return Name; +} + void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { const MachineFunction *MF = getParent(); if (!MF) { @@ -203,8 +258,6 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { return; } - if (Alignment) { OS << "Alignment " << Alignment << "\n"; } - if (Indexes) OS << Indexes->getMBBStartIdx(this) << '\t'; @@ -218,6 +271,12 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { } if (isLandingPad()) { OS << Comma << "EH LANDING PAD"; Comma = ", "; } if (hasAddressTaken()) { OS << Comma << "ADDRESS TAKEN"; Comma = ", "; } + if (Alignment) { + OS << Comma << "Align " << Alignment << " (" << (1u << Alignment) + << " bytes)"; + Comma = ", "; + } + OS << '\n'; const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); @@ -237,13 +296,15 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { OS << '\n'; } - for (const_iterator I = begin(); I != end(); ++I) { + for (const_instr_iterator I = instr_begin(); I != instr_end(); ++I) { if (Indexes) { if (Indexes->hasIndex(I)) OS << Indexes->getInstructionIndex(I); OS << '\t'; } OS << '\t'; + if (I->isInsideBundle()) + OS << " * "; I->print(OS, &getParent()->getTarget()); } @@ -260,8 +321,8 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { void MachineBasicBlock::removeLiveIn(unsigned Reg) { std::vector<unsigned>::iterator I = std::find(LiveIns.begin(), LiveIns.end(), Reg); - assert(I != LiveIns.end() && "Not a live in!"); - LiveIns.erase(I); + if (I != LiveIns.end()) + LiveIns.erase(I); } bool MachineBasicBlock::isLiveIn(unsigned Reg) const { @@ -297,8 +358,22 @@ void MachineBasicBlock::updateTerminator() { TII->RemoveBranch(*this); } else { // The block has an unconditional fallthrough. If its successor is not - // its layout successor, insert a branch. - TBB = *succ_begin(); + // its layout successor, insert a branch. First we have to locate the + // only non-landing-pad successor, as that is the fallthrough block. + for (succ_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI) { + if ((*SI)->isLandingPad()) + continue; + assert(!TBB && "Found more than one non-landing-pad successor!"); + TBB = *SI; + } + + // If there is no non-landing-pad successor, the block has no + // fall-through edges to be concerned with. + if (!TBB) + return; + + // Finally update the unconditional successor to be reached via a branch + // if it would not be reached by fallthrough. if (!isLayoutSuccessor(TBB)) TII->InsertBranch(*this, TBB, 0, Cond, dl); } @@ -435,8 +510,8 @@ MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) { fromMBB->removeSuccessor(Succ); // Fix up any PHI nodes in the successor. 
- for (MachineBasicBlock::iterator MI = Succ->begin(), ME = Succ->end(); - MI != ME && MI->isPHI(); ++MI) + for (MachineBasicBlock::instr_iterator MI = Succ->instr_begin(), + ME = Succ->instr_end(); MI != ME && MI->isPHI(); ++MI) for (unsigned i = 2, e = MI->getNumOperands()+1; i != e; i += 2) { MachineOperand &MO = MI->getOperand(i); if (MO.getMBB() == fromMBB) @@ -473,13 +548,10 @@ bool MachineBasicBlock::canFallThrough() { if (TII->AnalyzeBranch(*this, TBB, FBB, Cond)) { // If we couldn't analyze the branch, examine the last instruction. // If the block doesn't end in a known control barrier, assume fallthrough - // is possible. The isPredicable check is needed because this code can be + // is possible. The isPredicated check is needed because this code can be // called during IfConversion, where an instruction which is normally a - // Barrier is predicated and thus no longer an actual control barrier. This - // is over-conservative though, because if an instruction isn't actually - // predicated we could still treat it like a barrier. - return empty() || !back().getDesc().isBarrier() || - back().getDesc().isPredicable(); + // Barrier is predicated and thus no longer an actual control barrier. + return empty() || !back().isBarrier() || TII->isPredicated(&back()); } // If there is no branch, control always falls through. @@ -538,14 +610,16 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { // Collect a list of virtual registers killed by the terminators. SmallVector<unsigned, 4> KilledRegs; if (LV) - for (iterator I = getFirstTerminator(), E = end(); I != E; ++I) { + for (instr_iterator I = getFirstInstrTerminator(), E = instr_end(); + I != E; ++I) { MachineInstr *MI = I; for (MachineInstr::mop_iterator OI = MI->operands_begin(), OE = MI->operands_end(); OI != OE; ++OI) { - if (!OI->isReg() || !OI->isUse() || !OI->isKill() || OI->isUndef()) + if (!OI->isReg() || OI->getReg() == 0 || + !OI->isUse() || !OI->isKill() || OI->isUndef()) continue; unsigned Reg = OI->getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg) && + if (TargetRegisterInfo::isPhysicalRegister(Reg) || LV->getVarInfo(Reg).removeKill(MI)) { KilledRegs.push_back(Reg); DEBUG(dbgs() << "Removing terminator kill: " << *MI); @@ -565,7 +639,8 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { } // Fix PHI nodes in Succ so they refer to NMBB instead of this - for (MachineBasicBlock::iterator i = Succ->begin(), e = Succ->end(); + for (MachineBasicBlock::instr_iterator + i = Succ->instr_begin(),e = Succ->instr_end(); i != e && i->isPHI(); ++i) for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2) if (i->getOperand(ni+1).getMBB() == this) @@ -577,14 +652,16 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { NMBB->addLiveIn(*I); // Update LiveVariables. + const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); if (LV) { // Restore kills of virtual registers that were killed by the terminators. 
while (!KilledRegs.empty()) { unsigned Reg = KilledRegs.pop_back_val(); - for (iterator I = end(), E = begin(); I != E;) { - if (!(--I)->addRegisterKilled(Reg, NULL, /* addIfNotFound= */ false)) + for (instr_iterator I = instr_end(), E = instr_begin(); I != E;) { + if (!(--I)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false)) continue; - LV->getVarInfo(Reg).Kills.push_back(I); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + LV->getVarInfo(Reg).Kills.push_back(I); DEBUG(dbgs() << "Restored terminator kill: " << *I); break; } @@ -650,6 +727,42 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { return NMBB; } +MachineBasicBlock::iterator +MachineBasicBlock::erase(MachineBasicBlock::iterator I) { + if (I->isBundle()) { + MachineBasicBlock::iterator E = llvm::next(I); + return Insts.erase(I.getInstrIterator(), E.getInstrIterator()); + } + + return Insts.erase(I.getInstrIterator()); +} + +MachineInstr *MachineBasicBlock::remove(MachineInstr *I) { + if (I->isBundle()) { + instr_iterator MII = llvm::next(I); + iterator E = end(); + while (MII != E && MII->isInsideBundle()) { + MachineInstr *MI = &*MII++; + Insts.remove(MI); + } + } + + return Insts.remove(I); +} + +void MachineBasicBlock::splice(MachineBasicBlock::iterator where, + MachineBasicBlock *Other, + MachineBasicBlock::iterator From) { + if (From->isBundle()) { + MachineBasicBlock::iterator To = llvm::next(From); + Insts.splice(where.getInstrIterator(), Other->Insts, + From.getInstrIterator(), To.getInstrIterator()); + return; + } + + Insts.splice(where.getInstrIterator(), Other->Insts, From.getInstrIterator()); +} + /// removeFromParent - This method unlinks 'this' from the containing function, /// and returns it, but does not delete it. MachineBasicBlock *MachineBasicBlock::removeFromParent() { @@ -673,10 +786,10 @@ void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old, MachineBasicBlock *New) { assert(Old != New && "Cannot replace self with self!"); - MachineBasicBlock::iterator I = end(); - while (I != begin()) { + MachineBasicBlock::instr_iterator I = instr_end(); + while (I != instr_begin()) { --I; - if (!I->getDesc().isTerminator()) break; + if (!I->isTerminator()) break; // Scan the operands of this machine instruction, replacing any uses of Old // with New. @@ -755,27 +868,27 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA, /// findDebugLoc - find the next valid DebugLoc starting at MBBI, skipping /// any DBG_VALUE instructions. Return UnknownLoc if there is none. DebugLoc -MachineBasicBlock::findDebugLoc(MachineBasicBlock::iterator &MBBI) { +MachineBasicBlock::findDebugLoc(instr_iterator MBBI) { DebugLoc DL; - MachineBasicBlock::iterator E = end(); - if (MBBI != E) { - // Skip debug declarations, we don't want a DebugLoc from them. - MachineBasicBlock::iterator MBBI2 = MBBI; - while (MBBI2 != E && MBBI2->isDebugValue()) - MBBI2++; - if (MBBI2 != E) - DL = MBBI2->getDebugLoc(); - } + instr_iterator E = instr_end(); + if (MBBI == E) + return DL; + + // Skip debug declarations, we don't want a DebugLoc from them. + while (MBBI != E && MBBI->isDebugValue()) + MBBI++; + if (MBBI != E) + DL = MBBI->getDebugLoc(); return DL; } /// getSuccWeight - Return weight of the edge from this block to MBB. 
/// -uint32_t MachineBasicBlock::getSuccWeight(MachineBasicBlock *succ) { +uint32_t MachineBasicBlock::getSuccWeight(const MachineBasicBlock *succ) const { if (Weights.empty()) return 0; - succ_iterator I = std::find(Successors.begin(), Successors.end(), succ); + const_succ_iterator I = std::find(Successors.begin(), Successors.end(), succ); return *getWeightIterator(I); } @@ -789,6 +902,16 @@ getWeightIterator(MachineBasicBlock::succ_iterator I) { return Weights.begin() + index; } +/// getWeightIterator - Return wight iterator corresonding to the I successor +/// iterator +MachineBasicBlock::const_weight_iterator MachineBasicBlock:: +getWeightIterator(MachineBasicBlock::const_succ_iterator I) const { + assert(Weights.size() == Successors.size() && "Async weight list!"); + const size_t index = std::distance(Successors.begin(), I); + assert(index < Weights.size() && "Not a current successor!"); + return Weights.begin() + index; +} + void llvm::WriteAsOperand(raw_ostream &OS, const MachineBasicBlock *MBB, bool t) { OS << "BB#" << MBB->getNumber(); diff --git a/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/lib/CodeGen/MachineBlockFrequencyInfo.cpp index b92cda961474..a079d6e59139 100644 --- a/lib/CodeGen/MachineBlockFrequencyInfo.cpp +++ b/lib/CodeGen/MachineBlockFrequencyInfo.cpp @@ -56,6 +56,6 @@ bool MachineBlockFrequencyInfo::runOnMachineFunction(MachineFunction &F) { /// the other block frequencies. We do this to avoid using of floating points. /// BlockFrequency MachineBlockFrequencyInfo:: -getBlockFreq(MachineBasicBlock *MBB) const { +getBlockFreq(const MachineBasicBlock *MBB) const { return MBFI->getBlockFreq(MBB); } diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp new file mode 100644 index 000000000000..22d7212007fc --- /dev/null +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -0,0 +1,1001 @@ +//===-- MachineBlockPlacement.cpp - Basic Block Code Layout optimization --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements basic block placement transformations using the CFG +// structure and branch probability estimates. +// +// The pass strives to preserve the structure of the CFG (that is, retain +// a topological ordering of basic blocks) in the absense of a *strong* signal +// to the contrary from probabilities. However, within the CFG structure, it +// attempts to choose an ordering which favors placing more likely sequences of +// blocks adjacent to each other. +// +// The algorithm works from the inner-most loop within a function outward, and +// at each stage walks through the basic blocks, trying to coalesce them into +// sequential chains where allowed by the CFG (or demanded by heavy +// probabilities). Finally, it walks the blocks in topological order, and the +// first time it reaches a chain of basic blocks, it schedules them in the +// function in-order. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "block-placement2" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Debug.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include <algorithm> +using namespace llvm; + +STATISTIC(NumCondBranches, "Number of conditional branches"); +STATISTIC(NumUncondBranches, "Number of uncondittional branches"); +STATISTIC(CondBranchTakenFreq, + "Potential frequency of taking conditional branches"); +STATISTIC(UncondBranchTakenFreq, + "Potential frequency of taking unconditional branches"); + +namespace { +class BlockChain; +/// \brief Type for our function-wide basic block -> block chain mapping. +typedef DenseMap<MachineBasicBlock *, BlockChain *> BlockToChainMapType; +} + +namespace { +/// \brief A chain of blocks which will be laid out contiguously. +/// +/// This is the datastructure representing a chain of consecutive blocks that +/// are profitable to layout together in order to maximize fallthrough +/// probabilities. We also can use a block chain to represent a sequence of +/// basic blocks which have some external (correctness) requirement for +/// sequential layout. +/// +/// Eventually, the block chains will form a directed graph over the function. +/// We provide an SCC-supporting-iterator in order to quicky build and walk the +/// SCCs of block chains within a function. +/// +/// The block chains also have support for calculating and caching probability +/// information related to the chain itself versus other chains. This is used +/// for ranking during the final layout of block chains. +class BlockChain { + /// \brief The sequence of blocks belonging to this chain. + /// + /// This is the sequence of blocks for a particular chain. These will be laid + /// out in-order within the function. + SmallVector<MachineBasicBlock *, 4> Blocks; + + /// \brief A handle to the function-wide basic block to block chain mapping. + /// + /// This is retained in each block chain to simplify the computation of child + /// block chains for SCC-formation and iteration. We store the edges to child + /// basic blocks, and map them back to their associated chains using this + /// structure. + BlockToChainMapType &BlockToChain; + +public: + /// \brief Construct a new BlockChain. + /// + /// This builds a new block chain representing a single basic block in the + /// function. It also registers itself as the chain that block participates + /// in with the BlockToChain mapping. + BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB) + : Blocks(1, BB), BlockToChain(BlockToChain), LoopPredecessors(0) { + assert(BB && "Cannot create a chain with a null basic block"); + BlockToChain[BB] = this; + } + + /// \brief Iterator over blocks within the chain. + typedef SmallVectorImpl<MachineBasicBlock *>::const_iterator iterator; + + /// \brief Beginning of blocks within the chain. 
+ iterator begin() const { return Blocks.begin(); } + + /// \brief End of blocks within the chain. + iterator end() const { return Blocks.end(); } + + /// \brief Merge a block chain into this one. + /// + /// This routine merges a block chain into this one. It takes care of forming + /// a contiguous sequence of basic blocks, updating the edge list, and + /// updating the block -> chain mapping. It does not free or tear down the + /// old chain, but the old chain's block list is no longer valid. + void merge(MachineBasicBlock *BB, BlockChain *Chain) { + assert(BB); + assert(!Blocks.empty()); + + // Fast path in case we don't have a chain already. + if (!Chain) { + assert(!BlockToChain[BB]); + Blocks.push_back(BB); + BlockToChain[BB] = this; + return; + } + + assert(BB == *Chain->begin()); + assert(Chain->begin() != Chain->end()); + + // Update the incoming blocks to point to this chain, and add them to the + // chain structure. + for (BlockChain::iterator BI = Chain->begin(), BE = Chain->end(); + BI != BE; ++BI) { + Blocks.push_back(*BI); + assert(BlockToChain[*BI] == Chain && "Incoming blocks not in chain"); + BlockToChain[*BI] = this; + } + } + +#ifndef NDEBUG + /// \brief Dump the blocks in this chain. + void dump() LLVM_ATTRIBUTE_USED { + for (iterator I = begin(), E = end(); I != E; ++I) + (*I)->dump(); + } +#endif // NDEBUG + + /// \brief Count of predecessors within the loop currently being processed. + /// + /// This count is updated at each loop we process to represent the number of + /// in-loop predecessors of this chain. + unsigned LoopPredecessors; +}; +} + +namespace { +class MachineBlockPlacement : public MachineFunctionPass { + /// \brief A typedef for a block filter set. + typedef SmallPtrSet<MachineBasicBlock *, 16> BlockFilterSet; + + /// \brief A handle to the branch probability pass. + const MachineBranchProbabilityInfo *MBPI; + + /// \brief A handle to the function-wide block frequency pass. + const MachineBlockFrequencyInfo *MBFI; + + /// \brief A handle to the loop info. + const MachineLoopInfo *MLI; + + /// \brief A handle to the target's instruction info. + const TargetInstrInfo *TII; + + /// \brief A handle to the target's lowering info. + const TargetLowering *TLI; + + /// \brief Allocator and owner of BlockChain structures. + /// + /// We build BlockChains lazily by merging together high probability BB + /// sequences acording to the "Algo2" in the paper mentioned at the top of + /// the file. To reduce malloc traffic, we allocate them using this slab-like + /// allocator, and destroy them after the pass completes. + SpecificBumpPtrAllocator<BlockChain> ChainAllocator; + + /// \brief Function wide BasicBlock to BlockChain mapping. + /// + /// This mapping allows efficiently moving from any given basic block to the + /// BlockChain it participates in, if any. We use it to, among other things, + /// allow implicitly defining edges between chains as the existing edges + /// between basic blocks. 
+ DenseMap<MachineBasicBlock *, BlockChain *> BlockToChain; + + void markChainSuccessors(BlockChain &Chain, + MachineBasicBlock *LoopHeaderBB, + SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, + const BlockFilterSet *BlockFilter = 0); + MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB, + BlockChain &Chain, + const BlockFilterSet *BlockFilter); + MachineBasicBlock *selectBestCandidateBlock( + BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList, + const BlockFilterSet *BlockFilter); + MachineBasicBlock *getFirstUnplacedBlock( + MachineFunction &F, + const BlockChain &PlacedChain, + MachineFunction::iterator &PrevUnplacedBlockIt, + const BlockFilterSet *BlockFilter); + void buildChain(MachineBasicBlock *BB, BlockChain &Chain, + SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, + const BlockFilterSet *BlockFilter = 0); + MachineBasicBlock *findBestLoopTop(MachineFunction &F, + MachineLoop &L, + const BlockFilterSet &LoopBlockSet); + void buildLoopChains(MachineFunction &F, MachineLoop &L); + void buildCFGChains(MachineFunction &F); + void AlignLoops(MachineFunction &F); + +public: + static char ID; // Pass identification, replacement for typeid + MachineBlockPlacement() : MachineFunctionPass(ID) { + initializeMachineBlockPlacementPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &F); + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineBranchProbabilityInfo>(); + AU.addRequired<MachineBlockFrequencyInfo>(); + AU.addRequired<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } +}; +} + +char MachineBlockPlacement::ID = 0; +char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID; +INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement2", + "Branch Probability Basic Block Placement", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) +INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement2", + "Branch Probability Basic Block Placement", false, false) + +#ifndef NDEBUG +/// \brief Helper to print the name of a MBB. +/// +/// Only used by debug logging. +static std::string getBlockName(MachineBasicBlock *BB) { + std::string Result; + raw_string_ostream OS(Result); + OS << "BB#" << BB->getNumber() + << " (derived from LLVM BB '" << BB->getName() << "')"; + OS.flush(); + return Result; +} + +/// \brief Helper to print the number of a MBB. +/// +/// Only used by debug logging. +static std::string getBlockNum(MachineBasicBlock *BB) { + std::string Result; + raw_string_ostream OS(Result); + OS << "BB#" << BB->getNumber(); + OS.flush(); + return Result; +} +#endif + +/// \brief Mark a chain's successors as having one fewer preds. +/// +/// When a chain is being merged into the "placed" chain, this routine will +/// quickly walk the successors of each block in the chain and mark them as +/// having one fewer active predecessor. It also adds any successors of this +/// chain which reach the zero-predecessor state to the worklist passed in. +void MachineBlockPlacement::markChainSuccessors( + BlockChain &Chain, + MachineBasicBlock *LoopHeaderBB, + SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, + const BlockFilterSet *BlockFilter) { + // Walk all the blocks in this chain, marking their successors as having + // a predecessor placed. 
+ for (BlockChain::iterator CBI = Chain.begin(), CBE = Chain.end(); + CBI != CBE; ++CBI) { + // Add any successors for which this is the only un-placed in-loop + // predecessor to the worklist as a viable candidate for CFG-neutral + // placement. No subsequent placement of this block will violate the CFG + // shape, so we get to use heuristics to choose a favorable placement. + for (MachineBasicBlock::succ_iterator SI = (*CBI)->succ_begin(), + SE = (*CBI)->succ_end(); + SI != SE; ++SI) { + if (BlockFilter && !BlockFilter->count(*SI)) + continue; + BlockChain &SuccChain = *BlockToChain[*SI]; + // Disregard edges within a fixed chain, or edges to the loop header. + if (&Chain == &SuccChain || *SI == LoopHeaderBB) + continue; + + // This is a cross-chain edge that is within the loop, so decrement the + // loop predecessor count of the destination chain. + if (SuccChain.LoopPredecessors > 0 && --SuccChain.LoopPredecessors == 0) + BlockWorkList.push_back(*SuccChain.begin()); + } + } +} + +/// \brief Select the best successor for a block. +/// +/// This looks across all successors of a particular block and attempts to +/// select the "best" one to be the layout successor. It only considers direct +/// successors which also pass the block filter. It will attempt to avoid +/// breaking CFG structure, but cave and break such structures in the case of +/// very hot successor edges. +/// +/// \returns The best successor block found, or null if none are viable. +MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor( + MachineBasicBlock *BB, BlockChain &Chain, + const BlockFilterSet *BlockFilter) { + const BranchProbability HotProb(4, 5); // 80% + + MachineBasicBlock *BestSucc = 0; + // FIXME: Due to the performance of the probability and weight routines in + // the MBPI analysis, we manually compute probabilities using the edge + // weights. This is suboptimal as it means that the somewhat subtle + // definition of edge weight semantics is encoded here as well. We should + // improve the MBPI interface to effeciently support query patterns such as + // this. + uint32_t BestWeight = 0; + uint32_t WeightScale = 0; + uint32_t SumWeight = MBPI->getSumForBlock(BB, WeightScale); + DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n"); + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); + SI != SE; ++SI) { + if (BlockFilter && !BlockFilter->count(*SI)) + continue; + BlockChain &SuccChain = *BlockToChain[*SI]; + if (&SuccChain == &Chain) { + DEBUG(dbgs() << " " << getBlockName(*SI) << " -> Already merged!\n"); + continue; + } + if (*SI != *SuccChain.begin()) { + DEBUG(dbgs() << " " << getBlockName(*SI) << " -> Mid chain!\n"); + continue; + } + + uint32_t SuccWeight = MBPI->getEdgeWeight(BB, *SI); + BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight); + + // Only consider successors which are either "hot", or wouldn't violate + // any CFG constraints. + if (SuccChain.LoopPredecessors != 0) { + if (SuccProb < HotProb) { + DEBUG(dbgs() << " " << getBlockName(*SI) << " -> CFG conflict\n"); + continue; + } + + // Make sure that a hot successor doesn't have a globally more important + // predecessor. 
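// (Illustrative numbers, not taken from the code: with freq(BB) = 100,
//  P(BB->Succ) = 0.9, and HotProb = 4/5, the candidate edge is discounted to
//  100 * 0.9 * (1 - 4/5) = 18; any other eligible predecessor edge into Succ
//  with frequency >= 18 marks this as a bad CFG conflict and the successor is
//  skipped.)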
+ BlockFrequency CandidateEdgeFreq + = MBFI->getBlockFreq(BB) * SuccProb * HotProb.getCompl(); + bool BadCFGConflict = false; + for (MachineBasicBlock::pred_iterator PI = (*SI)->pred_begin(), + PE = (*SI)->pred_end(); + PI != PE; ++PI) { + if (*PI == *SI || (BlockFilter && !BlockFilter->count(*PI)) || + BlockToChain[*PI] == &Chain) + continue; + BlockFrequency PredEdgeFreq + = MBFI->getBlockFreq(*PI) * MBPI->getEdgeProbability(*PI, *SI); + if (PredEdgeFreq >= CandidateEdgeFreq) { + BadCFGConflict = true; + break; + } + } + if (BadCFGConflict) { + DEBUG(dbgs() << " " << getBlockName(*SI) + << " -> non-cold CFG conflict\n"); + continue; + } + } + + DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb + << " (prob)" + << (SuccChain.LoopPredecessors != 0 ? " (CFG break)" : "") + << "\n"); + if (BestSucc && BestWeight >= SuccWeight) + continue; + BestSucc = *SI; + BestWeight = SuccWeight; + } + return BestSucc; +} + +namespace { +/// \brief Predicate struct to detect blocks already placed. +class IsBlockPlaced { + const BlockChain &PlacedChain; + const BlockToChainMapType &BlockToChain; + +public: + IsBlockPlaced(const BlockChain &PlacedChain, + const BlockToChainMapType &BlockToChain) + : PlacedChain(PlacedChain), BlockToChain(BlockToChain) {} + + bool operator()(MachineBasicBlock *BB) const { + return BlockToChain.lookup(BB) == &PlacedChain; + } +}; +} + +/// \brief Select the best block from a worklist. +/// +/// This looks through the provided worklist as a list of candidate basic +/// blocks and select the most profitable one to place. The definition of +/// profitable only really makes sense in the context of a loop. This returns +/// the most frequently visited block in the worklist, which in the case of +/// a loop, is the one most desirable to be physically close to the rest of the +/// loop body in order to improve icache behavior. +/// +/// \returns The best block found, or null if none are viable. +MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( + BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList, + const BlockFilterSet *BlockFilter) { + // Once we need to walk the worklist looking for a candidate, cleanup the + // worklist of already placed entries. + // FIXME: If this shows up on profiles, it could be folded (at the cost of + // some code complexity) into the loop below. + WorkList.erase(std::remove_if(WorkList.begin(), WorkList.end(), + IsBlockPlaced(Chain, BlockToChain)), + WorkList.end()); + + MachineBasicBlock *BestBlock = 0; + BlockFrequency BestFreq; + for (SmallVectorImpl<MachineBasicBlock *>::iterator WBI = WorkList.begin(), + WBE = WorkList.end(); + WBI != WBE; ++WBI) { + BlockChain &SuccChain = *BlockToChain[*WBI]; + if (&SuccChain == &Chain) { + DEBUG(dbgs() << " " << getBlockName(*WBI) + << " -> Already merged!\n"); + continue; + } + assert(SuccChain.LoopPredecessors == 0 && "Found CFG-violating block"); + + BlockFrequency CandidateFreq = MBFI->getBlockFreq(*WBI); + DEBUG(dbgs() << " " << getBlockName(*WBI) << " -> " << CandidateFreq + << " (freq)\n"); + if (BestBlock && BestFreq >= CandidateFreq) + continue; + BestBlock = *WBI; + BestFreq = CandidateFreq; + } + return BestBlock; +} + +/// \brief Retrieve the first unplaced basic block. +/// +/// This routine is called when we are unable to use the CFG to walk through +/// all of the basic blocks and form a chain due to unnatural loops in the CFG. +/// We walk through the function's blocks in order, starting from the +/// LastUnplacedBlockIt. 
We update this iterator on each call to avoid +/// re-scanning the entire sequence on repeated calls to this routine. +MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock( + MachineFunction &F, const BlockChain &PlacedChain, + MachineFunction::iterator &PrevUnplacedBlockIt, + const BlockFilterSet *BlockFilter) { + for (MachineFunction::iterator I = PrevUnplacedBlockIt, E = F.end(); I != E; + ++I) { + if (BlockFilter && !BlockFilter->count(I)) + continue; + if (BlockToChain[I] != &PlacedChain) { + PrevUnplacedBlockIt = I; + // Now select the head of the chain to which the unplaced block belongs + // as the block to place. This will force the entire chain to be placed, + // and satisfies the requirements of merging chains. + return *BlockToChain[I]->begin(); + } + } + return 0; +} + +void MachineBlockPlacement::buildChain( + MachineBasicBlock *BB, + BlockChain &Chain, + SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, + const BlockFilterSet *BlockFilter) { + assert(BB); + assert(BlockToChain[BB] == &Chain); + MachineFunction &F = *BB->getParent(); + MachineFunction::iterator PrevUnplacedBlockIt = F.begin(); + + MachineBasicBlock *LoopHeaderBB = BB; + markChainSuccessors(Chain, LoopHeaderBB, BlockWorkList, BlockFilter); + BB = *llvm::prior(Chain.end()); + for (;;) { + assert(BB); + assert(BlockToChain[BB] == &Chain); + assert(*llvm::prior(Chain.end()) == BB); + MachineBasicBlock *BestSucc = 0; + + // Look for the best viable successor if there is one to place immediately + // after this block. + BestSucc = selectBestSuccessor(BB, Chain, BlockFilter); + + // If an immediate successor isn't available, look for the best viable + // block among those we've identified as not violating the loop's CFG at + // this point. This won't be a fallthrough, but it will increase locality. + if (!BestSucc) + BestSucc = selectBestCandidateBlock(Chain, BlockWorkList, BlockFilter); + + if (!BestSucc) { + BestSucc = getFirstUnplacedBlock(F, Chain, PrevUnplacedBlockIt, + BlockFilter); + if (!BestSucc) + break; + + DEBUG(dbgs() << "Unnatural loop CFG detected, forcibly merging the " + "layout successor until the CFG reduces\n"); + } + + // Place this block, updating the datastructures to reflect its placement. + BlockChain &SuccChain = *BlockToChain[BestSucc]; + // Zero out LoopPredecessors for the successor we're about to merge in case + // we selected a successor that didn't fit naturally into the CFG. + SuccChain.LoopPredecessors = 0; + DEBUG(dbgs() << "Merging from " << getBlockNum(BB) + << " to " << getBlockNum(BestSucc) << "\n"); + markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, BlockFilter); + Chain.merge(BestSucc, &SuccChain); + BB = *llvm::prior(Chain.end()); + } + + DEBUG(dbgs() << "Finished forming chain for header block " + << getBlockNum(*Chain.begin()) << "\n"); +} + +/// \brief Find the best loop top block for layout. +/// +/// This routine implements the logic to analyze the loop looking for the best +/// block to layout at the top of the loop. Typically this is done to maximize +/// fallthrough opportunities. +MachineBasicBlock * +MachineBlockPlacement::findBestLoopTop(MachineFunction &F, + MachineLoop &L, + const BlockFilterSet &LoopBlockSet) { + // We don't want to layout the loop linearly in all cases. If the loop header + // is just a normal basic block in the loop, we want to look for what block + // within the loop is the best one to layout at the top. 
However, if the loop + // header has be pre-merged into a chain due to predecessors not having + // analyzable branches, *and* the predecessor it is merged with is *not* part + // of the loop, rotating the header into the middle of the loop will create + // a non-contiguous range of blocks which is Very Bad. So start with the + // header and only rotate if safe. + BlockChain &HeaderChain = *BlockToChain[L.getHeader()]; + if (!LoopBlockSet.count(*HeaderChain.begin())) + return L.getHeader(); + + BlockFrequency BestExitEdgeFreq; + MachineBasicBlock *ExitingBB = 0; + MachineBasicBlock *LoopingBB = 0; + // If there are exits to outer loops, loop rotation can severely limit + // fallthrough opportunites unless it selects such an exit. Keep a set of + // blocks where rotating to exit with that block will reach an outer loop. + SmallPtrSet<MachineBasicBlock *, 4> BlocksExitingToOuterLoop; + + DEBUG(dbgs() << "Finding best loop exit for: " + << getBlockName(L.getHeader()) << "\n"); + for (MachineLoop::block_iterator I = L.block_begin(), + E = L.block_end(); + I != E; ++I) { + BlockChain &Chain = *BlockToChain[*I]; + // Ensure that this block is at the end of a chain; otherwise it could be + // mid-way through an inner loop or a successor of an analyzable branch. + if (*I != *llvm::prior(Chain.end())) + continue; + + // Now walk the successors. We need to establish whether this has a viable + // exiting successor and whether it has a viable non-exiting successor. + // We store the old exiting state and restore it if a viable looping + // successor isn't found. + MachineBasicBlock *OldExitingBB = ExitingBB; + BlockFrequency OldBestExitEdgeFreq = BestExitEdgeFreq; + // We also compute and store the best looping successor for use in layout. + MachineBasicBlock *BestLoopSucc = 0; + // FIXME: Due to the performance of the probability and weight routines in + // the MBPI analysis, we use the internal weights. This is only valid + // because it is purely a ranking function, we don't care about anything + // but the relative values. + uint32_t BestLoopSuccWeight = 0; + // FIXME: We also manually compute the probabilities to avoid quadratic + // behavior. + uint32_t WeightScale = 0; + uint32_t SumWeight = MBPI->getSumForBlock(*I, WeightScale); + for (MachineBasicBlock::succ_iterator SI = (*I)->succ_begin(), + SE = (*I)->succ_end(); + SI != SE; ++SI) { + if ((*SI)->isLandingPad()) + continue; + if (*SI == *I) + continue; + BlockChain &SuccChain = *BlockToChain[*SI]; + // Don't split chains, either this chain or the successor's chain. + if (&Chain == &SuccChain || *SI != *SuccChain.begin()) { + DEBUG(dbgs() << " " << (LoopBlockSet.count(*SI) ? "looping: " + : "exiting: ") + << getBlockName(*I) << " -> " + << getBlockName(*SI) << " (chain conflict)\n"); + continue; + } + + uint32_t SuccWeight = MBPI->getEdgeWeight(*I, *SI); + if (LoopBlockSet.count(*SI)) { + DEBUG(dbgs() << " looping: " << getBlockName(*I) << " -> " + << getBlockName(*SI) << " (" << SuccWeight << ")\n"); + if (BestLoopSucc && BestLoopSuccWeight >= SuccWeight) + continue; + + BestLoopSucc = *SI; + BestLoopSuccWeight = SuccWeight; + continue; + } + + BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight); + BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(*I) * SuccProb; + DEBUG(dbgs() << " exiting: " << getBlockName(*I) << " -> " + << getBlockName(*SI) << " (" << ExitEdgeFreq << ")\n"); + // Note that we slightly bias this toward an existing layout successor to + // retain incoming order in the absence of better information. 
+ // FIXME: Should we bias this more strongly? It's pretty weak. + if (!ExitingBB || ExitEdgeFreq > BestExitEdgeFreq || + ((*I)->isLayoutSuccessor(*SI) && + !(ExitEdgeFreq < BestExitEdgeFreq))) { + BestExitEdgeFreq = ExitEdgeFreq; + ExitingBB = *I; + } + + if (MachineLoop *ExitLoop = MLI->getLoopFor(*SI)) + if (ExitLoop->contains(&L)) + BlocksExitingToOuterLoop.insert(*I); + } + + // Restore the old exiting state, no viable looping successor was found. + if (!BestLoopSucc) { + ExitingBB = OldExitingBB; + BestExitEdgeFreq = OldBestExitEdgeFreq; + continue; + } + + // If this was best exiting block thus far, also record the looping block. + if (ExitingBB == *I) + LoopingBB = BestLoopSucc; + } + // Without a candidate exitting block or with only a single block in the + // loop, just use the loop header to layout the loop. + if (!ExitingBB || L.getNumBlocks() == 1) + return L.getHeader(); + + // Also, if we have exit blocks which lead to outer loops but didn't select + // one of them as the exiting block we are rotating toward, disable loop + // rotation altogether. + if (!BlocksExitingToOuterLoop.empty() && + !BlocksExitingToOuterLoop.count(ExitingBB)) + return L.getHeader(); + + assert(LoopingBB && "All successors of a loop block are exit blocks!"); + DEBUG(dbgs() << " Best exiting block: " << getBlockName(ExitingBB) << "\n"); + DEBUG(dbgs() << " Best top block: " << getBlockName(LoopingBB) << "\n"); + return LoopingBB; +} + +/// \brief Forms basic block chains from the natural loop structures. +/// +/// These chains are designed to preserve the existing *structure* of the code +/// as much as possible. We can then stitch the chains together in a way which +/// both preserves the topological structure and minimizes taken conditional +/// branches. +void MachineBlockPlacement::buildLoopChains(MachineFunction &F, + MachineLoop &L) { + // First recurse through any nested loops, building chains for those inner + // loops. + for (MachineLoop::iterator LI = L.begin(), LE = L.end(); LI != LE; ++LI) + buildLoopChains(F, **LI); + + SmallVector<MachineBasicBlock *, 16> BlockWorkList; + BlockFilterSet LoopBlockSet(L.block_begin(), L.block_end()); + + MachineBasicBlock *LayoutTop = findBestLoopTop(F, L, LoopBlockSet); + BlockChain &LoopChain = *BlockToChain[LayoutTop]; + + // FIXME: This is a really lame way of walking the chains in the loop: we + // walk the blocks, and use a set to prevent visiting a particular chain + // twice. + SmallPtrSet<BlockChain *, 4> UpdatedPreds; + assert(LoopChain.LoopPredecessors == 0); + UpdatedPreds.insert(&LoopChain); + for (MachineLoop::block_iterator BI = L.block_begin(), + BE = L.block_end(); + BI != BE; ++BI) { + BlockChain &Chain = *BlockToChain[*BI]; + if (!UpdatedPreds.insert(&Chain)) + continue; + + assert(Chain.LoopPredecessors == 0); + for (BlockChain::iterator BCI = Chain.begin(), BCE = Chain.end(); + BCI != BCE; ++BCI) { + assert(BlockToChain[*BCI] == &Chain); + for (MachineBasicBlock::pred_iterator PI = (*BCI)->pred_begin(), + PE = (*BCI)->pred_end(); + PI != PE; ++PI) { + if (BlockToChain[*PI] == &Chain || !LoopBlockSet.count(*PI)) + continue; + ++Chain.LoopPredecessors; + } + } + + if (Chain.LoopPredecessors == 0) + BlockWorkList.push_back(*Chain.begin()); + } + + buildChain(LayoutTop, LoopChain, BlockWorkList, &LoopBlockSet); + + DEBUG({ + // Crash at the end so we get all of the debugging output first. 
+ bool BadLoop = false; + if (LoopChain.LoopPredecessors) { + BadLoop = true; + dbgs() << "Loop chain contains a block without its preds placed!\n" + << " Loop header: " << getBlockName(*L.block_begin()) << "\n" + << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"; + } + for (BlockChain::iterator BCI = LoopChain.begin(), BCE = LoopChain.end(); + BCI != BCE; ++BCI) + if (!LoopBlockSet.erase(*BCI)) { + // We don't mark the loop as bad here because there are real situations + // where this can occur. For example, with an unanalyzable fallthrough + // from a loop block to a non-loop block or vice versa. + dbgs() << "Loop chain contains a block not contained by the loop!\n" + << " Loop header: " << getBlockName(*L.block_begin()) << "\n" + << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n" + << " Bad block: " << getBlockName(*BCI) << "\n"; + } + + if (!LoopBlockSet.empty()) { + BadLoop = true; + for (BlockFilterSet::iterator LBI = LoopBlockSet.begin(), + LBE = LoopBlockSet.end(); + LBI != LBE; ++LBI) + dbgs() << "Loop contains blocks never placed into a chain!\n" + << " Loop header: " << getBlockName(*L.block_begin()) << "\n" + << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n" + << " Bad block: " << getBlockName(*LBI) << "\n"; + } + assert(!BadLoop && "Detected problems with the placement of this loop."); + }); +} + +void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { + // Ensure that every BB in the function has an associated chain to simplify + // the assumptions of the remaining algorithm. + SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch. + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { + MachineBasicBlock *BB = FI; + BlockChain *Chain + = new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB); + // Also, merge any blocks which we cannot reason about and must preserve + // the exact fallthrough behavior for. + for (;;) { + Cond.clear(); + MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch. + if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough()) + break; + + MachineFunction::iterator NextFI(llvm::next(FI)); + MachineBasicBlock *NextBB = NextFI; + // Ensure that the layout successor is a viable block, as we know that + // fallthrough is a possibility. + assert(NextFI != FE && "Can't fallthrough past the last block."); + DEBUG(dbgs() << "Pre-merging due to unanalyzable fallthrough: " + << getBlockName(BB) << " -> " << getBlockName(NextBB) + << "\n"); + Chain->merge(NextBB, 0); + FI = NextFI; + BB = NextBB; + } + } + + // Build any loop-based chains. 
+ for (MachineLoopInfo::iterator LI = MLI->begin(), LE = MLI->end(); LI != LE; + ++LI) + buildLoopChains(F, **LI); + + SmallVector<MachineBasicBlock *, 16> BlockWorkList; + + SmallPtrSet<BlockChain *, 4> UpdatedPreds; + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { + MachineBasicBlock *BB = &*FI; + BlockChain &Chain = *BlockToChain[BB]; + if (!UpdatedPreds.insert(&Chain)) + continue; + + assert(Chain.LoopPredecessors == 0); + for (BlockChain::iterator BCI = Chain.begin(), BCE = Chain.end(); + BCI != BCE; ++BCI) { + assert(BlockToChain[*BCI] == &Chain); + for (MachineBasicBlock::pred_iterator PI = (*BCI)->pred_begin(), + PE = (*BCI)->pred_end(); + PI != PE; ++PI) { + if (BlockToChain[*PI] == &Chain) + continue; + ++Chain.LoopPredecessors; + } + } + + if (Chain.LoopPredecessors == 0) + BlockWorkList.push_back(*Chain.begin()); + } + + BlockChain &FunctionChain = *BlockToChain[&F.front()]; + buildChain(&F.front(), FunctionChain, BlockWorkList); + + typedef SmallPtrSet<MachineBasicBlock *, 16> FunctionBlockSetType; + DEBUG({ + // Crash at the end so we get all of the debugging output first. + bool BadFunc = false; + FunctionBlockSetType FunctionBlockSet; + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) + FunctionBlockSet.insert(FI); + + for (BlockChain::iterator BCI = FunctionChain.begin(), + BCE = FunctionChain.end(); + BCI != BCE; ++BCI) + if (!FunctionBlockSet.erase(*BCI)) { + BadFunc = true; + dbgs() << "Function chain contains a block not in the function!\n" + << " Bad block: " << getBlockName(*BCI) << "\n"; + } + + if (!FunctionBlockSet.empty()) { + BadFunc = true; + for (FunctionBlockSetType::iterator FBI = FunctionBlockSet.begin(), + FBE = FunctionBlockSet.end(); + FBI != FBE; ++FBI) + dbgs() << "Function contains blocks never placed into a chain!\n" + << " Bad block: " << getBlockName(*FBI) << "\n"; + } + assert(!BadFunc && "Detected problems with the block placement."); + }); + + // Splice the blocks into place. + MachineFunction::iterator InsertPos = F.begin(); + for (BlockChain::iterator BI = FunctionChain.begin(), + BE = FunctionChain.end(); + BI != BE; ++BI) { + DEBUG(dbgs() << (BI == FunctionChain.begin() ? "Placing chain " + : " ... ") + << getBlockName(*BI) << "\n"); + if (InsertPos != MachineFunction::iterator(*BI)) + F.splice(InsertPos, *BI); + else + ++InsertPos; + + // Update the terminator of the previous block. + if (BI == FunctionChain.begin()) + continue; + MachineBasicBlock *PrevBB = llvm::prior(MachineFunction::iterator(*BI)); + + // FIXME: It would be awesome of updateTerminator would just return rather + // than assert when the branch cannot be analyzed in order to remove this + // boiler plate. + Cond.clear(); + MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch. + if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) + PrevBB->updateTerminator(); + } + + // Fixup the last block. + Cond.clear(); + MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch. + if (!TII->AnalyzeBranch(F.back(), TBB, FBB, Cond)) + F.back().updateTerminator(); +} + +/// \brief Recursive helper to align a loop and any nested loops. +static void AlignLoop(MachineFunction &F, MachineLoop *L, unsigned Align) { + // Recurse through nested loops. + for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) + AlignLoop(F, *I, Align); + + L->getTopBlock()->setAlignment(Align); +} + +/// \brief Align loop headers to target preferred alignments. 
+void MachineBlockPlacement::AlignLoops(MachineFunction &F) { + if (F.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) + return; + + unsigned Align = TLI->getPrefLoopAlignment(); + if (!Align) + return; // Don't care about loop alignment. + + for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); I != E; ++I) + AlignLoop(F, *I, Align); +} + +bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) { + // Check for single-block functions and skip them. + if (llvm::next(F.begin()) == F.end()) + return false; + + MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); + MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); + MLI = &getAnalysis<MachineLoopInfo>(); + TII = F.getTarget().getInstrInfo(); + TLI = F.getTarget().getTargetLowering(); + assert(BlockToChain.empty()); + + buildCFGChains(F); + AlignLoops(F); + + BlockToChain.clear(); + ChainAllocator.DestroyAll(); + + // We always return true as we have no way to track whether the final order + // differs from the original order. + return true; +} + +namespace { +/// \brief A pass to compute block placement statistics. +/// +/// A separate pass to compute interesting statistics for evaluating block +/// placement. This is separate from the actual placement pass so that they can +/// be computed in the absense of any placement transformations or when using +/// alternative placement strategies. +class MachineBlockPlacementStats : public MachineFunctionPass { + /// \brief A handle to the branch probability pass. + const MachineBranchProbabilityInfo *MBPI; + + /// \brief A handle to the function-wide block frequency pass. + const MachineBlockFrequencyInfo *MBFI; + +public: + static char ID; // Pass identification, replacement for typeid + MachineBlockPlacementStats() : MachineFunctionPass(ID) { + initializeMachineBlockPlacementStatsPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &F); + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineBranchProbabilityInfo>(); + AU.addRequired<MachineBlockFrequencyInfo>(); + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } +}; +} + +char MachineBlockPlacementStats::ID = 0; +char &llvm::MachineBlockPlacementStatsID = MachineBlockPlacementStats::ID; +INITIALIZE_PASS_BEGIN(MachineBlockPlacementStats, "block-placement-stats", + "Basic Block Placement Stats", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) +INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) +INITIALIZE_PASS_END(MachineBlockPlacementStats, "block-placement-stats", + "Basic Block Placement Stats", false, false) + +bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) { + // Check for single-block functions and skip them. + if (llvm::next(F.begin()) == F.end()) + return false; + + MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); + MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); + + for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) { + BlockFrequency BlockFreq = MBFI->getBlockFreq(I); + Statistic &NumBranches = (I->succ_size() > 1) ? NumCondBranches + : NumUncondBranches; + Statistic &BranchTakenFreq = (I->succ_size() > 1) ? CondBranchTakenFreq + : UncondBranchTakenFreq; + for (MachineBasicBlock::succ_iterator SI = I->succ_begin(), + SE = I->succ_end(); + SI != SE; ++SI) { + // Skip if this successor is a fallthrough. 
+ if (I->isLayoutSuccessor(*SI)) + continue; + + BlockFrequency EdgeFreq = BlockFreq * MBPI->getEdgeProbability(I, *SI); + ++NumBranches; + BranchTakenFreq += EdgeFreq.getFrequency(); + } + } + + return false; +} + diff --git a/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/lib/CodeGen/MachineBranchProbabilityInfo.cpp index c13fa6bc5333..0cc1af07952d 100644 --- a/lib/CodeGen/MachineBranchProbabilityInfo.cpp +++ b/lib/CodeGen/MachineBranchProbabilityInfo.cpp @@ -26,26 +26,43 @@ INITIALIZE_PASS_END(MachineBranchProbabilityInfo, "machine-branch-prob", char MachineBranchProbabilityInfo::ID = 0; -uint32_t MachineBranchProbabilityInfo:: -getSumForBlock(MachineBasicBlock *MBB) const { - uint32_t Sum = 0; +void MachineBranchProbabilityInfo::anchor() { } +uint32_t MachineBranchProbabilityInfo:: +getSumForBlock(const MachineBasicBlock *MBB, uint32_t &Scale) const { + // First we compute the sum with 64-bits of precision, ensuring that cannot + // overflow by bounding the number of weights considered. Hopefully no one + // actually needs 2^32 successors. + assert(MBB->succ_size() < UINT32_MAX); + uint64_t Sum = 0; + Scale = 1; for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { - MachineBasicBlock *Succ = *I; - uint32_t Weight = getEdgeWeight(MBB, Succ); - uint32_t PrevSum = Sum; - + uint32_t Weight = getEdgeWeight(MBB, *I); Sum += Weight; - assert(Sum > PrevSum); (void) PrevSum; } + // If the computed sum fits in 32-bits, we're done. + if (Sum <= UINT32_MAX) + return Sum; + + // Otherwise, compute the scale necessary to cause the weights to fit, and + // re-sum with that scale applied. + assert((Sum / UINT32_MAX) < UINT32_MAX); + Scale = (Sum / UINT32_MAX) + 1; + Sum = 0; + for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), + E = MBB->succ_end(); I != E; ++I) { + uint32_t Weight = getEdgeWeight(MBB, *I); + Sum += Weight / Scale; + } + assert(Sum <= UINT32_MAX); return Sum; } uint32_t -MachineBranchProbabilityInfo::getEdgeWeight(MachineBasicBlock *Src, - MachineBasicBlock *Dst) const { +MachineBranchProbabilityInfo::getEdgeWeight(const MachineBasicBlock *Src, + const MachineBasicBlock *Dst) const { uint32_t Weight = Src->getSuccWeight(Dst); if (!Weight) return DEFAULT_WEIGHT; @@ -55,37 +72,24 @@ MachineBranchProbabilityInfo::getEdgeWeight(MachineBasicBlock *Src, bool MachineBranchProbabilityInfo::isEdgeHot(MachineBasicBlock *Src, MachineBasicBlock *Dst) const { // Hot probability is at least 4/5 = 80% - uint32_t Weight = getEdgeWeight(Src, Dst); - uint32_t Sum = getSumForBlock(Src); - - // FIXME: Implement BranchProbability::compare then change this code to - // compare this BranchProbability against a static "hot" BranchProbability. - return (uint64_t)Weight * 5 > (uint64_t)Sum * 4; + // FIXME: Compare against a static "hot" BranchProbability. + return getEdgeProbability(Src, Dst) > BranchProbability(4, 5); } MachineBasicBlock * MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const { - uint32_t Sum = 0; uint32_t MaxWeight = 0; MachineBasicBlock *MaxSucc = 0; - for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { - MachineBasicBlock *Succ = *I; - uint32_t Weight = getEdgeWeight(MBB, Succ); - uint32_t PrevSum = Sum; - - Sum += Weight; - assert(Sum > PrevSum); (void) PrevSum; - + uint32_t Weight = getEdgeWeight(MBB, *I); if (Weight > MaxWeight) { MaxWeight = Weight; - MaxSucc = Succ; + MaxSucc = *I; } } - // FIXME: Use BranchProbability::compare. 
- if ((uint64_t)MaxWeight * 5 >= (uint64_t)Sum * 4) + if (getEdgeProbability(MBB, MaxSucc) >= BranchProbability(4, 5)) return MaxSucc; return 0; @@ -94,8 +98,9 @@ MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const { BranchProbability MachineBranchProbabilityInfo::getEdgeProbability(MachineBasicBlock *Src, MachineBasicBlock *Dst) const { - uint32_t N = getEdgeWeight(Src, Dst); - uint32_t D = getSumForBlock(Src); + uint32_t Scale = 1; + uint32_t D = getSumForBlock(Src, Scale); + uint32_t N = getEdgeWeight(Src, Dst) / Scale; return BranchProbability(N, D); } diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index 7eda8c129dc4..a63688e9ec62 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -26,13 +26,14 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Support/Debug.h" #include "llvm/Support/RecyclingAllocator.h" - using namespace llvm; STATISTIC(NumCoalesces, "Number of copies coalesced"); STATISTIC(NumCSEs, "Number of common subexpression eliminated"); STATISTIC(NumPhysCSEs, "Number of physreg referencing common subexpr eliminated"); +STATISTIC(NumCrossBBCSEs, + "Number of cross-MBB physreg referencing CS eliminated"); STATISTIC(NumCommutes, "Number of copies coalesced after commuting"); namespace { @@ -49,7 +50,7 @@ namespace { } virtual bool runOnMachineFunction(MachineFunction &MF); - + virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); @@ -62,6 +63,8 @@ namespace { virtual void releaseMemory() { ScopeMap.clear(); Exps.clear(); + AllocatableRegs.clear(); + ReservedRegs.clear(); } private: @@ -75,6 +78,8 @@ namespace { ScopedHTType VNT; SmallVector<MachineInstr*, 64> Exps; unsigned CurrVN; + BitVector AllocatableRegs; + BitVector ReservedRegs; bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB); bool isPhysDefTriviallyDead(unsigned Reg, @@ -82,9 +87,12 @@ namespace { MachineBasicBlock::const_iterator E) const ; bool hasLivePhysRegDefUses(const MachineInstr *MI, const MachineBasicBlock *MBB, - SmallSet<unsigned,8> &PhysRefs) const; + SmallSet<unsigned,8> &PhysRefs, + SmallVector<unsigned,2> &PhysDefs) const; bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, - SmallSet<unsigned,8> &PhysRefs) const; + SmallSet<unsigned,8> &PhysRefs, + SmallVector<unsigned,2> &PhysDefs, + bool &NonLocal) const; bool isCSECandidate(MachineInstr *MI); bool isProfitableToCSE(unsigned CSReg, unsigned Reg, MachineInstr *CSMI, MachineInstr *MI); @@ -99,6 +107,7 @@ namespace { } // end anonymous namespace char MachineCSE::ID = 0; +char &llvm::MachineCSEID = MachineCSE::ID; INITIALIZE_PASS_BEGIN(MachineCSE, "machine-cse", "Machine Common Subexpression Elimination", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) @@ -106,8 +115,6 @@ INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(MachineCSE, "machine-cse", "Machine Common Subexpression Elimination", false, false) -FunctionPass *llvm::createMachineCSEPass() { return new MachineCSE(); } - bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB) { bool Changed = false; @@ -163,6 +170,8 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg, bool SeenDef = false; for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { const MachineOperand &MO = I->getOperand(i); + if (MO.isRegMask() && MO.clobbersPhysReg(Reg)) + SeenDef = true; if (!MO.isReg() || !MO.getReg()) continue; if (!TRI->regsOverlap(MO.getReg(), Reg)) @@ -173,7 +182,7 @@ 
MachineCSE::isPhysDefTriviallyDead(unsigned Reg, SeenDef = true; } if (SeenDef) - // See a def of Reg (or an alias) before encountering any use, it's + // See a def of Reg (or an alias) before encountering any use, it's // trivially dead. return true; @@ -189,7 +198,8 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg, /// instruction does not uses a physical register. bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, const MachineBasicBlock *MBB, - SmallSet<unsigned,8> &PhysRefs) const { + SmallSet<unsigned,8> &PhysRefs, + SmallVector<unsigned,2> &PhysDefs) const{ MachineBasicBlock::const_iterator I = MI; I = llvm::next(I); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); @@ -207,7 +217,9 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, (MO.isDead() || isPhysDefTriviallyDead(Reg, I, MBB->end()))) continue; PhysRefs.insert(Reg); - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) + if (MO.isDef()) + PhysDefs.push_back(Reg); + for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) PhysRefs.insert(*Alias); } @@ -215,25 +227,56 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, } bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, - SmallSet<unsigned,8> &PhysRefs) const { + SmallSet<unsigned,8> &PhysRefs, + SmallVector<unsigned,2> &PhysDefs, + bool &NonLocal) const { // For now conservatively returns false if the common subexpression is - // not in the same basic block as the given instruction. - MachineBasicBlock *MBB = MI->getParent(); - if (CSMI->getParent() != MBB) - return false; + // not in the same basic block as the given instruction. The only exception + // is if the common subexpression is in the sole predecessor block. + const MachineBasicBlock *MBB = MI->getParent(); + const MachineBasicBlock *CSMBB = CSMI->getParent(); + + bool CrossMBB = false; + if (CSMBB != MBB) { + if (MBB->pred_size() != 1 || *MBB->pred_begin() != CSMBB) + return false; + + for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) { + if (AllocatableRegs.test(PhysDefs[i]) || ReservedRegs.test(PhysDefs[i])) + // Avoid extending live range of physical registers if they are + //allocatable or reserved. + return false; + } + CrossMBB = true; + } MachineBasicBlock::const_iterator I = CSMI; I = llvm::next(I); MachineBasicBlock::const_iterator E = MI; + MachineBasicBlock::const_iterator EE = CSMBB->end(); unsigned LookAheadLeft = LookAheadLimit; while (LookAheadLeft) { // Skip over dbg_value's. - while (I != E && I->isDebugValue()) + while (I != E && I != EE && I->isDebugValue()) ++I; + if (I == EE) { + assert(CrossMBB && "Reaching end-of-MBB without finding MI?"); + (void)CrossMBB; + CrossMBB = false; + NonLocal = true; + I = MBB->begin(); + EE = MBB->end(); + continue; + } + if (I == E) return true; for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { const MachineOperand &MO = I->getOperand(i); + // RegMasks go on instructions like calls that clobber lots of physregs. + // Don't attempt to CSE across such an instruction. + if (MO.isRegMask()) + return false; if (!MO.isReg() || !MO.isDef()) continue; unsigned MOReg = MO.getReg(); @@ -260,12 +303,11 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) { return false; // Ignore stuff that we obviously can't move. 
- const MCInstrDesc &MCID = MI->getDesc(); - if (MCID.mayStore() || MCID.isCall() || MCID.isTerminator() || + if (MI->mayStore() || MI->isCall() || MI->isTerminator() || MI->hasUnmodeledSideEffects()) return false; - if (MCID.mayLoad()) { + if (MI->mayLoad()) { // Okay, this instruction does a load. As a refinement, we allow the target // to decide whether the loaded value is actually a constant. If so, we can // actually use it as a load. @@ -287,7 +329,7 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in // an immediate predecessor. We don't want to increase register pressure and // end up causing other computation to be spilled. - if (MI->getDesc().isAsCheapAsAMove()) { + if (MI->isAsCheapAsAMove()) { MachineBasicBlock *CSBB = CSMI->getParent(); MachineBasicBlock *BB = MI->getParent(); if (CSBB != BB && !CSBB->isSuccessor(BB)) @@ -376,7 +418,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { // Commute commutable instructions. bool Commuted = false; - if (!FoundCSE && MI->getDesc().isCommutable()) { + if (!FoundCSE && MI->isCommutable()) { MachineInstr *NewMI = TII->commuteInstruction(MI); if (NewMI) { Commuted = true; @@ -394,16 +436,18 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { // If the instruction defines physical registers and the values *may* be // used, then it's not safe to replace it with a common subexpression. // It's also not safe if the instruction uses physical registers. + bool CrossMBBPhysDef = false; SmallSet<unsigned,8> PhysRefs; - if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs)) { + SmallVector<unsigned, 2> PhysDefs; + if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs, PhysDefs)) { FoundCSE = false; - // ... Unless the CS is local and it also defines the physical register - // which is not clobbered in between and the physical register uses - // were not clobbered. + // ... Unless the CS is local or is in the sole predecessor block + // and it also defines the physical register which is not clobbered + // in between and the physical register uses were not clobbered. unsigned CSVN = VNT.lookup(MI); MachineInstr *CSMI = Exps[CSVN]; - if (PhysRegDefsReach(CSMI, MI, PhysRefs)) + if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef)) FoundCSE = true; } @@ -458,6 +502,18 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { MRI->replaceRegWith(CSEPairs[i].first, CSEPairs[i].second); MRI->clearKillFlags(CSEPairs[i].second); } + + if (CrossMBBPhysDef) { + // Add physical register defs now coming in from a predecessor to MBB + // livein list. 
+ while (!PhysDefs.empty()) { + unsigned LiveIn = PhysDefs.pop_back_val(); + if (!MBB->isLiveIn(LiveIn)) + MBB->addLiveIn(LiveIn); + } + ++NumCrossBBCSEs; + } + MI->eraseFromParent(); ++NumCSEs; if (!PhysRefs.empty()) @@ -542,5 +598,7 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); AA = &getAnalysis<AliasAnalysis>(); DT = &getAnalysis<MachineDominatorTree>(); + AllocatableRegs = TRI->getAllocatableSet(MF); + ReservedRegs = TRI->getReservedRegs(MF); return PerformCSE(DT->getRootNode()); } diff --git a/lib/CodeGen/MachineCodeEmitter.cpp b/lib/CodeGen/MachineCodeEmitter.cpp new file mode 100644 index 000000000000..81b49784c052 --- /dev/null +++ b/lib/CodeGen/MachineCodeEmitter.cpp @@ -0,0 +1,14 @@ +//===-- llvm/CodeGen/MachineCodeEmitter.cpp - Code emission -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineCodeEmitter.h" + +using namespace llvm; + +void MachineCodeEmitter::anchor() { } diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp new file mode 100644 index 000000000000..9730eaacf6e4 --- /dev/null +++ b/lib/CodeGen/MachineCopyPropagation.cpp @@ -0,0 +1,340 @@ +//===- MachineCopyPropagation.cpp - Machine Copy Propagation Pass ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is an extremely simple MachineInstr-level copy propagation pass. 
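// As a concrete illustration (mirroring the nop-copy case handled in
// CopyPropagateBlock below), the pass erases the second copy in sequences
// such as:
//
//   %ECX<def> = COPY %EAX<kill>
//   ...                              <- nothing clobbers EAX or ECX here
//   %EAX<def> = COPY %ECX            <- a nop copy, deleted by this pass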
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "codegen-cp" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Pass.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumDeletes, "Number of dead copies deleted"); + +namespace { + class MachineCopyPropagation : public MachineFunctionPass { + const TargetRegisterInfo *TRI; + BitVector ReservedRegs; + + public: + static char ID; // Pass identification, replacement for typeid + MachineCopyPropagation() : MachineFunctionPass(ID) { + initializeMachineCopyPropagationPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnMachineFunction(MachineFunction &MF); + + private: + typedef SmallVector<unsigned, 4> DestList; + typedef DenseMap<unsigned, DestList> SourceMap; + + void SourceNoLongerAvailable(unsigned Reg, + SourceMap &SrcMap, + DenseMap<unsigned, MachineInstr*> &AvailCopyMap); + bool CopyPropagateBlock(MachineBasicBlock &MBB); + }; +} +char MachineCopyPropagation::ID = 0; +char &llvm::MachineCopyPropagationID = MachineCopyPropagation::ID; + +INITIALIZE_PASS(MachineCopyPropagation, "machine-cp", + "Machine Copy Propagation Pass", false, false) + +void +MachineCopyPropagation::SourceNoLongerAvailable(unsigned Reg, + SourceMap &SrcMap, + DenseMap<unsigned, MachineInstr*> &AvailCopyMap) { + SourceMap::iterator SI = SrcMap.find(Reg); + if (SI != SrcMap.end()) { + const DestList& Defs = SI->second; + for (DestList::const_iterator I = Defs.begin(), E = Defs.end(); + I != E; ++I) { + unsigned MappedDef = *I; + // Source of copy is no longer available for propagation. + if (AvailCopyMap.erase(MappedDef)) { + for (const uint16_t *SR = TRI->getSubRegisters(MappedDef); *SR; ++SR) + AvailCopyMap.erase(*SR); + } + } + } + for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) { + SI = SrcMap.find(*AS); + if (SI != SrcMap.end()) { + const DestList& Defs = SI->second; + for (DestList::const_iterator I = Defs.begin(), E = Defs.end(); + I != E; ++I) { + unsigned MappedDef = *I; + if (AvailCopyMap.erase(MappedDef)) { + for (const uint16_t *SR = TRI->getSubRegisters(MappedDef); *SR; ++SR) + AvailCopyMap.erase(*SR); + } + } + } + } +} + +static bool NoInterveningSideEffect(const MachineInstr *CopyMI, + const MachineInstr *MI) { + const MachineBasicBlock *MBB = CopyMI->getParent(); + if (MI->getParent() != MBB) + return false; + MachineBasicBlock::const_iterator I = CopyMI; + MachineBasicBlock::const_iterator E = MBB->end(); + MachineBasicBlock::const_iterator E2 = MI; + + ++I; + while (I != E && I != E2) { + if (I->hasUnmodeledSideEffects() || I->isCall() || + I->isTerminator()) + return false; + ++I; + } + return true; +} + +/// isNopCopy - Return true if the specified copy is really a nop. That is +/// if the source of the copy is the same of the definition of the copy that +/// supplied the source. If the source of the copy is a sub-register than it +/// must check the sub-indices match. e.g. 
+/// ecx = mov eax +/// al = mov cl +/// But not +/// ecx = mov eax +/// al = mov ch +static bool isNopCopy(MachineInstr *CopyMI, unsigned Def, unsigned Src, + const TargetRegisterInfo *TRI) { + unsigned SrcSrc = CopyMI->getOperand(1).getReg(); + if (Def == SrcSrc) + return true; + if (TRI->isSubRegister(SrcSrc, Def)) { + unsigned SrcDef = CopyMI->getOperand(0).getReg(); + unsigned SubIdx = TRI->getSubRegIndex(SrcSrc, Def); + if (!SubIdx) + return false; + return SubIdx == TRI->getSubRegIndex(SrcDef, Src); + } + + return false; +} + +bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { + SmallSetVector<MachineInstr*, 8> MaybeDeadCopies; // Candidates for deletion + DenseMap<unsigned, MachineInstr*> AvailCopyMap; // Def -> available copies map + DenseMap<unsigned, MachineInstr*> CopyMap; // Def -> copies map + SourceMap SrcMap; // Src -> Def map + + bool Changed = false; + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) { + MachineInstr *MI = &*I; + ++I; + + if (MI->isCopy()) { + unsigned Def = MI->getOperand(0).getReg(); + unsigned Src = MI->getOperand(1).getReg(); + + if (TargetRegisterInfo::isVirtualRegister(Def) || + TargetRegisterInfo::isVirtualRegister(Src)) + report_fatal_error("MachineCopyPropagation should be run after" + " register allocation!"); + + DenseMap<unsigned, MachineInstr*>::iterator CI = AvailCopyMap.find(Src); + if (CI != AvailCopyMap.end()) { + MachineInstr *CopyMI = CI->second; + if (!ReservedRegs.test(Def) && + (!ReservedRegs.test(Src) || NoInterveningSideEffect(CopyMI, MI)) && + isNopCopy(CopyMI, Def, Src, TRI)) { + // The two copies cancel out and the source of the first copy + // hasn't been overridden, eliminate the second one. e.g. + // %ECX<def> = COPY %EAX<kill> + // ... nothing clobbered EAX. + // %EAX<def> = COPY %ECX + // => + // %ECX<def> = COPY %EAX + // + // Also avoid eliminating a copy from reserved registers unless the + // definition is proven not clobbered. e.g. + // %RSP<def> = COPY %RAX + // CALL + // %RAX<def> = COPY %RSP + + // Clear any kills of Def between CopyMI and MI. This extends the + // live range. + for (MachineBasicBlock::iterator I = CopyMI, E = MI; I != E; ++I) + I->clearRegisterKills(Def, TRI); + + MI->eraseFromParent(); + Changed = true; + ++NumDeletes; + continue; + } + } + + // If Src is defined by a previous copy, it cannot be eliminated. + CI = CopyMap.find(Src); + if (CI != CopyMap.end()) + MaybeDeadCopies.remove(CI->second); + for (const uint16_t *AS = TRI->getAliasSet(Src); *AS; ++AS) { + CI = CopyMap.find(*AS); + if (CI != CopyMap.end()) + MaybeDeadCopies.remove(CI->second); + } + + // Copy is now a candidate for deletion. + MaybeDeadCopies.insert(MI); + + // If 'Src' is previously source of another copy, then this earlier copy's + // source is no longer available. e.g. + // %xmm9<def> = copy %xmm2 + // ... + // %xmm2<def> = copy %xmm0 + // ... + // %xmm2<def> = copy %xmm9 + SourceNoLongerAvailable(Def, SrcMap, AvailCopyMap); + + // Remember Def is defined by the copy. + // ... Make sure to clear the def maps of aliases first. + for (const uint16_t *AS = TRI->getAliasSet(Def); *AS; ++AS) { + CopyMap.erase(*AS); + AvailCopyMap.erase(*AS); + } + CopyMap[Def] = MI; + AvailCopyMap[Def] = MI; + for (const uint16_t *SR = TRI->getSubRegisters(Def); *SR; ++SR) { + CopyMap[*SR] = MI; + AvailCopyMap[*SR] = MI; + } + + // Remember source that's copied to Def. Once it's clobbered, then + // it's no longer available for copy propagation. 
+ if (std::find(SrcMap[Src].begin(), SrcMap[Src].end(), Def) == + SrcMap[Src].end()) { + SrcMap[Src].push_back(Def); + } + + continue; + } + + // Not a copy. + SmallVector<unsigned, 2> Defs; + int RegMaskOpNum = -1; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isRegMask()) + RegMaskOpNum = i; + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + + if (TargetRegisterInfo::isVirtualRegister(Reg)) + report_fatal_error("MachineCopyPropagation should be run after" + " register allocation!"); + + if (MO.isDef()) { + Defs.push_back(Reg); + continue; + } + + // If 'Reg' is defined by a copy, the copy is no longer a candidate + // for elimination. + DenseMap<unsigned, MachineInstr*>::iterator CI = CopyMap.find(Reg); + if (CI != CopyMap.end()) + MaybeDeadCopies.remove(CI->second); + for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) { + CI = CopyMap.find(*AS); + if (CI != CopyMap.end()) + MaybeDeadCopies.remove(CI->second); + } + } + + // The instruction has a register mask operand which means that it clobbers + // a large set of registers. It is possible to use the register mask to + // prune the available copies, but treat it like a basic block boundary for + // now. + if (RegMaskOpNum >= 0) { + // Erase any MaybeDeadCopies whose destination register is clobbered. + const MachineOperand &MaskMO = MI->getOperand(RegMaskOpNum); + for (SmallSetVector<MachineInstr*, 8>::iterator + DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end(); + DI != DE; ++DI) { + unsigned Reg = (*DI)->getOperand(0).getReg(); + if (ReservedRegs.test(Reg) || !MaskMO.clobbersPhysReg(Reg)) + continue; + (*DI)->eraseFromParent(); + Changed = true; + ++NumDeletes; + } + + // Clear all data structures as if we were beginning a new basic block. + MaybeDeadCopies.clear(); + AvailCopyMap.clear(); + CopyMap.clear(); + SrcMap.clear(); + continue; + } + + for (unsigned i = 0, e = Defs.size(); i != e; ++i) { + unsigned Reg = Defs[i]; + + // No longer defined by a copy. + CopyMap.erase(Reg); + AvailCopyMap.erase(Reg); + for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) { + CopyMap.erase(*AS); + AvailCopyMap.erase(*AS); + } + + // If 'Reg' is previously source of a copy, it is no longer available for + // copy propagation. + SourceNoLongerAvailable(Reg, SrcMap, AvailCopyMap); + } + } + + // If MBB doesn't have successors, delete the copies whose defs are not used. + // If MBB does have successors, then conservative assume the defs are live-out + // since we don't want to trust live-in lists. 
+ if (MBB.succ_empty()) { + for (SmallSetVector<MachineInstr*, 8>::iterator + DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end(); + DI != DE; ++DI) { + if (!ReservedRegs.test((*DI)->getOperand(0).getReg())) { + (*DI)->eraseFromParent(); + Changed = true; + ++NumDeletes; + } + } + } + + return Changed; +} + +bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) { + bool Changed = false; + + TRI = MF.getTarget().getRegisterInfo(); + ReservedRegs = TRI->getReservedRegs(MF); + + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) + Changed |= CopyPropagateBlock(*I); + + return Changed; +} diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 20066a067b8f..d8c2f6a2eaef 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -13,12 +13,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/DerivedTypes.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/Config/config.h" #include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" @@ -28,6 +25,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetData.h" @@ -197,9 +195,10 @@ MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) { MachineMemOperand * MachineFunction::getMachineMemOperand(MachinePointerInfo PtrInfo, unsigned f, uint64_t s, unsigned base_alignment, - const MDNode *TBAAInfo) { + const MDNode *TBAAInfo, + const MDNode *Ranges) { return new (Allocator) MachineMemOperand(PtrInfo, f, s, base_alignment, - TBAAInfo); + TBAAInfo, Ranges); } MachineMemOperand * @@ -286,7 +285,13 @@ void MachineFunction::dump() const { } void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const { - OS << "# Machine code for function " << Fn->getName() << ":\n"; + OS << "# Machine code for function " << Fn->getName() << ": "; + if (RegInfo) { + OS << (RegInfo->isSSA() ? 
"SSA" : "Post SSA"); + if (!RegInfo->tracksLiveness()) + OS << ", not tracking liveness"; + } + OS << '\n'; // Print Frame Information FrameInfo->print(*this, OS); @@ -335,7 +340,7 @@ namespace llvm { DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} static std::string getGraphName(const MachineFunction *F) { - return "CFG for '" + F->getFunction()->getNameStr() + "' function"; + return "CFG for '" + F->getFunction()->getName().str() + "' function"; } std::string getNodeLabel(const MachineBasicBlock *Node, @@ -368,7 +373,7 @@ namespace llvm { void MachineFunction::viewCFG() const { #ifndef NDEBUG - ViewGraph(this, "mf" + getFunction()->getNameStr()); + ViewGraph(this, "mf" + getFunction()->getName()); #else errs() << "MachineFunction::viewCFG is only available in debug builds on " << "systems with Graphviz or gv!\n"; @@ -378,7 +383,7 @@ void MachineFunction::viewCFG() const void MachineFunction::viewCFGOnly() const { #ifndef NDEBUG - ViewGraph(this, "mf" + getFunction()->getNameStr(), true); + ViewGraph(this, "mf" + getFunction()->getName(), true); #else errs() << "MachineFunction::viewCFGOnly is only available in debug builds on " << "systems with Graphviz or gv!\n"; @@ -464,7 +469,7 @@ MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const { if (!isCalleeSavedInfoValid()) return BV; - for (const unsigned *CSR = TRI->getCalleeSavedRegs(MF); CSR && *CSR; ++CSR) + for (const uint16_t *CSR = TRI->getCalleeSavedRegs(MF); CSR && *CSR; ++CSR) BV.set(*CSR); // The entry MBB always has all CSRs pristine. @@ -532,6 +537,8 @@ unsigned MachineJumpTableInfo::getEntrySize(const TargetData &TD) const { switch (getEntryKind()) { case MachineJumpTableInfo::EK_BlockAddress: return TD.getPointerSize(); + case MachineJumpTableInfo::EK_GPRel64BlockAddress: + return 8; case MachineJumpTableInfo::EK_GPRel32BlockAddress: case MachineJumpTableInfo::EK_LabelDifference32: case MachineJumpTableInfo::EK_Custom32: @@ -539,8 +546,7 @@ unsigned MachineJumpTableInfo::getEntrySize(const TargetData &TD) const { case MachineJumpTableInfo::EK_Inline: return 0; } - assert(0 && "Unknown jump table encoding!"); - return ~0; + llvm_unreachable("Unknown jump table encoding!"); } /// getEntryAlignment - Return the alignment of each entry in the jump table. @@ -551,6 +557,8 @@ unsigned MachineJumpTableInfo::getEntryAlignment(const TargetData &TD) const { switch (getEntryKind()) { case MachineJumpTableInfo::EK_BlockAddress: return TD.getPointerABIAlignment(); + case MachineJumpTableInfo::EK_GPRel64BlockAddress: + return TD.getABIIntegerTypeAlignment(64); case MachineJumpTableInfo::EK_GPRel32BlockAddress: case MachineJumpTableInfo::EK_LabelDifference32: case MachineJumpTableInfo::EK_Custom32: @@ -558,8 +566,7 @@ unsigned MachineJumpTableInfo::getEntryAlignment(const TargetData &TD) const { case MachineJumpTableInfo::EK_Inline: return 1; } - assert(0 && "Unknown jump table encoding!"); - return ~0; + llvm_unreachable("Unknown jump table encoding!"); } /// createJumpTableIndex - Create a new jump table entry in the jump table info. 
@@ -619,6 +626,8 @@ void MachineJumpTableInfo::dump() const { print(dbgs()); } // MachineConstantPool implementation //===----------------------------------------------------------------------===// +void MachineConstantPoolValue::anchor() { } + Type *MachineConstantPoolEntry::getType() const { if (isMachineConstantPoolEntry()) return Val.MachineCPVal->getType(); @@ -653,35 +662,37 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B, // reject them. if (A->getType() == B->getType()) return false; + // We can't handle structs or arrays. + if (isa<StructType>(A->getType()) || isa<ArrayType>(A->getType()) || + isa<StructType>(B->getType()) || isa<ArrayType>(B->getType())) + return false; + // For now, only support constants with the same size. - if (TD->getTypeStoreSize(A->getType()) != TD->getTypeStoreSize(B->getType())) + uint64_t StoreSize = TD->getTypeStoreSize(A->getType()); + if (StoreSize != TD->getTypeStoreSize(B->getType()) || + StoreSize > 128) return false; - // If a floating-point value and an integer value have the same encoding, - // they can share a constant-pool entry. - if (const ConstantFP *AFP = dyn_cast<ConstantFP>(A)) - if (const ConstantInt *BI = dyn_cast<ConstantInt>(B)) - return AFP->getValueAPF().bitcastToAPInt() == BI->getValue(); - if (const ConstantFP *BFP = dyn_cast<ConstantFP>(B)) - if (const ConstantInt *AI = dyn_cast<ConstantInt>(A)) - return BFP->getValueAPF().bitcastToAPInt() == AI->getValue(); - - // Two vectors can share an entry if each pair of corresponding - // elements could. - if (const ConstantVector *AV = dyn_cast<ConstantVector>(A)) - if (const ConstantVector *BV = dyn_cast<ConstantVector>(B)) { - if (AV->getType()->getNumElements() != BV->getType()->getNumElements()) - return false; - for (unsigned i = 0, e = AV->getType()->getNumElements(); i != e; ++i) - if (!CanShareConstantPoolEntry(AV->getOperand(i), - BV->getOperand(i), TD)) - return false; - return true; - } - - // TODO: Handle other cases. - - return false; + Type *IntTy = IntegerType::get(A->getContext(), StoreSize*8); + + // Try constant folding a bitcast of both instructions to an integer. If we + // get two identical ConstantInt's, then we are good to share them. We use + // the constant folding APIs to do this so that we get the benefit of + // TargetData. 
+ if (isa<PointerType>(A->getType())) + A = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy, + const_cast<Constant*>(A), TD); + else if (A->getType() != IntTy) + A = ConstantFoldInstOperands(Instruction::BitCast, IntTy, + const_cast<Constant*>(A), TD); + if (isa<PointerType>(B->getType())) + B = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy, + const_cast<Constant*>(B), TD); + else if (B->getType() != IntTy) + B = ConstantFoldInstOperands(Instruction::BitCast, IntTy, + const_cast<Constant*>(B), TD); + + return A == B; } /// getConstantPoolIndex - Create a new entry in the constant pool or return diff --git a/lib/CodeGen/MachineFunctionAnalysis.cpp b/lib/CodeGen/MachineFunctionAnalysis.cpp index 054c750c9f2b..35591e1649d3 100644 --- a/lib/CodeGen/MachineFunctionAnalysis.cpp +++ b/lib/CodeGen/MachineFunctionAnalysis.cpp @@ -19,9 +19,8 @@ using namespace llvm; char MachineFunctionAnalysis::ID = 0; -MachineFunctionAnalysis::MachineFunctionAnalysis(const TargetMachine &tm, - CodeGenOpt::Level OL) : - FunctionPass(ID), TM(tm), OptLevel(OL), MF(0) { +MachineFunctionAnalysis::MachineFunctionAnalysis(const TargetMachine &tm) : + FunctionPass(ID), TM(tm), MF(0) { initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry()); } diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index a240667f7d6a..e553a0463a2a 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -40,6 +40,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/Hashing.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -178,6 +179,7 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, IsKill = isKill; IsDead = isDead; IsUndef = isUndef; + IsInternalRead = false; IsEarlyClobber = false; IsDebug = isDebug; SubReg = 0; @@ -191,7 +193,6 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { return false; switch (getType()) { - default: llvm_unreachable("Unrecognized operand type"); case MachineOperand::MO_Register: return getReg() == Other.getReg() && isDef() == Other.isDef() && getSubReg() == Other.getSubReg(); @@ -216,11 +217,14 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { getOffset() == Other.getOffset(); case MachineOperand::MO_BlockAddress: return getBlockAddress() == Other.getBlockAddress(); + case MO_RegisterMask: + return getRegMask() == Other.getRegMask(); case MachineOperand::MO_MCSymbol: return getMCSymbol() == Other.getMCSymbol(); case MachineOperand::MO_Metadata: return getMetadata() == Other.getMetadata(); } + llvm_unreachable("Invalid machine operand type"); } /// print - Print the specified machine operand. 
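
Editorial note: the rewritten CanShareConstantPoolEntry above reduces the question "can these two differently typed constants share one pool slot?" to "do they fold to the same N-bit integer?", after rejecting aggregates and anything larger than 128 bytes. A self-contained sketch of the underlying bit-pattern idea, deliberately written in plain C++ with memcmp instead of the ConstantFolding API; encodingsMatch and its parameters are illustrative, not part of the patch:

#include <cstdint>
#include <cstring>

// Two same-sized scalar constants may share a pool slot when their in-memory
// encodings are byte-for-byte identical, e.g. 1.0f and the integer 0x3f800000.
template <typename A, typename B>
bool encodingsMatch(const A &Lhs, const B &Rhs) {
  if (sizeof(A) != sizeof(B))
    return false;                      // only same-size constants qualify
  return std::memcmp(&Lhs, &Rhs, sizeof(A)) == 0;
}

// encodingsMatch(1.0f, uint32_t(0x3f800000u)) is true on IEEE-754 targets.

The real routine goes through ConstantFoldInstOperands so that pointer constants and TargetData-dependent layouts are handled correctly; the sketch only shows the intuition behind comparing the folded integer encodings.
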
@@ -240,7 +244,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { OS << PrintReg(getReg(), TRI, getSubReg()); if (isDef() || isKill() || isDead() || isImplicit() || isUndef() || - isEarlyClobber()) { + isInternalRead() || isEarlyClobber()) { OS << '<'; bool NeedComma = false; if (isDef()) { @@ -256,14 +260,26 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { NeedComma = true; } - if (isKill() || isDead() || isUndef()) { + if (isKill() || isDead() || isUndef() || isInternalRead()) { if (NeedComma) OS << ','; - if (isKill()) OS << "kill"; - if (isDead()) OS << "dead"; + NeedComma = false; + if (isKill()) { + OS << "kill"; + NeedComma = true; + } + if (isDead()) { + OS << "dead"; + NeedComma = true; + } if (isUndef()) { - if (isKill() || isDead()) - OS << ','; + if (NeedComma) OS << ','; OS << "undef"; + NeedComma = true; + } + if (isInternalRead()) { + if (NeedComma) OS << ','; + OS << "internal"; + NeedComma = true; } } OS << '>'; @@ -311,6 +327,9 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { WriteAsOperand(OS, getBlockAddress(), /*PrintType=*/false); OS << '>'; break; + case MachineOperand::MO_RegisterMask: + OS << "<regmask>"; + break; case MachineOperand::MO_Metadata: OS << '<'; WriteAsOperand(OS, getMetadata(), /*PrintType=*/false); @@ -319,8 +338,6 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { case MachineOperand::MO_MCSymbol: OS << "<MCSym=" << *getMCSymbol() << '>'; break; - default: - llvm_unreachable("Unrecognized operand type"); } if (unsigned TF = getTargetFlags()) @@ -364,10 +381,11 @@ MachinePointerInfo MachinePointerInfo::getStack(int64_t Offset) { MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, unsigned f, uint64_t s, unsigned int a, - const MDNode *TBAAInfo) + const MDNode *TBAAInfo, + const MDNode *Ranges) : PtrInfo(ptrinfo), Size(s), Flags((f & ((1 << MOMaxBits) - 1)) | ((Log2_32(a) + 1) << MOMaxBits)), - TBAAInfo(TBAAInfo) { + TBAAInfo(TBAAInfo), Ranges(Ranges) { assert((PtrInfo.V == 0 || isa<PointerType>(PtrInfo.V->getType())) && "invalid pointer value"); assert(getBaseAlignment() == a && "Alignment is not a power of 2!"); @@ -465,7 +483,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) { /// MCID NULL and no operands. MachineInstr::MachineInstr() : MCID(0), Flags(0), AsmPrinterFlags(0), - MemRefs(0), MemRefsEnd(0), + NumMemRefs(0), MemRefs(0), Parent(0) { // Make sure that we get added to a machine basicblock LeakDetector::addGarbageObject(this); @@ -473,10 +491,10 @@ MachineInstr::MachineInstr() void MachineInstr::addImplicitDefUseOperands() { if (MCID->ImplicitDefs) - for (const unsigned *ImpDefs = MCID->ImplicitDefs; *ImpDefs; ++ImpDefs) + for (const uint16_t *ImpDefs = MCID->getImplicitDefs(); *ImpDefs; ++ImpDefs) addOperand(MachineOperand::CreateReg(*ImpDefs, true, true)); if (MCID->ImplicitUses) - for (const unsigned *ImpUses = MCID->ImplicitUses; *ImpUses; ++ImpUses) + for (const uint16_t *ImpUses = MCID->getImplicitUses(); *ImpUses; ++ImpUses) addOperand(MachineOperand::CreateReg(*ImpUses, false, true)); } @@ -485,7 +503,7 @@ void MachineInstr::addImplicitDefUseOperands() { /// the MCInstrDesc. 
MachineInstr::MachineInstr(const MCInstrDesc &tid, bool NoImp) : MCID(&tid), Flags(0), AsmPrinterFlags(0), - MemRefs(0), MemRefsEnd(0), Parent(0) { + NumMemRefs(0), MemRefs(0), Parent(0) { unsigned NumImplicitOps = 0; if (!NoImp) NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses(); @@ -500,7 +518,7 @@ MachineInstr::MachineInstr(const MCInstrDesc &tid, bool NoImp) MachineInstr::MachineInstr(const MCInstrDesc &tid, const DebugLoc dl, bool NoImp) : MCID(&tid), Flags(0), AsmPrinterFlags(0), - MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) { + NumMemRefs(0), MemRefs(0), Parent(0), debugLoc(dl) { unsigned NumImplicitOps = 0; if (!NoImp) NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses(); @@ -516,7 +534,7 @@ MachineInstr::MachineInstr(const MCInstrDesc &tid, const DebugLoc dl, /// basic block. MachineInstr::MachineInstr(MachineBasicBlock *MBB, const MCInstrDesc &tid) : MCID(&tid), Flags(0), AsmPrinterFlags(0), - MemRefs(0), MemRefsEnd(0), Parent(0) { + NumMemRefs(0), MemRefs(0), Parent(0) { assert(MBB && "Cannot use inserting ctor with null basic block!"); unsigned NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses(); @@ -532,7 +550,7 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const MCInstrDesc &tid) MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl, const MCInstrDesc &tid) : MCID(&tid), Flags(0), AsmPrinterFlags(0), - MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) { + NumMemRefs(0), MemRefs(0), Parent(0), debugLoc(dl) { assert(MBB && "Cannot use inserting ctor with null basic block!"); unsigned NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses(); @@ -547,7 +565,7 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl, /// MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) : MCID(&MI.getDesc()), Flags(0), AsmPrinterFlags(0), - MemRefs(MI.MemRefs), MemRefsEnd(MI.MemRefsEnd), + NumMemRefs(MI.NumMemRefs), MemRefs(MI.MemRefs), Parent(0), debugLoc(MI.getDebugLoc()) { Operands.reserve(MI.getNumOperands()); @@ -722,17 +740,33 @@ void MachineInstr::RemoveOperand(unsigned OpNo) { void MachineInstr::addMemOperand(MachineFunction &MF, MachineMemOperand *MO) { mmo_iterator OldMemRefs = MemRefs; - mmo_iterator OldMemRefsEnd = MemRefsEnd; + uint16_t OldNumMemRefs = NumMemRefs; - size_t NewNum = (MemRefsEnd - MemRefs) + 1; + uint16_t NewNum = NumMemRefs + 1; mmo_iterator NewMemRefs = MF.allocateMemRefsArray(NewNum); - mmo_iterator NewMemRefsEnd = NewMemRefs + NewNum; - std::copy(OldMemRefs, OldMemRefsEnd, NewMemRefs); + std::copy(OldMemRefs, OldMemRefs + OldNumMemRefs, NewMemRefs); NewMemRefs[NewNum - 1] = MO; MemRefs = NewMemRefs; - MemRefsEnd = NewMemRefsEnd; + NumMemRefs = NewNum; +} + +bool MachineInstr::hasPropertyInBundle(unsigned Mask, QueryType Type) const { + const MachineBasicBlock *MBB = getParent(); + MachineBasicBlock::const_instr_iterator MII = *this; ++MII; + while (MII != MBB->end() && MII->isInsideBundle()) { + if (MII->getDesc().getFlags() & Mask) { + if (Type == AnyInBundle) + return true; + } else { + if (Type == AllInBundle) + return false; + } + ++MII; + } + + return Type == AllInBundle; } bool MachineInstr::isIdenticalTo(const MachineInstr *Other, @@ -743,6 +777,19 @@ bool MachineInstr::isIdenticalTo(const MachineInstr *Other, Other->getNumOperands() != getNumOperands()) return false; + if (isBundle()) { + // Both instructions are bundles, compare MIs inside the bundle. 
+ MachineBasicBlock::const_instr_iterator I1 = *this; + MachineBasicBlock::const_instr_iterator E1 = getParent()->instr_end(); + MachineBasicBlock::const_instr_iterator I2 = *Other; + MachineBasicBlock::const_instr_iterator E2= Other->getParent()->instr_end(); + while (++I1 != E1 && I1->isInsideBundle()) { + ++I2; + if (I2 == E2 || !I2->isInsideBundle() || !I1->isIdenticalTo(I2, Check)) + return false; + } + } + // Check operands to make sure they match. for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { const MachineOperand &MO = getOperand(i); @@ -789,6 +836,18 @@ bool MachineInstr::isIdenticalTo(const MachineInstr *Other, /// block, and returns it, but does not delete it. MachineInstr *MachineInstr::removeFromParent() { assert(getParent() && "Not embedded in a basic block!"); + + // If it's a bundle then remove the MIs inside the bundle as well. + if (isBundle()) { + MachineBasicBlock *MBB = getParent(); + MachineBasicBlock::instr_iterator MII = *this; ++MII; + MachineBasicBlock::instr_iterator E = MBB->instr_end(); + while (MII != E && MII->isInsideBundle()) { + MachineInstr *MI = &*MII; + ++MII; + MBB->remove(MI); + } + } getParent()->remove(this); return this; } @@ -798,6 +857,17 @@ MachineInstr *MachineInstr::removeFromParent() { /// block, and deletes it. void MachineInstr::eraseFromParent() { assert(getParent() && "Not embedded in a basic block!"); + // If it's a bundle then remove the MIs inside the bundle as well. + if (isBundle()) { + MachineBasicBlock *MBB = getParent(); + MachineBasicBlock::instr_iterator MII = *this; ++MII; + MachineBasicBlock::instr_iterator E = MBB->instr_end(); + while (MII != E && MII->isInsideBundle()) { + MachineInstr *MI = &*MII; + ++MII; + MBB->erase(MI); + } + } getParent()->erase(this); } @@ -817,6 +887,16 @@ unsigned MachineInstr::getNumExplicitOperands() const { return NumOperands; } +/// isBundled - Return true if this instruction part of a bundle. This is true +/// if either itself or its following instruction is marked "InsideBundle". +bool MachineInstr::isBundled() const { + if (isInsideBundle()) + return true; + MachineBasicBlock::const_instr_iterator nextMI = this; + ++nextMI; + return nextMI != Parent->instr_end() && nextMI->isInsideBundle(); +} + bool MachineInstr::isStackAligningInlineAsm() const { if (isInlineAsm()) { unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); @@ -887,6 +967,20 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx, return NULL; } +/// getBundleSize - Return the number of instructions inside the MI bundle. +unsigned MachineInstr::getBundleSize() const { + assert(isBundle() && "Expecting a bundle"); + + MachineBasicBlock::const_instr_iterator I = *this; + unsigned Size = 0; + while ((++I)->isInsideBundle()) { + ++Size; + } + assert(Size > 1 && "Malformed bundle"); + + return Size; +} + /// findRegisterUseOperandIdx() - Returns the MachineOperand that is a use of /// the specific register or -1 if it is not found. It further tightens /// the search criteria to a use that kills the register if isKill is true. @@ -948,6 +1042,10 @@ MachineInstr::findRegisterDefOperandIdx(unsigned Reg, bool isDead, bool Overlap, bool isPhys = TargetRegisterInfo::isPhysicalRegister(Reg); for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { const MachineOperand &MO = getOperand(i); + // Accept regmask operands when Overlap is set. + // Ignore them when looking for a specific def operand (Overlap == false). 
+ if (isPhys && Overlap && MO.isRegMask() && MO.clobbersPhysReg(Reg)) + return i; if (!MO.isReg() || !MO.isDef()) continue; unsigned MOReg = MO.getReg(); @@ -1118,6 +1216,8 @@ void MachineInstr::copyKillDeadInfo(const MachineInstr *MI) { /// copyPredicates - Copies predicate operand(s) from MI. void MachineInstr::copyPredicates(const MachineInstr *MI) { + assert(!isBundle() && "MachineInstr::copyPredicates() can't handle bundles"); + const MCInstrDesc &MCID = MI->getDesc(); if (!MCID.isPredicable()) return; @@ -1159,13 +1259,13 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII, AliasAnalysis *AA, bool &SawStore) const { // Ignore stuff that we obviously can't move. - if (MCID->mayStore() || MCID->isCall()) { + if (mayStore() || isCall()) { SawStore = true; return false; } if (isLabel() || isDebugValue() || - MCID->isTerminator() || hasUnmodeledSideEffects()) + isTerminator() || hasUnmodeledSideEffects()) return false; // See if this instruction does a load. If so, we have to guarantee that the @@ -1173,7 +1273,7 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII, // destination. The check for isInvariantLoad gives the targe the chance to // classify the load as always returning a constant, e.g. a constant pool // load. - if (MCID->mayLoad() && !isInvariantLoad(AA)) + if (mayLoad() && !isInvariantLoad(AA)) // Otherwise, this is a real load. If there is a store between the load and // end of block, or if the load is volatile, we can't move it. return !SawStore && !hasVolatileMemoryRef(); @@ -1213,9 +1313,9 @@ bool MachineInstr::isSafeToReMat(const TargetInstrInfo *TII, /// have no volatile memory references. bool MachineInstr::hasVolatileMemoryRef() const { // An instruction known never to access memory won't have a volatile access. - if (!MCID->mayStore() && - !MCID->mayLoad() && - !MCID->isCall() && + if (!mayStore() && + !mayLoad() && + !isCall() && !hasUnmodeledSideEffects()) return false; @@ -1239,7 +1339,7 @@ bool MachineInstr::hasVolatileMemoryRef() const { /// *all* loads the instruction does are invariant (if it does multiple loads). bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const { // If the instruction doesn't load at all, it isn't an invariant load. - if (!MCID->mayLoad()) + if (!mayLoad()) return false; // If the instruction has lost its memoperands, conservatively assume that @@ -1253,6 +1353,7 @@ bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const { E = memoperands_end(); I != E; ++I) { if ((*I)->isVolatile()) return false; if ((*I)->isStore()) return false; + if ((*I)->isInvariant()) return true; if (const Value *V = (*I)->getValue()) { // A load from a constant PseudoSourceValue is invariant. @@ -1291,7 +1392,7 @@ unsigned MachineInstr::isConstantValuePHI() const { } bool MachineInstr::hasUnmodeledSideEffects() const { - if (getDesc().hasUnmodeledSideEffects()) + if (hasProperty(MCID::UnmodeledSideEffects)) return true; if (isInlineAsm()) { unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); @@ -1384,7 +1485,10 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { OS << " = "; // Print the opcode name. - OS << getDesc().getName(); + if (TM && TM->getInstrInfo()) + OS << TM->getInstrInfo()->getName(getOpcode()); + else + OS << "UNKNOWN"; // Print the rest of the operands. bool OmittedAnyCallClobbers = false; @@ -1419,14 +1523,14 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { // call instructions much less noisy on targets where calls clobber lots // of registers. 
Don't rely on MO.isDead() because we may be called before // LiveVariables is run, or we may be looking at a non-allocatable reg. - if (MF && getDesc().isCall() && + if (MF && isCall() && MO.isReg() && MO.isImplicit() && MO.isDef()) { unsigned Reg = MO.getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { const MachineRegisterInfo &MRI = MF->getRegInfo(); if (MRI.use_empty(Reg) && !MRI.isLiveOut(Reg)) { bool HasAliasLive = false; - for (const unsigned *Alias = TM->getRegisterInfo()->getAliasSet(Reg); + for (const uint16_t *Alias = TM->getRegisterInfo()->getAliasSet(Reg); unsigned AliasReg = *Alias; ++Alias) if (!MRI.use_empty(AliasReg) || MRI.isLiveOut(AliasReg)) { HasAliasLive = true; @@ -1617,6 +1721,20 @@ bool MachineInstr::addRegisterKilled(unsigned IncomingReg, return Found; } +void MachineInstr::clearRegisterKills(unsigned Reg, + const TargetRegisterInfo *RegInfo) { + if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + RegInfo = 0; + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + MachineOperand &MO = getOperand(i); + if (!MO.isReg() || !MO.isUse() || !MO.isKill()) + continue; + unsigned OpReg = MO.getReg(); + if (OpReg == Reg || (RegInfo && RegInfo->isSuperRegister(Reg, OpReg))) + MO.setIsKill(false); + } +} + bool MachineInstr::addRegisterDead(unsigned IncomingReg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound) { @@ -1689,16 +1807,21 @@ void MachineInstr::addRegisterDefined(unsigned IncomingReg, true /*IsImp*/)); } -void MachineInstr::setPhysRegsDeadExcept(const SmallVectorImpl<unsigned> &UsedRegs, +void MachineInstr::setPhysRegsDeadExcept(ArrayRef<unsigned> UsedRegs, const TargetRegisterInfo &TRI) { + bool HasRegMask = false; for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { MachineOperand &MO = getOperand(i); + if (MO.isRegMask()) { + HasRegMask = true; + continue; + } if (!MO.isReg() || !MO.isDef()) continue; unsigned Reg = MO.getReg(); - if (Reg == 0) continue; + if (!TargetRegisterInfo::isPhysicalRegister(Reg)) continue; bool Dead = true; - for (SmallVectorImpl<unsigned>::const_iterator I = UsedRegs.begin(), - E = UsedRegs.end(); I != E; ++I) + for (ArrayRef<unsigned>::iterator I = UsedRegs.begin(), E = UsedRegs.end(); + I != E; ++I) if (TRI.regsOverlap(*I, Reg)) { Dead = false; break; @@ -1706,53 +1829,66 @@ void MachineInstr::setPhysRegsDeadExcept(const SmallVectorImpl<unsigned> &UsedRe // If there are no uses, including partial uses, the def is dead. if (Dead) MO.setIsDead(); } + + // This is a call with a register mask operand. + // Mask clobbers are always dead, so add defs for the non-dead defines. + if (HasRegMask) + for (ArrayRef<unsigned>::iterator I = UsedRegs.begin(), E = UsedRegs.end(); + I != E; ++I) + addRegisterDefined(*I, &TRI); } unsigned MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) { - unsigned Hash = MI->getOpcode() * 37; + // Build up a buffer of hash code components. + // + // FIXME: This is a total hack. We should have a hash_value overload for + // MachineOperand, but currently that doesn't work because there are many + // different ideas of "equality" and thus different sets of information that + // contribute to the hash code. This one happens to want to take a specific + // subset. And it's still not clear that this routine uses the *correct* + // subset of information when computing the hash code. The goal is to use the + // same inputs for the hash code here that MachineInstr::isIdenticalTo uses to + // test for equality when passed the 'IgnoreVRegDefs' filter flag. 
It would + // be very useful to factor the selection of relevant inputs out of the two + // functions and into a common routine, but it's not clear how that can be + // done. + SmallVector<size_t, 8> HashComponents; + HashComponents.reserve(MI->getNumOperands() + 1); + HashComponents.push_back(MI->getOpcode()); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); - uint64_t Key = (uint64_t)MO.getType() << 32; switch (MO.getType()) { default: break; case MachineOperand::MO_Register: if (MO.isDef() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue; // Skip virtual register defs. - Key |= MO.getReg(); + HashComponents.push_back(hash_combine(MO.getType(), MO.getReg())); break; case MachineOperand::MO_Immediate: - Key |= MO.getImm(); + HashComponents.push_back(hash_combine(MO.getType(), MO.getImm())); break; case MachineOperand::MO_FrameIndex: case MachineOperand::MO_ConstantPoolIndex: case MachineOperand::MO_JumpTableIndex: - Key |= MO.getIndex(); + HashComponents.push_back(hash_combine(MO.getType(), MO.getIndex())); break; case MachineOperand::MO_MachineBasicBlock: - Key |= DenseMapInfo<void*>::getHashValue(MO.getMBB()); + HashComponents.push_back(hash_combine(MO.getType(), MO.getMBB())); break; case MachineOperand::MO_GlobalAddress: - Key |= DenseMapInfo<void*>::getHashValue(MO.getGlobal()); + HashComponents.push_back(hash_combine(MO.getType(), MO.getGlobal())); break; case MachineOperand::MO_BlockAddress: - Key |= DenseMapInfo<void*>::getHashValue(MO.getBlockAddress()); + HashComponents.push_back(hash_combine(MO.getType(), + MO.getBlockAddress())); break; case MachineOperand::MO_MCSymbol: - Key |= DenseMapInfo<void*>::getHashValue(MO.getMCSymbol()); + HashComponents.push_back(hash_combine(MO.getType(), MO.getMCSymbol())); break; } - Key += ~(Key << 32); - Key ^= (Key >> 22); - Key += ~(Key << 13); - Key ^= (Key >> 8); - Key += (Key << 3); - Key ^= (Key >> 15); - Key += ~(Key << 27); - Key ^= (Key >> 31); - Hash = (unsigned)Key + Hash * 37; - } - return Hash; + } + return hash_combine_range(HashComponents.begin(), HashComponents.end()); } void MachineInstr::emitError(StringRef Msg) const { diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp new file mode 100644 index 000000000000..73489a7160bf --- /dev/null +++ b/lib/CodeGen/MachineInstrBundle.cpp @@ -0,0 +1,278 @@ +//===-- lib/CodeGen/MachineInstrBundle.cpp --------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +using namespace llvm; + +namespace { + class UnpackMachineBundles : public MachineFunctionPass { + public: + static char ID; // Pass identification + UnpackMachineBundles() : MachineFunctionPass(ID) { + initializeUnpackMachineBundlesPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnMachineFunction(MachineFunction &MF); + }; +} // end anonymous namespace + +char UnpackMachineBundles::ID = 0; +char &llvm::UnpackMachineBundlesID = UnpackMachineBundles::ID; +INITIALIZE_PASS(UnpackMachineBundles, "unpack-mi-bundles", + "Unpack machine instruction bundles", false, false) + +bool UnpackMachineBundles::runOnMachineFunction(MachineFunction &MF) { + bool Changed = false; + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + MachineBasicBlock *MBB = &*I; + + for (MachineBasicBlock::instr_iterator MII = MBB->instr_begin(), + MIE = MBB->instr_end(); MII != MIE; ) { + MachineInstr *MI = &*MII; + + // Remove BUNDLE instruction and the InsideBundle flags from bundled + // instructions. + if (MI->isBundle()) { + while (++MII != MIE && MII->isInsideBundle()) { + MII->setIsInsideBundle(false); + for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MII->getOperand(i); + if (MO.isReg() && MO.isInternalRead()) + MO.setIsInternalRead(false); + } + } + MI->eraseFromParent(); + + Changed = true; + continue; + } + + ++MII; + } + } + + return Changed; +} + + +namespace { + class FinalizeMachineBundles : public MachineFunctionPass { + public: + static char ID; // Pass identification + FinalizeMachineBundles() : MachineFunctionPass(ID) { + initializeFinalizeMachineBundlesPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnMachineFunction(MachineFunction &MF); + }; +} // end anonymous namespace + +char FinalizeMachineBundles::ID = 0; +char &llvm::FinalizeMachineBundlesID = FinalizeMachineBundles::ID; +INITIALIZE_PASS(FinalizeMachineBundles, "finalize-mi-bundles", + "Finalize machine instruction bundles", false, false) + +bool FinalizeMachineBundles::runOnMachineFunction(MachineFunction &MF) { + return llvm::finalizeBundles(MF); +} + + +/// finalizeBundle - Finalize a machine instruction bundle which includes +/// a sequence of instructions starting from FirstMI to LastMI (exclusive). +/// This routine adds a BUNDLE instruction to represent the bundle, it adds +/// IsInternalRead markers to MachineOperands which are defined inside the +/// bundle, and it copies externally visible defs and uses to the BUNDLE +/// instruction. 
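
Editorial note, ahead of the definition that follows: a minimal sketch (a hypothetical target-pass helper, not part of this patch) of how the three-argument finalizeBundle is meant to be driven. The caller picks a half-open range of adjacent instructions and the routine inserts the BUNDLE header, flags the bundled instructions, and copies the externally visible defs and uses onto the header:

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBundle.h"

// Bundle N adjacent instructions starting at First. [First, Last) is
// half-open, matching the "LastMI (exclusive)" contract documented above.
static void bundleRange(llvm::MachineBasicBlock &MBB,
                        llvm::MachineBasicBlock::instr_iterator First,
                        unsigned N) {
  llvm::MachineBasicBlock::instr_iterator Last = First;
  for (unsigned i = 0; i != N; ++i)
    ++Last;
  llvm::finalizeBundle(MBB, First, Last);
}
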
+void llvm::finalizeBundle(MachineBasicBlock &MBB, + MachineBasicBlock::instr_iterator FirstMI, + MachineBasicBlock::instr_iterator LastMI) { + assert(FirstMI != LastMI && "Empty bundle?"); + + const TargetMachine &TM = MBB.getParent()->getTarget(); + const TargetInstrInfo *TII = TM.getInstrInfo(); + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + + MachineInstrBuilder MIB = BuildMI(MBB, FirstMI, FirstMI->getDebugLoc(), + TII->get(TargetOpcode::BUNDLE)); + + SmallVector<unsigned, 8> LocalDefs; + SmallSet<unsigned, 8> LocalDefSet; + SmallSet<unsigned, 8> DeadDefSet; + SmallSet<unsigned, 8> KilledDefSet; + SmallVector<unsigned, 8> ExternUses; + SmallSet<unsigned, 8> ExternUseSet; + SmallSet<unsigned, 8> KilledUseSet; + SmallSet<unsigned, 8> UndefUseSet; + SmallVector<MachineOperand*, 4> Defs; + for (; FirstMI != LastMI; ++FirstMI) { + for (unsigned i = 0, e = FirstMI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = FirstMI->getOperand(i); + if (!MO.isReg()) + continue; + if (MO.isDef()) { + Defs.push_back(&MO); + continue; + } + + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + assert(TargetRegisterInfo::isPhysicalRegister(Reg)); + if (LocalDefSet.count(Reg)) { + MO.setIsInternalRead(); + if (MO.isKill()) + // Internal def is now killed. + KilledDefSet.insert(Reg); + } else { + if (ExternUseSet.insert(Reg)) { + ExternUses.push_back(Reg); + if (MO.isUndef()) + UndefUseSet.insert(Reg); + } + if (MO.isKill()) + // External def is now killed. + KilledUseSet.insert(Reg); + } + } + + for (unsigned i = 0, e = Defs.size(); i != e; ++i) { + MachineOperand &MO = *Defs[i]; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + + if (LocalDefSet.insert(Reg)) { + LocalDefs.push_back(Reg); + if (MO.isDead()) { + DeadDefSet.insert(Reg); + } + } else { + // Re-defined inside the bundle, it's no longer killed. + KilledDefSet.erase(Reg); + if (!MO.isDead()) + // Previously defined but dead. + DeadDefSet.erase(Reg); + } + + if (!MO.isDead()) { + for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); + unsigned SubReg = *SubRegs; ++SubRegs) { + if (LocalDefSet.insert(SubReg)) + LocalDefs.push_back(SubReg); + } + } + } + + FirstMI->setIsInsideBundle(); + Defs.clear(); + } + + SmallSet<unsigned, 8> Added; + for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) { + unsigned Reg = LocalDefs[i]; + if (Added.insert(Reg)) { + // If it's not live beyond end of the bundle, mark it dead. + bool isDead = DeadDefSet.count(Reg) || KilledDefSet.count(Reg); + MIB.addReg(Reg, getDefRegState(true) | getDeadRegState(isDead) | + getImplRegState(true)); + } + } + + for (unsigned i = 0, e = ExternUses.size(); i != e; ++i) { + unsigned Reg = ExternUses[i]; + bool isKill = KilledUseSet.count(Reg); + bool isUndef = UndefUseSet.count(Reg); + MIB.addReg(Reg, getKillRegState(isKill) | getUndefRegState(isUndef) | + getImplRegState(true)); + } +} + +/// finalizeBundle - Same functionality as the previous finalizeBundle except +/// the last instruction in the bundle is not provided as an input. This is +/// used in cases where bundles are pre-determined by marking instructions +/// with 'InsideBundle' marker. It returns the MBB instruction iterator that +/// points to the end of the bundle. 
+MachineBasicBlock::instr_iterator +llvm::finalizeBundle(MachineBasicBlock &MBB, + MachineBasicBlock::instr_iterator FirstMI) { + MachineBasicBlock::instr_iterator E = MBB.instr_end(); + MachineBasicBlock::instr_iterator LastMI = llvm::next(FirstMI); + while (LastMI != E && LastMI->isInsideBundle()) + ++LastMI; + finalizeBundle(MBB, FirstMI, LastMI); + return LastMI; +} + +/// finalizeBundles - Finalize instruction bundles in the specified +/// MachineFunction. Return true if any bundles are finalized. +bool llvm::finalizeBundles(MachineFunction &MF) { + bool Changed = false; + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + MachineBasicBlock &MBB = *I; + + MachineBasicBlock::instr_iterator MII = MBB.instr_begin(); + assert(!MII->isInsideBundle() && + "First instr cannot be inside bundle before finalization!"); + + MachineBasicBlock::instr_iterator MIE = MBB.instr_end(); + if (MII == MIE) + continue; + for (++MII; MII != MIE; ) { + if (!MII->isInsideBundle()) + ++MII; + else { + MII = finalizeBundle(MBB, llvm::prior(MII)); + Changed = true; + } + } + } + + return Changed; +} + +//===----------------------------------------------------------------------===// +// MachineOperand iterator +//===----------------------------------------------------------------------===// + +MachineOperandIteratorBase::RegInfo +MachineOperandIteratorBase::analyzeVirtReg(unsigned Reg, + SmallVectorImpl<std::pair<MachineInstr*, unsigned> > *Ops) { + RegInfo RI = { false, false, false }; + for(; isValid(); ++*this) { + MachineOperand &MO = deref(); + if (!MO.isReg() || MO.getReg() != Reg) + continue; + + // Remember each (MI, OpNo) that refers to Reg. + if (Ops) + Ops->push_back(std::make_pair(MO.getParent(), getOperandNo())); + + // Both defs and uses can read virtual registers. + if (MO.readsReg()) { + RI.Reads = true; + if (MO.isDef()) + RI.Tied = true; + } + + // Only defs can write. + if (MO.isDef()) + RI.Writes = true; + else if (!RI.Tied && MO.getParent()->isRegTiedToDefOperand(getOperandNo())) + RI.Tied = true; + } + return RI; +} diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index a1f80d5282e0..8c562cc4454a 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -45,7 +45,7 @@ using namespace llvm; static cl::opt<bool> AvoidSpeculation("avoid-speculation", cl::desc("MachineLICM should avoid speculation"), - cl::init(false), cl::Hidden); + cl::init(true), cl::Hidden); STATISTIC(NumHoisted, "Number of machine instructions hoisted out of loops"); @@ -60,8 +60,6 @@ STATISTIC(NumPostRAHoisted, namespace { class MachineLICM : public MachineFunctionPass { - bool PreRegAlloc; - const TargetMachine *TM; const TargetInstrInfo *TII; const TargetLowering *TLI; @@ -69,6 +67,7 @@ namespace { const MachineFrameInfo *MFI; MachineRegisterInfo *MRI; const InstrItineraryData *InstrItins; + bool PreRegAlloc; // Various analyses that we use... AliasAnalysis *AA; // Alias analysis info. @@ -81,7 +80,13 @@ namespace { MachineLoop *CurLoop; // The current loop we are working on. MachineBasicBlock *CurPreheader; // The preheader for CurLoop. - BitVector AllocatableSet; + // Exit blocks for CurLoop. + SmallVector<MachineBasicBlock*, 8> ExitBlocks; + + bool isExitBlock(const MachineBasicBlock *MBB) const { + return std::find(ExitBlocks.begin(), ExitBlocks.end(), MBB) != + ExitBlocks.end(); + } // Track 'estimated' register pressure. 
SmallSet<unsigned, 32> RegSeen; @@ -122,8 +127,6 @@ namespace { virtual bool runOnMachineFunction(MachineFunction &MF); - const char *getPassName() const { return "Machine Instruction LICM"; } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<MachineLoopInfo>(); AU.addRequired<MachineDominatorTree>(); @@ -165,7 +168,9 @@ namespace { /// ProcessMI - Examine the instruction for potentai LICM candidate. Also /// gather register def and frame object update information. - void ProcessMI(MachineInstr *MI, unsigned *PhysRegDefs, + void ProcessMI(MachineInstr *MI, + BitVector &PhysRegDefs, + BitVector &PhysRegClobbers, SmallSet<int, 32> &StoredFIs, SmallVector<CandidateInfo, 32> &Candidates); @@ -182,12 +187,12 @@ namespace { /// invariant. I.e., all virtual register operands are defined outside of /// the loop, physical registers aren't accessed (explicitly or implicitly), /// and the instruction is hoistable. - /// + /// bool IsLoopInvariantInst(MachineInstr &I); - /// HasAnyPHIUse - Return true if the specified register is used by any - /// phi node. - bool HasAnyPHIUse(unsigned Reg) const; + /// HasLoopPHIUse - Return true if the specified instruction is used by any + /// phi node in the current loop. + bool HasLoopPHIUse(const MachineInstr *MI) const; /// HasHighOperandLatency - Compute operand latency between a def of 'Reg' /// and an use in the current loop, return true if the target considered @@ -200,7 +205,7 @@ namespace { /// CanCauseHighRegPressure - Visit BBs from header to current BB, /// check if hoisting an instruction of the given cost matrix can cause high /// register pressure. - bool CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost); + bool CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost, bool Cheap); /// UpdateBackTraceRegPressure - Traverse the back trace from header to /// the current block and update their register pressures to reflect the @@ -215,13 +220,25 @@ namespace { /// If not then a load from this mbb may not be safe to hoist. bool IsGuaranteedToExecute(MachineBasicBlock *BB); - /// HoistRegion - Walk the specified region of the CFG (defined by all - /// blocks dominated by the specified block, and that are in the current - /// loop) in depth first order w.r.t the DominatorTree. This allows us to - /// visit definitions before uses, allowing us to hoist a loop body in one - /// pass without iteration. + void EnterScope(MachineBasicBlock *MBB); + + void ExitScope(MachineBasicBlock *MBB); + + /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to given + /// dominator tree node if its a leaf or all of its children are done. Walk + /// up the dominator tree to destroy ancestors which are now done. + void ExitScopeIfDone(MachineDomTreeNode *Node, + DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, + DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap); + + /// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all + /// blocks dominated by the specified header block, and that are in the + /// current loop) in depth first order w.r.t the DominatorTree. This allows + /// us to visit definitions before uses, allowing us to hoist a loop body in + /// one pass without iteration. 
/// - void HoistRegion(MachineDomTreeNode *N, bool IsHeader = false); + void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode); + void HoistRegion(MachineDomTreeNode *N, bool IsHeader); /// getRegisterClassIDAndCost - For a given MI, register, and the operand /// index, return the ID and cost of its representative register class by @@ -278,6 +295,7 @@ namespace { } // end anonymous namespace char MachineLICM::ID = 0; +char &llvm::MachineLICMID = MachineLICM::ID; INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm", "Machine Loop Invariant Code Motion", false, false) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) @@ -286,10 +304,6 @@ INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(MachineLICM, "machinelicm", "Machine Loop Invariant Code Motion", false, false) -FunctionPass *llvm::createMachineLICMPass(bool PreRegAlloc) { - return new MachineLICM(PreRegAlloc); -} - /// LoopIsOuterMostWithPredecessor - Test if the given loop is the outer-most /// loop that has a unique predecessor. static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) { @@ -305,12 +319,6 @@ static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) { } bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { - if (PreRegAlloc) - DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: "); - else - DEBUG(dbgs() << "******** Post-regalloc Machine LICM: "); - DEBUG(dbgs() << MF.getFunction()->getName() << " ********\n"); - Changed = FirstInLoop = false; TM = &MF.getTarget(); TII = TM->getInstrInfo(); @@ -319,7 +327,14 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { MFI = MF.getFrameInfo(); MRI = &MF.getRegInfo(); InstrItins = TM->getInstrItineraryData(); - AllocatableSet = TRI->getAllocatableSet(MF); + + PreRegAlloc = MRI->isSSA(); + + if (PreRegAlloc) + DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: "); + else + DEBUG(dbgs() << "******** Post-regalloc Machine LICM: "); + DEBUG(dbgs() << MF.getFunction()->getName() << " ********\n"); if (PreRegAlloc) { // Estimate register pressure during pre-regalloc pass. @@ -341,6 +356,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { while (!Worklist.empty()) { CurLoop = Worklist.pop_back_val(); CurPreheader = 0; + ExitBlocks.clear(); // If this is done before regalloc, only visit outer-most preheader-sporting // loops. @@ -349,6 +365,8 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { continue; } + CurLoop->getExitBlocks(ExitBlocks); + if (!PreRegAlloc) HoistRegionPostRA(); else { @@ -356,7 +374,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { // being hoisted. MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader()); FirstInLoop = true; - HoistRegion(N, true); + HoistOutOfLoop(N); CSEMap.clear(); } } @@ -383,7 +401,8 @@ static bool InstructionStoresToFI(const MachineInstr *MI, int FI) { /// ProcessMI - Examine the instruction for potentai LICM candidate. Also /// gather register def and frame object update information. void MachineLICM::ProcessMI(MachineInstr *MI, - unsigned *PhysRegDefs, + BitVector &PhysRegDefs, + BitVector &PhysRegClobbers, SmallSet<int, 32> &StoredFIs, SmallVector<CandidateInfo, 32> &Candidates) { bool RuledOut = false; @@ -402,6 +421,13 @@ void MachineLICM::ProcessMI(MachineInstr *MI, continue; } + // We can't hoist an instruction defining a physreg that is clobbered in + // the loop. 
+ if (MO.isRegMask()) { + PhysRegClobbers.setBitsNotInMask(MO.getRegMask()); + continue; + } + if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); @@ -411,7 +437,7 @@ void MachineLICM::ProcessMI(MachineInstr *MI, "Not expecting virtual register!"); if (!MO.isDef()) { - if (Reg && PhysRegDefs[Reg]) + if (Reg && (PhysRegDefs.test(Reg) || PhysRegClobbers.test(Reg))) // If it's using a non-loop-invariant register, then it's obviously not // safe to hoist. HasNonInvariantUse = true; @@ -419,9 +445,8 @@ void MachineLICM::ProcessMI(MachineInstr *MI, } if (MO.isImplicit()) { - ++PhysRegDefs[Reg]; - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) - ++PhysRegDefs[*AS]; + for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS) + PhysRegClobbers.set(*AS); if (!MO.isDead()) // Non-dead implicit def? This cannot be hoisted. RuledOut = true; @@ -438,14 +463,17 @@ void MachineLICM::ProcessMI(MachineInstr *MI, Def = Reg; // If we have already seen another instruction that defines the same - // register, then this is not safe. - if (++PhysRegDefs[Reg] > 1) - // MI defined register is seen defined by another instruction in - // the loop, it cannot be a LICM candidate. - RuledOut = true; - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) - if (++PhysRegDefs[*AS] > 1) + // register, then this is not safe. Two defs is indicated by setting a + // PhysRegClobbers bit. + for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS) { + if (PhysRegDefs.test(*AS)) + PhysRegClobbers.set(*AS); + if (PhysRegClobbers.test(*AS)) + // MI defined register is seen defined by another instruction in + // the loop, it cannot be a LICM candidate. RuledOut = true; + PhysRegDefs.set(*AS); + } } // Only consider reloads for now and remats which do not have register @@ -461,9 +489,13 @@ void MachineLICM::ProcessMI(MachineInstr *MI, /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop /// invariants out to the preheader. void MachineLICM::HoistRegionPostRA() { + MachineBasicBlock *Preheader = getCurPreheader(); + if (!Preheader) + return; + unsigned NumRegs = TRI->getNumRegs(); - unsigned *PhysRegDefs = new unsigned[NumRegs]; - std::fill(PhysRegDefs, PhysRegDefs + NumRegs, 0); + BitVector PhysRegDefs(NumRegs); // Regs defined once in the loop. + BitVector PhysRegClobbers(NumRegs); // Regs defined more than once. SmallVector<CandidateInfo, 32> Candidates; SmallSet<int, 32> StoredFIs; @@ -485,16 +517,31 @@ void MachineLICM::HoistRegionPostRA() { for (MachineBasicBlock::livein_iterator I = BB->livein_begin(), E = BB->livein_end(); I != E; ++I) { unsigned Reg = *I; - ++PhysRegDefs[Reg]; - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) - ++PhysRegDefs[*AS]; + for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS) + PhysRegDefs.set(*AS); } SpeculationState = SpeculateUnknown; for (MachineBasicBlock::iterator MII = BB->begin(), E = BB->end(); MII != E; ++MII) { MachineInstr *MI = &*MII; - ProcessMI(MI, PhysRegDefs, StoredFIs, Candidates); + ProcessMI(MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates); + } + } + + // Gather the registers read / clobbered by the terminator. 
+ BitVector TermRegs(NumRegs); + MachineBasicBlock::iterator TI = Preheader->getFirstTerminator(); + if (TI != Preheader->end()) { + for (unsigned i = 0, e = TI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = TI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS) + TermRegs.set(*AS); } } @@ -503,19 +550,25 @@ void MachineLICM::HoistRegionPostRA() { // instruction in the loop. // 2. If the candidate is a load from stack slot (always true for now), // check if the slot is stored anywhere in the loop. + // 3. Make sure candidate def should not clobber + // registers read by the terminator. Similarly its def should not be + // clobbered by the terminator. for (unsigned i = 0, e = Candidates.size(); i != e; ++i) { if (Candidates[i].FI != INT_MIN && StoredFIs.count(Candidates[i].FI)) continue; - if (PhysRegDefs[Candidates[i].Def] == 1) { + unsigned Def = Candidates[i].Def; + if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) { bool Safe = true; MachineInstr *MI = Candidates[i].MI; for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) { const MachineOperand &MO = MI->getOperand(j); if (!MO.isReg() || MO.isDef() || !MO.getReg()) continue; - if (PhysRegDefs[MO.getReg()]) { + unsigned Reg = MO.getReg(); + if (PhysRegDefs.test(Reg) || + PhysRegClobbers.test(Reg)) { // If it's using a non-loop-invariant register, then it's obviously // not safe to hoist. Safe = false; @@ -526,8 +579,6 @@ void MachineLICM::HoistRegionPostRA() { HoistPostRA(MI, Candidates[i].Def); } } - - delete[] PhysRegDefs; } /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current @@ -556,26 +607,17 @@ void MachineLICM::AddToLiveIns(unsigned Reg) { /// dirty work. void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) { MachineBasicBlock *Preheader = getCurPreheader(); - if (!Preheader) return; // Now move the instructions to the predecessor, inserting it before any // terminator instructions. - DEBUG({ - dbgs() << "Hoisting " << *MI; - if (Preheader->getBasicBlock()) - dbgs() << " to MachineBasicBlock " - << Preheader->getName(); - if (MI->getParent()->getBasicBlock()) - dbgs() << " from MachineBasicBlock " - << MI->getParent()->getName(); - dbgs() << "\n"; - }); + DEBUG(dbgs() << "Hoisting to BB#" << Preheader->getNumber() << " from BB#" + << MI->getParent()->getNumber() << ": " << *MI); // Splice the instruction to the preheader. MachineBasicBlock *MBB = MI->getParent(); Preheader->splice(Preheader->getFirstTerminator(), MBB, MI); - // Add register to livein list to all the BBs in the current loop since a + // Add register to livein list to all the BBs in the current loop since a // loop invariant must be kept live throughout the whole loop. This is // important to ensure later passes do not scavenge the def register. AddToLiveIns(Def); @@ -589,7 +631,7 @@ void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) { bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) { if (SpeculationState != SpeculateUnknown) return SpeculationState == SpeculateFalse; - + if (BB != CurLoop->getHeader()) { // Check loop exiting blocks. 
SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks; @@ -605,57 +647,126 @@ bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) { return true; } -/// HoistRegion - Walk the specified region of the CFG (defined by all blocks -/// dominated by the specified block, and that are in the current loop) in depth -/// first order w.r.t the DominatorTree. This allows us to visit definitions -/// before uses, allowing us to hoist a loop body in one pass without iteration. -/// -void MachineLICM::HoistRegion(MachineDomTreeNode *N, bool IsHeader) { - assert(N != 0 && "Null dominator tree node?"); - MachineBasicBlock *BB = N->getBlock(); +void MachineLICM::EnterScope(MachineBasicBlock *MBB) { + DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n'); - // If the header of the loop containing this basic block is a landing pad, - // then don't try to hoist instructions out of this loop. - const MachineLoop *ML = MLI->getLoopFor(BB); - if (ML && ML->getHeader()->isLandingPad()) return; + // Remember livein register pressure. + BackTrace.push_back(RegPressure); +} - // If this subregion is not in the top level loop at all, exit. - if (!CurLoop->contains(BB)) return; +void MachineLICM::ExitScope(MachineBasicBlock *MBB) { + DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n'); + BackTrace.pop_back(); +} - MachineBasicBlock *Preheader = getCurPreheader(); - if (!Preheader) +/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given +/// dominator tree node if its a leaf or all of its children are done. Walk +/// up the dominator tree to destroy ancestors which are now done. +void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node, + DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, + DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) { + if (OpenChildren[Node]) return; - if (IsHeader) { + // Pop scope. + ExitScope(Node->getBlock()); + + // Now traverse upwards to pop ancestors whose offsprings are all done. + while (MachineDomTreeNode *Parent = ParentMap[Node]) { + unsigned Left = --OpenChildren[Parent]; + if (Left != 0) + break; + ExitScope(Parent->getBlock()); + Node = Parent; + } +} + +/// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all +/// blocks dominated by the specified header block, and that are in the +/// current loop) in depth first order w.r.t the DominatorTree. This allows +/// us to visit definitions before uses, allowing us to hoist a loop body in +/// one pass without iteration. +/// +void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { + SmallVector<MachineDomTreeNode*, 32> Scopes; + SmallVector<MachineDomTreeNode*, 8> WorkList; + DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap; + DenseMap<MachineDomTreeNode*, unsigned> OpenChildren; + + // Perform a DFS walk to determine the order of visit. + WorkList.push_back(HeaderN); + do { + MachineDomTreeNode *Node = WorkList.pop_back_val(); + assert(Node != 0 && "Null dominator tree node?"); + MachineBasicBlock *BB = Node->getBlock(); + + // If the header of the loop containing this basic block is a landing pad, + // then don't try to hoist instructions out of this loop. + const MachineLoop *ML = MLI->getLoopFor(BB); + if (ML && ML->getHeader()->isLandingPad()) + continue; + + // If this subregion is not in the top level loop at all, exit. 
+ if (!CurLoop->contains(BB)) + continue; + + Scopes.push_back(Node); + const std::vector<MachineDomTreeNode*> &Children = Node->getChildren(); + unsigned NumChildren = Children.size(); + + // Don't hoist things out of a large switch statement. This often causes + // code to be hoisted that wasn't going to be executed, and increases + // register pressure in a situation where it's likely to matter. + if (BB->succ_size() >= 25) + NumChildren = 0; + + OpenChildren[Node] = NumChildren; + // Add children in reverse order as then the next popped worklist node is + // the first child of this node. This means we ultimately traverse the + // DOM tree in exactly the same order as if we'd recursed. + for (int i = (int)NumChildren-1; i >= 0; --i) { + MachineDomTreeNode *Child = Children[i]; + ParentMap[Child] = Node; + WorkList.push_back(Child); + } + } while (!WorkList.empty()); + + if (Scopes.size() != 0) { + MachineBasicBlock *Preheader = getCurPreheader(); + if (!Preheader) + return; + // Compute registers which are livein into the loop headers. RegSeen.clear(); BackTrace.clear(); InitRegPressure(Preheader); } - // Remember livein register pressure. - BackTrace.push_back(RegPressure); + // Now perform LICM. + for (unsigned i = 0, e = Scopes.size(); i != e; ++i) { + MachineDomTreeNode *Node = Scopes[i]; + MachineBasicBlock *MBB = Node->getBlock(); - SpeculationState = SpeculateUnknown; - for (MachineBasicBlock::iterator - MII = BB->begin(), E = BB->end(); MII != E; ) { - MachineBasicBlock::iterator NextMII = MII; ++NextMII; - MachineInstr *MI = &*MII; - if (!Hoist(MI, Preheader)) - UpdateRegPressure(MI); - MII = NextMII; - } + MachineBasicBlock *Preheader = getCurPreheader(); + if (!Preheader) + continue; - // Don't hoist things out of a large switch statement. This often causes - // code to be hoisted that wasn't going to be executed, and increases - // register pressure in a situation where it's likely to matter. - if (BB->succ_size() < 25) { - const std::vector<MachineDomTreeNode*> &Children = N->getChildren(); - for (unsigned I = 0, E = Children.size(); I != E; ++I) - HoistRegion(Children[I]); - } + EnterScope(MBB); - BackTrace.pop_back(); + // Process the block + SpeculationState = SpeculateUnknown; + for (MachineBasicBlock::iterator + MII = MBB->begin(), E = MBB->end(); MII != E; ) { + MachineBasicBlock::iterator NextMII = MII; ++NextMII; + MachineInstr *MI = &*MII; + if (!Hoist(MI, Preheader)) + UpdateRegPressure(MI); + MII = NextMII; + } + + // If it's a leaf node, it's done. Traverse upwards to pop ancestors. + ExitScopeIfDone(Node, OpenChildren, ParentMap); + } } static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) { @@ -670,7 +781,7 @@ MachineLICM::getRegisterClassIDAndCost(const MachineInstr *MI, unsigned &RCId, unsigned &RCCost) const { const TargetRegisterClass *RC = MRI->getRegClass(Reg); EVT VT = *RC->vt_begin(); - if (VT == MVT::untyped) { + if (VT == MVT::Untyped) { RCId = RC->getID(); RCCost = 1; } else { @@ -678,7 +789,7 @@ MachineLICM::getRegisterClassIDAndCost(const MachineInstr *MI, RCCost = TLI->getRepRegClassCostFor(VT); } } - + /// InitRegPressure - Find all virtual register references that are liveout of /// the preheader to initialize the starting "register pressure". Note this /// does not count live through (livein but not used) registers. 
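
Editorial note: HoistOutOfLoop above replaces the old recursive HoistRegion walk with an explicit work list. Children are pushed in reverse so the visit order matches the recursion, OpenChildren counts the unfinished children of each dominator-tree node, and ExitScopeIfDone pops the per-block register-pressure scope (and any newly finished ancestors) as soon as a subtree completes. A self-contained sketch of that bookkeeping on stand-in types; Node, walkScopes, Enter and Exit are illustrative, not the MachineDomTreeNode API:

#include <map>
#include <vector>

struct Node { std::vector<Node*> Children; }; // stand-in for MachineDomTreeNode

// Visit a tree in the same order as a recursive pre-order walk, calling
// Enter(N) when N's scope opens and Exit(N) once its whole subtree is done.
void walkScopes(Node *Root, void (*Enter)(Node*), void (*Exit)(Node*)) {
  std::map<Node*, Node*> Parent;
  std::map<Node*, unsigned> Open;
  std::vector<Node*> Order;
  std::vector<Node*> Work(1, Root);

  // Pass 1: DFS to fix the visit order; push children in reverse so the next
  // node popped is always the first child, mirroring the recursive walk.
  while (!Work.empty()) {
    Node *N = Work.back(); Work.pop_back();
    Order.push_back(N);
    Open[N] = N->Children.size();
    for (unsigned i = N->Children.size(); i != 0; --i) {
      Parent[N->Children[i-1]] = N;
      Work.push_back(N->Children[i-1]);
    }
  }

  // Pass 2: process nodes in order; when a leaf finishes, walk up and close
  // every ancestor whose children are all done (the ExitScopeIfDone step).
  for (unsigned i = 0, e = Order.size(); i != e; ++i) {
    Node *N = Order[i];
    Enter(N);
    while (N && Open[N] == 0) {
      Exit(N);
      Node *P = 0;
      if (Parent.count(N))
        P = Parent[N];
      if (P)
        --Open[P];
      N = P;
    }
  }
}
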
@@ -762,6 +873,21 @@ void MachineLICM::UpdateRegPressure(const MachineInstr *MI) { } } +/// isLoadFromGOTOrConstantPool - Return true if this machine instruction +/// loads from global offset table or constant pool. +static bool isLoadFromGOTOrConstantPool(MachineInstr &MI) { + assert (MI.mayLoad() && "Expected MI that loads!"); + for (MachineInstr::mmo_iterator I = MI.memoperands_begin(), + E = MI.memoperands_end(); I != E; ++I) { + if (const Value *V = (*I)->getValue()) { + if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) + if (PSV == PSV->getGOT() || PSV == PSV->getConstantPool()) + return true; + } + } + return false; +} + /// IsLICMCandidate - Returns true if the instruction may be a suitable /// candidate for LICM. e.g. If the instruction is a call, then it's obviously /// not safe to hoist it. @@ -773,9 +899,12 @@ bool MachineLICM::IsLICMCandidate(MachineInstr &I) { // If it is load then check if it is guaranteed to execute by making sure that // it dominates all exiting blocks. If it doesn't, then there is a path out of - // the loop which does not execute this load, so we can't hoist it. + // the loop which does not execute this load, so we can't hoist it. Loads + // from constant memory are not safe to speculate all the time, for example + // indexed load from a jump table. // Stores and side effects are already checked by isSafeToMove. - if (I.getDesc().mayLoad() && !IsGuaranteedToExecute(I.getParent())) + if (I.mayLoad() && !isLoadFromGOTOrConstantPool(I) && + !IsGuaranteedToExecute(I.getParent())) return false; return true; @@ -785,7 +914,7 @@ bool MachineLICM::IsLICMCandidate(MachineInstr &I) { /// invariant. I.e., all virtual register operands are defined outside of the /// loop, physical registers aren't accessed explicitly, and there are no side /// effects that aren't captured by the operands or other flags. -/// +/// bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { if (!IsLICMCandidate(I)) return false; @@ -806,18 +935,8 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { // If the physreg has no defs anywhere, it's just an ambient register // and we can freely move its uses. Alternatively, if it's allocatable, // it could get allocated to something with a def during allocation. - if (!MRI->def_empty(Reg)) - return false; - if (AllocatableSet.test(Reg)) + if (!MRI->isConstantPhysReg(Reg, *I.getParent()->getParent())) return false; - // Check for a def among the register's aliases too. - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { - unsigned AliasReg = *Alias; - if (!MRI->def_empty(AliasReg)) - return false; - if (AllocatableSet.test(AliasReg)) - return false; - } // Otherwise it's safe to move. continue; } else if (!MO.isDead()) { @@ -847,22 +966,40 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { } -/// HasAnyPHIUse - Return true if the specified register is used by any -/// phi node. -bool MachineLICM::HasAnyPHIUse(unsigned Reg) const { - for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), - UE = MRI->use_end(); UI != UE; ++UI) { - MachineInstr *UseMI = &*UI; - if (UseMI->isPHI()) - return true; - // Look pass copies as well. - if (UseMI->isCopy()) { - unsigned Def = UseMI->getOperand(0).getReg(); - if (TargetRegisterInfo::isVirtualRegister(Def) && - HasAnyPHIUse(Def)) - return true; +/// HasLoopPHIUse - Return true if the specified instruction is used by a +/// phi node and hoisting it could cause a copy to be inserted. 
+bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const { + SmallVector<const MachineInstr*, 8> Work(1, MI); + do { + MI = Work.pop_back_val(); + for (ConstMIOperands MO(MI); MO.isValid(); ++MO) { + if (!MO->isReg() || !MO->isDef()) + continue; + unsigned Reg = MO->getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), + UE = MRI->use_end(); UI != UE; ++UI) { + MachineInstr *UseMI = &*UI; + // A PHI may cause a copy to be inserted. + if (UseMI->isPHI()) { + // A PHI inside the loop causes a copy because the live range of Reg is + // extended across the PHI. + if (CurLoop->contains(UseMI)) + return true; + // A PHI in an exit block can cause a copy to be inserted if the PHI + // has multiple predecessors in the loop with different values. + // For now, approximate by rejecting all exit blocks. + if (isExitBlock(UseMI->getParent())) + return true; + continue; + } + // Look past copies as well. + if (UseMI->isCopy() && CurLoop->contains(UseMI)) + Work.push_back(UseMI); + } } - } + } while (!Work.empty()); return false; } @@ -903,7 +1040,7 @@ bool MachineLICM::HasHighOperandLatency(MachineInstr &MI, /// IsCheapInstruction - Return true if the instruction is marked "cheap" or /// the operand latency between its def and a use is one or less. bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const { - if (MI.getDesc().isAsCheapAsAMove() || MI.isCopyLike()) + if (MI.isAsCheapAsAMove() || MI.isCopyLike()) return true; if (!InstrItins || InstrItins->isEmpty()) return false; @@ -930,16 +1067,25 @@ bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const { /// CanCauseHighRegPressure - Visit BBs from header to current BB, check /// if hoisting an instruction of the given cost matrix can cause high /// register pressure. -bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost) { +bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost, + bool CheapInstr) { for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end(); CI != CE; ++CI) { - if (CI->second <= 0) + if (CI->second <= 0) continue; unsigned RCId = CI->first; + unsigned Limit = RegLimit[RCId]; + int Cost = CI->second; + + // Don't hoist cheap instructions if they would increase register pressure, + // even if we're under the limit. + if (CheapInstr) + return true; + for (unsigned i = BackTrace.size(); i != 0; --i) { SmallVector<unsigned, 8> &RP = BackTrace[i-1]; - if (RP[RCId] + CI->second >= RegLimit[RCId]) + if (RP[RCId] + Cost >= Limit) return true; } } @@ -999,87 +1145,95 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { if (MI.isImplicitDef()) return true; - // If the instruction is cheap, only hoist if it is re-materilizable. LICM - // will increase register pressure. It's probably not worth it if the - // instruction is cheap. - // Also hoist loads from constant memory, e.g. load from stubs, GOT. Hoisting - // these tend to help performance in low register pressure situation. The - // trade off is it may cause spill in high pressure situation. It will end up - // adding a store in the loop preheader. But the reload is no more expensive. - // The side benefit is these loads are frequently CSE'ed. - if (IsCheapInstruction(MI)) { - if (!TII->isTriviallyReMaterializable(&MI, AA)) - return false; - } else { - // Estimate register pressure to determine whether to LICM the instruction. - // In low register pressure situation, we can be more aggressive about - // hoisting. 
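// Standalone illustrative sketch, not part of the patch: the transitive
// "would hoisting this feed a PHI?" walk from HasLoopPHIUse above, over a toy
// use graph. User, Value and the InLoop/InExitBlock flags are hypothetical
// stand-ins for MachineInstr, virtual registers and the CurLoop queries.
#include <vector>

struct User {
  enum Kind { PHI, Copy, Other } K;
  bool InLoop;        // corresponds to CurLoop->contains(UseMI)
  bool InExitBlock;   // corresponds to isExitBlock(UseMI->getParent())
  unsigned CopyDef;   // for Copy users: the value the copy defines
};

struct Value {
  std::vector<User> Users;
};

bool hasLoopPHIUse(unsigned Def, const std::vector<Value> &Values) {
  std::vector<unsigned> Work(1, Def);
  do {
    unsigned V = Work.back();
    Work.pop_back();
    for (const User &U : Values[V].Users) {
      if (U.K == User::PHI) {
        // A PHI inside the loop extends the live range across the PHI, and a
        // PHI in an exit block may need a copy too; both are rejected.
        if (U.InLoop || U.InExitBlock)
          return true;
        continue;
      }
      // Look through copies that stay inside the loop.
      if (U.K == User::Copy && U.InLoop)
        Work.push_back(U.CopyDef);
    }
  } while (!Work.empty());
  return false;
}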
Also, favors hoisting long latency instructions even in - // moderately high pressure situation. - // FIXME: If there are long latency loop-invariant instructions inside the - // loop at this point, why didn't the optimizer's LICM hoist them? - DenseMap<unsigned, int> Cost; - for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || MO.isImplicit()) - continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) - continue; + // Besides removing computation from the loop, hoisting an instruction has + // these effects: + // + // - The value defined by the instruction becomes live across the entire + // loop. This increases register pressure in the loop. + // + // - If the value is used by a PHI in the loop, a copy will be required for + // lowering the PHI after extending the live range. + // + // - When hoisting the last use of a value in the loop, that value no longer + // needs to be live in the loop. This lowers register pressure in the loop. + + bool CheapInstr = IsCheapInstruction(MI); + bool CreatesCopy = HasLoopPHIUse(&MI); + + // Don't hoist a cheap instruction if it would create a copy in the loop. + if (CheapInstr && CreatesCopy) { + DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI); + return false; + } - unsigned RCId, RCCost; - getRegisterClassIDAndCost(&MI, Reg, i, RCId, RCCost); - if (MO.isDef()) { - if (HasHighOperandLatency(MI, i, Reg)) { - ++NumHighLatency; - return true; - } + // Rematerializable instructions should always be hoisted since the register + // allocator can just pull them down again when needed. + if (TII->isTriviallyReMaterializable(&MI, AA)) + return true; + + // Estimate register pressure to determine whether to LICM the instruction. + // In low register pressure situation, we can be more aggressive about + // hoisting. Also, favors hoisting long latency instructions even in + // moderately high pressure situation. + // Cheap instructions will only be hoisted if they don't increase register + // pressure at all. + // FIXME: If there are long latency loop-invariant instructions inside the + // loop at this point, why didn't the optimizer's LICM hoist them? + DenseMap<unsigned, int> Cost; + for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI.getOperand(i); + if (!MO.isReg() || MO.isImplicit()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; - DenseMap<unsigned, int>::iterator CI = Cost.find(RCId); - if (CI != Cost.end()) - CI->second += RCCost; - else - Cost.insert(std::make_pair(RCId, RCCost)); - } else if (isOperandKill(MO, MRI)) { - // Is a virtual register use is a kill, hoisting it out of the loop - // may actually reduce register pressure or be register pressure - // neutral. - DenseMap<unsigned, int>::iterator CI = Cost.find(RCId); - if (CI != Cost.end()) - CI->second -= RCCost; - else - Cost.insert(std::make_pair(RCId, -RCCost)); + unsigned RCId, RCCost; + getRegisterClassIDAndCost(&MI, Reg, i, RCId, RCCost); + if (MO.isDef()) { + if (HasHighOperandLatency(MI, i, Reg)) { + DEBUG(dbgs() << "Hoist High Latency: " << MI); + ++NumHighLatency; + return true; } + Cost[RCId] += RCCost; + } else if (isOperandKill(MO, MRI)) { + // Is a virtual register use is a kill, hoisting it out of the loop + // may actually reduce register pressure or be register pressure + // neutral. 
+ Cost[RCId] -= RCCost; } + } - // Visit BBs from header to current BB, if hoisting this doesn't cause - // high register pressure, then it's safe to proceed. - if (!CanCauseHighRegPressure(Cost)) { - ++NumLowRP; - return true; - } + // Visit BBs from header to current BB, if hoisting this doesn't cause + // high register pressure, then it's safe to proceed. + if (!CanCauseHighRegPressure(Cost, CheapInstr)) { + DEBUG(dbgs() << "Hoist non-reg-pressure: " << MI); + ++NumLowRP; + return true; + } - // Do not "speculate" in high register pressure situation. If an - // instruction is not guaranteed to be executed in the loop, it's best to be - // conservative. - if (AvoidSpeculation && - (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) - return false; + // Don't risk increasing register pressure if it would create copies. + if (CreatesCopy) { + DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI); + return false; + } - // High register pressure situation, only hoist if the instruction is going to - // be remat'ed. - if (!TII->isTriviallyReMaterializable(&MI, AA) && - !MI.isInvariantLoad(AA)) - return false; + // Do not "speculate" in high register pressure situation. If an + // instruction is not guaranteed to be executed in the loop, it's best to be + // conservative. + if (AvoidSpeculation && + (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) { + DEBUG(dbgs() << "Won't speculate: " << MI); + return false; } - // If result(s) of this instruction is used by PHIs outside of the loop, then - // don't hoist it if the instruction because it will introduce an extra copy. - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || !MO.isDef()) - continue; - if (HasAnyPHIUse(MO.getReg())) - return false; + // High register pressure situation, only hoist if the instruction is going + // to be remat'ed. + if (!TII->isTriviallyReMaterializable(&MI, AA) && + !MI.isInvariantLoad(AA)) { + DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI); + return false; } return true; @@ -1087,7 +1241,7 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { // Don't unfold simple loads. - if (MI->getDesc().canFoldAsLoad()) + if (MI->canFoldAsLoad()) return 0; // If not, we may be able to unfold a load and hoist that. @@ -1123,8 +1277,9 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { assert(NewMIs.size() == 2 && "Unfolded a load into multiple instructions!"); MachineBasicBlock *MBB = MI->getParent(); - MBB->insert(MI, NewMIs[0]); - MBB->insert(MI, NewMIs[1]); + MachineBasicBlock::iterator Pos = MI; + MBB->insert(Pos, NewMIs[0]); + MBB->insert(Pos, NewMIs[1]); // If unfolding produced a load that wasn't loop-invariant or profitable to // hoist, discard the new instructions and bail. if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) { @@ -1180,6 +1335,7 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI, // Replace virtual registers defined by MI by their counterparts defined // by Dup. 
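// Standalone illustrative sketch, not part of the patch: the register-pressure
// cost accounting that IsProfitableToHoist and CanCauseHighRegPressure use
// above. Operand, RegLimit and the pressure snapshots are simplified
// stand-ins; the point is that defs add their class cost, killed uses subtract
// it, and the net cost is then tested against every pressure snapshot from the
// loop header down to the current block.
#include <cstddef>
#include <map>
#include <vector>

struct Operand {
  bool IsDef;     // defines a value that becomes live across the loop
  bool IsKill;    // last use of a value, so hoisting frees it in the loop
  unsigned RCId;  // register class of the operand
  int RCCost;     // cost contribution of that class
};

bool wouldRaisePressure(const std::vector<Operand> &Ops, bool CheapInstr,
                        const std::vector<std::vector<int>> &BackTrace,
                        const std::vector<int> &RegLimit) {
  std::map<unsigned, int> Cost;
  for (const Operand &Op : Ops) {
    if (Op.IsDef)
      Cost[Op.RCId] += Op.RCCost;
    else if (Op.IsKill)
      Cost[Op.RCId] -= Op.RCCost;
  }

  for (const auto &CI : Cost) {
    if (CI.second <= 0)
      continue;                 // This class only gets cheaper or stays flat.
    if (CheapInstr)
      return true;              // Cheap instructions get no pressure budget.
    for (std::size_t i = BackTrace.size(); i != 0; --i)
      if (BackTrace[i - 1][CI.first] + CI.second >= RegLimit[CI.first])
        return true;            // Hits the class limit somewhere on the path.
  }
  return false;
}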
+ SmallVector<unsigned, 2> Defs; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); @@ -1190,11 +1346,33 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI, "Instructions with different phys regs are not identical!"); if (MO.isReg() && MO.isDef() && - !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { - MRI->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg()); - MRI->clearKillFlags(Dup->getOperand(i).getReg()); + !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + Defs.push_back(i); + } + + SmallVector<const TargetRegisterClass*, 2> OrigRCs; + for (unsigned i = 0, e = Defs.size(); i != e; ++i) { + unsigned Idx = Defs[i]; + unsigned Reg = MI->getOperand(Idx).getReg(); + unsigned DupReg = Dup->getOperand(Idx).getReg(); + OrigRCs.push_back(MRI->getRegClass(DupReg)); + + if (!MRI->constrainRegClass(DupReg, MRI->getRegClass(Reg))) { + // Restore old RCs if more than one defs. + for (unsigned j = 0; j != i; ++j) + MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]); + return false; } } + + for (unsigned i = 0, e = Defs.size(); i != e; ++i) { + unsigned Idx = Defs[i]; + unsigned Reg = MI->getOperand(Idx).getReg(); + unsigned DupReg = Dup->getOperand(Idx).getReg(); + MRI->replaceRegWith(Reg, DupReg); + MRI->clearKillFlags(DupReg); + } + MI->eraseFromParent(); ++NumCSEed; return true; diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index 80c4854238af..ea98b23c6d57 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -257,7 +257,7 @@ MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI, : ImmutablePass(ID), Context(MAI, MRI, MOFI), ObjFileMMI(0), CompactUnwindEncoding(0), CurCallSite(0), CallsEHReturn(0), CallsUnwindInit(0), DbgInfoAvailable(false), - CallsExternalVAFunctionWithFloatingPointArguments(false) { + UsesVAFloatArgument(false) { initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry()); // Always emit some info, by default "no personality" info. Personalities.push_back(NULL); @@ -268,9 +268,9 @@ MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI, MachineModuleInfo::MachineModuleInfo() : ImmutablePass(ID), Context(*(MCAsmInfo*)0, *(MCRegisterInfo*)0, (MCObjectFileInfo*)0) { - assert(0 && "This MachineModuleInfo constructor should never be called, MMI " - "should always be explicitly constructed by LLVMTargetMachine"); - abort(); + llvm_unreachable("This MachineModuleInfo constructor should never be called, " + "MMI should always be explicitly constructed by " + "LLVMTargetMachine"); } MachineModuleInfo::~MachineModuleInfo() { @@ -503,8 +503,7 @@ void MachineModuleInfo::TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) { /// indexes. void MachineModuleInfo::setCallSiteLandingPad(MCSymbol *Sym, ArrayRef<unsigned> Sites) { - for (unsigned I = 0, E = Sites.size(); I != E; ++I) - LPadToCallSiteMap[Sym].push_back(Sites[I]); + LPadToCallSiteMap[Sym].append(Sites.begin(), Sites.end()); } /// getTypeIDFor - Return the type id for the specified typeinfo. This is @@ -541,8 +540,7 @@ try_next:; // Add the new filter. 
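// Standalone illustrative sketch, not part of the patch: the constrain-first,
// rewrite-last shape of the EliminateCSE change above. Register classes are
// modelled as plain ints and "constrain" as a toy compatibility rule; the
// point is that no register is replaced until every duplicate def could be
// constrained, and classes already changed are restored if a later constraint
// fails.
#include <cstddef>
#include <utility>
#include <vector>

// Toy rule: a register may only be constrained to a class id that is at least
// as restrictive (here: numerically >=) as its current class.
static bool constrain(std::vector<int> &Class, unsigned Reg, int NewClass) {
  if (NewClass < Class[Reg])
    return false;
  Class[Reg] = NewClass;
  return true;
}

bool eliminateDupDefs(std::vector<int> &Class,
                      const std::vector<std::pair<unsigned, unsigned>> &Defs,
                      std::vector<std::pair<unsigned, unsigned>> &Replacements) {
  std::vector<int> OrigClass;
  for (std::size_t i = 0; i != Defs.size(); ++i) {
    unsigned Reg = Defs[i].first;      // def of the instruction being erased
    unsigned DupReg = Defs[i].second;  // matching def of the earlier duplicate
    OrigClass.push_back(Class[DupReg]);
    if (!constrain(Class, DupReg, Class[Reg])) {
      // Roll back the classes changed so far and keep the instruction.
      for (std::size_t j = 0; j != i; ++j)
        Class[Defs[j].second] = OrigClass[j];
      return false;
    }
  }
  // Every constraint held; now it is safe to rewrite the registers.
  for (std::size_t i = 0; i != Defs.size(); ++i)
    Replacements.push_back(std::make_pair(Defs[i].first, Defs[i].second));
  return true;
}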
int FilterID = -(1 + FilterIds.size()); FilterIds.reserve(FilterIds.size() + TyIds.size() + 1); - for (unsigned I = 0, N = TyIds.size(); I != N; ++I) - FilterIds.push_back(TyIds[I]); + FilterIds.insert(FilterIds.end(), TyIds.begin(), TyIds.end()); FilterEnds.push_back(FilterIds.size()); FilterIds.push_back(0); // terminator return FilterID; @@ -561,13 +559,13 @@ unsigned MachineModuleInfo::getPersonalityIndex() const { const Function* Personality = NULL; // Scan landing pads. If there is at least one non-NULL personality - use it. - for (unsigned i = 0; i != LandingPads.size(); ++i) + for (unsigned i = 0, e = LandingPads.size(); i != e; ++i) if (LandingPads[i].Personality) { Personality = LandingPads[i].Personality; break; } - for (unsigned i = 0; i < Personalities.size(); ++i) { + for (unsigned i = 0, e = Personalities.size(); i < e; ++i) { if (Personalities[i] == Personality) return i; } diff --git a/lib/CodeGen/MachinePassRegistry.cpp b/lib/CodeGen/MachinePassRegistry.cpp index 9f4ef1287803..58e067bcb9b2 100644 --- a/lib/CodeGen/MachinePassRegistry.cpp +++ b/lib/CodeGen/MachinePassRegistry.cpp @@ -16,6 +16,7 @@ using namespace llvm; +void MachinePassRegistryListener::anchor() { } /// Add - Adds a function pass to the registration list. /// diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index 266ebf64a3fc..7ea151713a6d 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -18,11 +18,12 @@ using namespace llvm; MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) - : TRI(&TRI), IsSSA(true) { + : TRI(&TRI), IsSSA(true), TracksLiveness(true) { VRegInfo.reserve(256); RegAllocHints.reserve(256); UsedPhysRegs.resize(TRI.getNumRegs()); - + UsedPhysRegMask.resize(TRI.getNumRegs()); + // Create the physreg use/def lists. PhysRegUseDefLists = new MachineOperand*[TRI.getNumRegs()]; memset(PhysRegUseDefLists, 0, sizeof(MachineOperand*)*TRI.getNumRegs()); @@ -30,9 +31,7 @@ MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) MachineRegisterInfo::~MachineRegisterInfo() { #ifndef NDEBUG - for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) - assert(VRegInfo[TargetRegisterInfo::index2VirtReg(i)].second == 0 && - "Vreg use list non-empty still?"); + clearVirtRegs(); for (unsigned i = 0, e = UsedPhysRegs.size(); i != e; ++i) assert(!PhysRegUseDefLists[i] && "PhysRegUseDefLists has entries after all instructions are deleted"); @@ -76,12 +75,14 @@ MachineRegisterInfo::recomputeRegClass(unsigned Reg, const TargetMachine &TM) { // Accumulate constraints from all uses. for (reg_nodbg_iterator I = reg_nodbg_begin(Reg), E = reg_nodbg_end(); I != E; ++I) { - // TRI doesn't have accurate enough information to model this yet. - if (I.getOperand().getSubReg()) - return false; const TargetRegisterClass *OpRC = I->getRegClassConstraint(I.getOperandNo(), TII, TRI); - if (OpRC) + if (unsigned SubIdx = I.getOperand().getSubReg()) { + if (OpRC) + NewRC = TRI->getMatchingSuperRegClass(NewRC, OpRC, SubIdx); + else + NewRC = TRI->getSubClassWithSubReg(NewRC, SubIdx); + } else if (OpRC) NewRC = TRI->getCommonSubClass(NewRC, OpRC); if (!NewRC || NewRC == OldRC) return false; @@ -115,6 +116,16 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){ return Reg; } +/// clearVirtRegs - Remove all virtual registers (after physreg assignment). 
+void MachineRegisterInfo::clearVirtRegs() { +#ifndef NDEBUG + for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) + assert(VRegInfo[TargetRegisterInfo::index2VirtReg(i)].second == 0 && + "Vreg use list non-empty still?"); +#endif + VRegInfo.clear(); +} + /// HandleVRegListReallocation - We just added a virtual register to the /// VRegInfo info list and it reallocated. Update the use/def lists info /// pointers. @@ -150,9 +161,8 @@ void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) { /// form, so there should only be one definition. MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const { // Since we are in SSA form, we can use the first definition. - if (!def_empty(Reg)) - return &*def_begin(Reg); - return 0; + def_iterator I = def_begin(Reg); + return !I.atEnd() ? &*I : 0; } bool MachineRegisterInfo::hasOneUse(unsigned RegNo) const { @@ -242,18 +252,31 @@ MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB, } } -void MachineRegisterInfo::closePhysRegsUsed(const TargetRegisterInfo &TRI) { - for (int i = UsedPhysRegs.find_first(); i >= 0; - i = UsedPhysRegs.find_next(i)) - for (const unsigned *SS = TRI.getSubRegisters(i); - unsigned SubReg = *SS; ++SS) - if (SubReg > unsigned(i)) - UsedPhysRegs.set(SubReg); -} - #ifndef NDEBUG void MachineRegisterInfo::dumpUses(unsigned Reg) const { for (use_iterator I = use_begin(Reg), E = use_end(); I != E; ++I) I.getOperand().getParent()->dump(); } #endif + +void MachineRegisterInfo::freezeReservedRegs(const MachineFunction &MF) { + ReservedRegs = TRI->getReservedRegs(MF); +} + +bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg, + const MachineFunction &MF) const { + assert(TargetRegisterInfo::isPhysicalRegister(PhysReg)); + + // Check if any overlapping register is modified. + for (const uint16_t *R = TRI->getOverlaps(PhysReg); *R; ++R) + if (!def_empty(*R)) + return false; + + // Check if any overlapping register is allocatable so it may be used later. + if (AllocatableRegs.empty()) + AllocatableRegs = TRI->getAllocatableSet(MF); + for (const uint16_t *R = TRI->getOverlaps(PhysReg); *R; ++R) + if (AllocatableRegs.test(*R)) + return false; + return true; +} diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp index 84d6df25397c..070a55704dc5 100644 --- a/lib/CodeGen/MachineSSAUpdater.cpp +++ b/lib/CodeGen/MachineSSAUpdater.cpp @@ -81,7 +81,7 @@ unsigned LookForIdenticalPHI(MachineBasicBlock *BB, if (BB->empty()) return 0; - MachineBasicBlock::iterator I = BB->front(); + MachineBasicBlock::iterator I = BB->begin(); if (!I->isPHI()) return 0; @@ -182,7 +182,7 @@ unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) { return DupPHI; // Otherwise, we do need a PHI: insert one now. - MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front(); + MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->begin(); MachineInstr *InsertedPHI = InsertNewDef(TargetOpcode::PHI, BB, Loc, VRC, MRI, TII); @@ -214,7 +214,6 @@ MachineBasicBlock *findCorrespondingPred(const MachineInstr *MI, } llvm_unreachable("MachineOperand::getParent() failure?"); - return 0; } /// RewriteUse - Rewrite a use of the symbolic value. This handles PHI nodes, @@ -311,7 +310,7 @@ public: /// Add it into the specified block and return the register. static unsigned CreateEmptyPHI(MachineBasicBlock *BB, unsigned NumPreds, MachineSSAUpdater *Updater) { - MachineBasicBlock::iterator Loc = BB->empty() ? 
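// Standalone illustrative sketch, not part of the patch: the check behind the
// new isConstantPhysReg() queries above. Overlaps, HasDef and Allocatable are
// toy stand-ins for getOverlaps(), def_empty() and getAllocatableSet(); a
// physical register is treated as constant only if nothing overlapping it is
// ever written and nothing overlapping it could be handed out by the register
// allocator later.
#include <vector>

bool isConstantPhysReg(unsigned PhysReg,
                       const std::vector<std::vector<unsigned>> &Overlaps,
                       const std::vector<bool> &HasDef,
                       const std::vector<bool> &Allocatable) {
  for (unsigned R : Overlaps[PhysReg])
    if (HasDef[R])
      return false;            // Some alias or sub/super-register is written.
  for (unsigned R : Overlaps[PhysReg])
    if (Allocatable[R])
      return false;            // Could be allocated, and then written, later.
  return true;
}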
BB->end() : BB->front(); + MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->begin(); MachineInstr *PHI = InsertNewDef(TargetOpcode::PHI, BB, Loc, Updater->VRC, Updater->MRI, Updater->TII); diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp new file mode 100644 index 000000000000..1d3241b8cc6b --- /dev/null +++ b/lib/CodeGen/MachineScheduler.cpp @@ -0,0 +1,614 @@ +//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// MachineScheduler schedules machine instructions after phi elimination. It +// preserves LiveIntervals so it can be invoked before register allocation. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "misched" + +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineScheduler.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/ScheduleDAGInstrs.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/PriorityQueue.h" + +#include <queue> + +using namespace llvm; + +static cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden, + cl::desc("Force top-down list scheduling")); +static cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden, + cl::desc("Force bottom-up list scheduling")); + +#ifndef NDEBUG +static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden, + cl::desc("Pop up a window to show MISched dags after they are processed")); + +static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden, + cl::desc("Stop scheduling after N instructions"), cl::init(~0U)); +#else +static bool ViewMISchedDAGs = false; +#endif // NDEBUG + +//===----------------------------------------------------------------------===// +// Machine Instruction Scheduling Pass and Registry +//===----------------------------------------------------------------------===// + +namespace { +/// MachineScheduler runs after coalescing and before register allocation. 
+class MachineScheduler : public MachineSchedContext, + public MachineFunctionPass { +public: + MachineScheduler(); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + + virtual void releaseMemory() {} + + virtual bool runOnMachineFunction(MachineFunction&); + + virtual void print(raw_ostream &O, const Module* = 0) const; + + static char ID; // Class identification, replacement for typeinfo +}; +} // namespace + +char MachineScheduler::ID = 0; + +char &llvm::MachineSchedulerID = MachineScheduler::ID; + +INITIALIZE_PASS_BEGIN(MachineScheduler, "misched", + "Machine Instruction Scheduler", false, false) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_END(MachineScheduler, "misched", + "Machine Instruction Scheduler", false, false) + +MachineScheduler::MachineScheduler() +: MachineFunctionPass(ID) { + initializeMachineSchedulerPass(*PassRegistry::getPassRegistry()); +} + +void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequiredID(MachineDominatorsID); + AU.addRequired<MachineLoopInfo>(); + AU.addRequired<AliasAnalysis>(); + AU.addRequired<TargetPassConfig>(); + AU.addRequired<SlotIndexes>(); + AU.addPreserved<SlotIndexes>(); + AU.addRequired<LiveIntervals>(); + AU.addPreserved<LiveIntervals>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +MachinePassRegistry MachineSchedRegistry::Registry; + +/// A dummy default scheduler factory indicates whether the scheduler +/// is overridden on the command line. +static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) { + return 0; +} + +/// MachineSchedOpt allows command line selection of the scheduler. +static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false, + RegisterPassParser<MachineSchedRegistry> > +MachineSchedOpt("misched", + cl::init(&useDefaultMachineSched), cl::Hidden, + cl::desc("Machine instruction scheduler to use")); + +static MachineSchedRegistry +DefaultSchedRegistry("default", "Use the target's default scheduler choice.", + useDefaultMachineSched); + +/// Forward declare the standard machine scheduler. This will be used as the +/// default scheduler if the target does not set a default. +static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C); + +/// Top-level MachineScheduler pass driver. +/// +/// Visit blocks in function order. Divide each block into scheduling regions +/// and visit them bottom-up. Visiting regions bottom-up is not required, but is +/// consistent with the DAG builder, which traverses the interior of the +/// scheduling regions bottom-up. +/// +/// This design avoids exposing scheduling boundaries to the DAG builder, +/// simplifying the DAG builder's support for "special" target instructions. +/// At the same time the design allows target schedulers to operate across +/// scheduling boundaries, for example to bundle the boudary instructions +/// without reordering them. This creates complexity, because the target +/// scheduler must update the RegionBegin and RegionEnd positions cached by +/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler +/// design would be to split blocks at scheduling boundaries, but LLVM has a +/// general bias against block splitting purely for implementation simplicity. +bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { + // Initialize the context of the pass. 
+ MF = &mf; + MLI = &getAnalysis<MachineLoopInfo>(); + MDT = &getAnalysis<MachineDominatorTree>(); + PassConfig = &getAnalysis<TargetPassConfig>(); + AA = &getAnalysis<AliasAnalysis>(); + + LIS = &getAnalysis<LiveIntervals>(); + const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); + + // Select the scheduler, or set the default. + MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt; + if (Ctor == useDefaultMachineSched) { + // Get the default scheduler set by the target. + Ctor = MachineSchedRegistry::getDefault(); + if (!Ctor) { + Ctor = createConvergingSched; + MachineSchedRegistry::setDefault(Ctor); + } + } + // Instantiate the selected scheduler. + OwningPtr<ScheduleDAGInstrs> Scheduler(Ctor(this)); + + // Visit all machine basic blocks. + for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end(); + MBB != MBBEnd; ++MBB) { + + Scheduler->startBlock(MBB); + + // Break the block into scheduling regions [I, RegionEnd), and schedule each + // region as soon as it is discovered. RegionEnd points the the scheduling + // boundary at the bottom of the region. The DAG does not include RegionEnd, + // but the region does (i.e. the next RegionEnd is above the previous + // RegionBegin). If the current block has no terminator then RegionEnd == + // MBB->end() for the bottom region. + // + // The Scheduler may insert instructions during either schedule() or + // exitRegion(), even for empty regions. So the local iterators 'I' and + // 'RegionEnd' are invalid across these calls. + unsigned RemainingCount = MBB->size(); + for(MachineBasicBlock::iterator RegionEnd = MBB->end(); + RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) { + // Avoid decrementing RegionEnd for blocks with no terminator. + if (RegionEnd != MBB->end() + || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) { + --RegionEnd; + // Count the boundary instruction. + --RemainingCount; + } + + // The next region starts above the previous region. Look backward in the + // instruction stream until we find the nearest boundary. + MachineBasicBlock::iterator I = RegionEnd; + for(;I != MBB->begin(); --I, --RemainingCount) { + if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF)) + break; + } + // Notify the scheduler of the region, even if we may skip scheduling + // it. Perhaps it still needs to be bundled. + Scheduler->enterRegion(MBB, I, RegionEnd, RemainingCount); + + // Skip empty scheduling regions (0 or 1 schedulable instructions). + if (I == RegionEnd || I == llvm::prior(RegionEnd)) { + // Close the current region. Bundle the terminator if needed. + // This invalidates 'RegionEnd' and 'I'. + Scheduler->exitRegion(); + continue; + } + DEBUG(dbgs() << "MachineScheduling " << MF->getFunction()->getName() + << ":BB#" << MBB->getNumber() << "\n From: " << *I << " To: "; + if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; + else dbgs() << "End"; + dbgs() << " Remaining: " << RemainingCount << "\n"); + + // Schedule a region: possibly reorder instructions. + // This invalidates 'RegionEnd' and 'I'. + Scheduler->schedule(); + + // Close the current region. + Scheduler->exitRegion(); + + // Scheduling has invalidated the current iterator 'I'. Ask the + // scheduler for the top of it's scheduled region. 
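// Standalone illustrative sketch, not part of the patch: the bottom-up region
// splitting described in the comment above, on a plain vector of
// "instructions" with a boundary predicate. scheduleRegion is a hypothetical
// callback; the point is how RegionEnd walks upward from the block end, how
// the boundary instruction is counted but not scheduled, and how regions with
// zero or one schedulable instruction are skipped.
#include <cassert>
#include <cstddef>
#include <functional>
#include <vector>

void splitIntoRegions(const std::vector<int> &Block,
                      const std::function<bool(int)> &IsBoundary,
                      const std::function<void(std::size_t, std::size_t)>
                          &scheduleRegion) {
  std::size_t Remaining = Block.size();
  std::size_t RegionEnd = Block.size();
  while (RegionEnd != 0) {
    // Step over (and count) the boundary instruction at the bottom, unless we
    // are at the very end of a block that has no terminator.
    if (RegionEnd != Block.size() || IsBoundary(Block[RegionEnd - 1])) {
      --RegionEnd;
      --Remaining;
    }
    // Walk upward to the nearest boundary above; [I, RegionEnd) is a region.
    std::size_t I = RegionEnd;
    while (I != 0 && !IsBoundary(Block[I - 1])) {
      --I;
      --Remaining;
    }
    if (RegionEnd - I > 1)
      scheduleRegion(I, RegionEnd);   // Skip empty/one-instruction regions.
    // The next region ends just above where this one began.
    RegionEnd = I;
  }
  assert(Remaining == 0 && "Instruction count mismatch!");
  (void)Remaining;
}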
+ RegionEnd = Scheduler->begin(); + } + assert(RemainingCount == 0 && "Instruction count mismatch!"); + Scheduler->finishBlock(); + } + Scheduler->finalizeSchedule(); + DEBUG(LIS->print(dbgs())); + return true; +} + +void MachineScheduler::print(raw_ostream &O, const Module* m) const { + // unimplemented +} + +//===----------------------------------------------------------------------===// +// MachineSchedStrategy - Interface to a machine scheduling algorithm. +//===----------------------------------------------------------------------===// + +namespace { +class ScheduleDAGMI; + +/// MachineSchedStrategy - Interface used by ScheduleDAGMI to drive the selected +/// scheduling algorithm. +/// +/// If this works well and targets wish to reuse ScheduleDAGMI, we may expose it +/// in ScheduleDAGInstrs.h +class MachineSchedStrategy { +public: + virtual ~MachineSchedStrategy() {} + + /// Initialize the strategy after building the DAG for a new region. + virtual void initialize(ScheduleDAGMI *DAG) = 0; + + /// Pick the next node to schedule, or return NULL. Set IsTopNode to true to + /// schedule the node at the top of the unscheduled region. Otherwise it will + /// be scheduled at the bottom. + virtual SUnit *pickNode(bool &IsTopNode) = 0; + + /// When all predecessor dependencies have been resolved, free this node for + /// top-down scheduling. + virtual void releaseTopNode(SUnit *SU) = 0; + /// When all successor dependencies have been resolved, free this node for + /// bottom-up scheduling. + virtual void releaseBottomNode(SUnit *SU) = 0; +}; +} // namespace + +//===----------------------------------------------------------------------===// +// ScheduleDAGMI - Base class for MachineInstr scheduling with LiveIntervals +// preservation. +//===----------------------------------------------------------------------===// + +namespace { +/// ScheduleDAGMI is an implementation of ScheduleDAGInstrs that schedules +/// machine instructions while updating LiveIntervals. +class ScheduleDAGMI : public ScheduleDAGInstrs { + AliasAnalysis *AA; + MachineSchedStrategy *SchedImpl; + + /// The top of the unscheduled zone. + MachineBasicBlock::iterator CurrentTop; + + /// The bottom of the unscheduled zone. + MachineBasicBlock::iterator CurrentBottom; + + /// The number of instructions scheduled so far. Used to cut off the + /// scheduler at the point determined by misched-cutoff. + unsigned NumInstrsScheduled; +public: + ScheduleDAGMI(MachineSchedContext *C, MachineSchedStrategy *S): + ScheduleDAGInstrs(*C->MF, *C->MLI, *C->MDT, /*IsPostRA=*/false, C->LIS), + AA(C->AA), SchedImpl(S), CurrentTop(), CurrentBottom(), + NumInstrsScheduled(0) {} + + ~ScheduleDAGMI() { + delete SchedImpl; + } + + MachineBasicBlock::iterator top() const { return CurrentTop; } + MachineBasicBlock::iterator bottom() const { return CurrentBottom; } + + /// Implement ScheduleDAGInstrs interface. + void schedule(); + +protected: + void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos); + bool checkSchedLimit(); + + void releaseSucc(SUnit *SU, SDep *SuccEdge); + void releaseSuccessors(SUnit *SU); + void releasePred(SUnit *SU, SDep *PredEdge); + void releasePredecessors(SUnit *SU); +}; +} // namespace + +/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When +/// NumPredsLeft reaches zero, release the successor node. 
+void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) { + SUnit *SuccSU = SuccEdge->getSUnit(); + +#ifndef NDEBUG + if (SuccSU->NumPredsLeft == 0) { + dbgs() << "*** Scheduling failed! ***\n"; + SuccSU->dump(this); + dbgs() << " has been released too many times!\n"; + llvm_unreachable(0); + } +#endif + --SuccSU->NumPredsLeft; + if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) + SchedImpl->releaseTopNode(SuccSU); +} + +/// releaseSuccessors - Call releaseSucc on each of SU's successors. +void ScheduleDAGMI::releaseSuccessors(SUnit *SU) { + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + releaseSucc(SU, &*I); + } +} + +/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When +/// NumSuccsLeft reaches zero, release the predecessor node. +void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) { + SUnit *PredSU = PredEdge->getSUnit(); + +#ifndef NDEBUG + if (PredSU->NumSuccsLeft == 0) { + dbgs() << "*** Scheduling failed! ***\n"; + PredSU->dump(this); + dbgs() << " has been released too many times!\n"; + llvm_unreachable(0); + } +#endif + --PredSU->NumSuccsLeft; + if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) + SchedImpl->releaseBottomNode(PredSU); +} + +/// releasePredecessors - Call releasePred on each of SU's predecessors. +void ScheduleDAGMI::releasePredecessors(SUnit *SU) { + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + releasePred(SU, &*I); + } +} + +void ScheduleDAGMI::moveInstruction(MachineInstr *MI, + MachineBasicBlock::iterator InsertPos) { + // Fix RegionBegin if the first instruction moves down. + if (&*RegionBegin == MI) + RegionBegin = llvm::next(RegionBegin); + BB->splice(InsertPos, BB, MI); + LIS->handleMove(MI); + // Fix RegionBegin if another instruction moves above the first instruction. + if (RegionBegin == InsertPos) + RegionBegin = MI; +} + +bool ScheduleDAGMI::checkSchedLimit() { +#ifndef NDEBUG + if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) { + CurrentTop = CurrentBottom; + return false; + } + ++NumInstrsScheduled; +#endif + return true; +} + +/// schedule - Called back from MachineScheduler::runOnMachineFunction +/// after setting up the current scheduling region. +void ScheduleDAGMI::schedule() { + buildSchedGraph(AA); + + DEBUG(dbgs() << "********** MI Scheduling **********\n"); + DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) + SUnits[su].dumpAll(this)); + + if (ViewMISchedDAGs) viewGraph(); + + SchedImpl->initialize(this); + + // Release edges from the special Entry node or to the special Exit node. + releaseSuccessors(&EntrySU); + releasePredecessors(&ExitSU); + + // Release all DAG roots for scheduling. + for (std::vector<SUnit>::iterator I = SUnits.begin(), E = SUnits.end(); + I != E; ++I) { + // A SUnit is ready to top schedule if it has no predecessors. + if (I->Preds.empty()) + SchedImpl->releaseTopNode(&(*I)); + // A SUnit is ready to bottom schedule if it has no successors. + if (I->Succs.empty()) + SchedImpl->releaseBottomNode(&(*I)); + } + + CurrentTop = RegionBegin; + CurrentBottom = RegionEnd; + bool IsTopNode = false; + while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) { + DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom") + << " Scheduling Instruction:\n"; SU->dump(this)); + if (!checkSchedLimit()) + break; + + // Move the instruction to its new location in the instruction stream. 
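// Standalone illustrative sketch, not part of the patch: the counter-based
// release scheme used by releaseSucc/releasePred above, shown for the
// top-down direction only. Succs and NumPredsLeft are toy stand-ins for the
// SUnit edge lists; a node becomes ready exactly when its last unscheduled
// predecessor has been scheduled.
#include <cassert>
#include <vector>

struct ToyDAG {
  std::vector<std::vector<unsigned>> Succs; // successor indices per node
  std::vector<unsigned> NumPredsLeft;       // unscheduled predecessor counts
  std::vector<unsigned> Ready;              // nodes free for scheduling
};

void releaseSuccessors(ToyDAG &DAG, unsigned SU) {
  for (unsigned Succ : DAG.Succs[SU]) {
    assert(DAG.NumPredsLeft[Succ] > 0 && "released too many times");
    if (--DAG.NumPredsLeft[Succ] == 0)
      DAG.Ready.push_back(Succ);            // releaseTopNode(Succ)
  }
}

// Driver: seed the ready set with the roots, then keep scheduling.
std::vector<unsigned> scheduleTopDown(ToyDAG &DAG) {
  std::vector<unsigned> Order;
  for (unsigned i = 0, e = (unsigned)DAG.NumPredsLeft.size(); i != e; ++i)
    if (DAG.NumPredsLeft[i] == 0)
      DAG.Ready.push_back(i);
  while (!DAG.Ready.empty()) {
    unsigned SU = DAG.Ready.back();
    DAG.Ready.pop_back();
    Order.push_back(SU);
    releaseSuccessors(DAG, SU);
  }
  return Order;
}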
+ MachineInstr *MI = SU->getInstr(); + + if (IsTopNode) { + assert(SU->isTopReady() && "node still has unscheduled dependencies"); + if (&*CurrentTop == MI) + ++CurrentTop; + else + moveInstruction(MI, CurrentTop); + // Release dependent instructions for scheduling. + releaseSuccessors(SU); + } + else { + assert(SU->isBottomReady() && "node still has unscheduled dependencies"); + if (&*llvm::prior(CurrentBottom) == MI) + --CurrentBottom; + else { + if (&*CurrentTop == MI) + CurrentTop = llvm::next(CurrentTop); + moveInstruction(MI, CurrentBottom); + CurrentBottom = MI; + } + // Release dependent instructions for scheduling. + releasePredecessors(SU); + } + SU->isScheduled = true; + } + assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); +} + +//===----------------------------------------------------------------------===// +// ConvergingScheduler - Implementation of the standard MachineSchedStrategy. +//===----------------------------------------------------------------------===// + +namespace { +/// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance +/// the schedule. +class ConvergingScheduler : public MachineSchedStrategy { + ScheduleDAGMI *DAG; + + unsigned NumTopReady; + unsigned NumBottomReady; + +public: + virtual void initialize(ScheduleDAGMI *dag) { + DAG = dag; + + assert((!ForceTopDown || !ForceBottomUp) && + "-misched-topdown incompatible with -misched-bottomup"); + } + + virtual SUnit *pickNode(bool &IsTopNode) { + if (DAG->top() == DAG->bottom()) + return NULL; + + // As an initial placeholder heuristic, schedule in the direction that has + // the fewest choices. + SUnit *SU; + if (ForceTopDown || (!ForceBottomUp && NumTopReady <= NumBottomReady)) { + SU = DAG->getSUnit(DAG->top()); + IsTopNode = true; + } + else { + SU = DAG->getSUnit(llvm::prior(DAG->bottom())); + IsTopNode = false; + } + if (SU->isTopReady()) { + assert(NumTopReady > 0 && "bad ready count"); + --NumTopReady; + } + if (SU->isBottomReady()) { + assert(NumBottomReady > 0 && "bad ready count"); + --NumBottomReady; + } + return SU; + } + + virtual void releaseTopNode(SUnit *SU) { + ++NumTopReady; + } + virtual void releaseBottomNode(SUnit *SU) { + ++NumBottomReady; + } +}; +} // namespace + +/// Create the standard converging machine scheduler. This will be used as the +/// default scheduler if the target does not set a default. +static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) { + assert((!ForceTopDown || !ForceBottomUp) && + "-misched-topdown incompatible with -misched-bottomup"); + return new ScheduleDAGMI(C, new ConvergingScheduler()); +} +static MachineSchedRegistry +ConvergingSchedRegistry("converge", "Standard converging scheduler.", + createConvergingSched); + +//===----------------------------------------------------------------------===// +// Machine Instruction Shuffler for Correctness Testing +//===----------------------------------------------------------------------===// + +#ifndef NDEBUG +namespace { +/// Apply a less-than relation on the node order, which corresponds to the +/// instruction order prior to scheduling. IsReverse implements greater-than. +template<bool IsReverse> +struct SUnitOrder { + bool operator()(SUnit *A, SUnit *B) const { + if (IsReverse) + return A->NodeNum > B->NodeNum; + else + return A->NodeNum < B->NodeNum; + } +}; + +/// Reorder instructions as much as possible. 
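// Standalone illustrative sketch, not part of the patch: how the unscheduled
// zone shrinks from both ends in ScheduleDAGMI::schedule() above, using
// std::list as a stand-in for the instruction list. The unscheduled zone is
// [Top, Bottom); placeTop/placeBottom mirror what happens when the strategy
// picks a node for the top or the bottom.
#include <iterator>
#include <list>

void placeTop(std::list<int> &Block, std::list<int>::iterator &Top,
              std::list<int>::iterator It) {
  if (It == Top)
    ++Top;                        // Already in place; just shrink the zone.
  else
    Block.splice(Top, Block, It); // Move it up to the current top.
}

void placeBottom(std::list<int> &Block, std::list<int>::iterator &Top,
                 std::list<int>::iterator &Bottom,
                 std::list<int>::iterator It) {
  if (It == std::prev(Bottom)) {
    --Bottom;                     // Already in place; shrink from below.
    return;
  }
  if (It == Top)
    ++Top;                        // Don't leave Top pointing at a moved node.
  Block.splice(Bottom, Block, It);
  Bottom = It;                    // The moved node is the new bottom boundary.
}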
+class InstructionShuffler : public MachineSchedStrategy { + bool IsAlternating; + bool IsTopDown; + + // Using a less-than relation (SUnitOrder<false>) for the TopQ priority + // gives nodes with a higher number higher priority causing the latest + // instructions to be scheduled first. + PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> > + TopQ; + // When scheduling bottom-up, use greater-than as the queue priority. + PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> > + BottomQ; +public: + InstructionShuffler(bool alternate, bool topdown) + : IsAlternating(alternate), IsTopDown(topdown) {} + + virtual void initialize(ScheduleDAGMI *) { + TopQ.clear(); + BottomQ.clear(); + } + + /// Implement MachineSchedStrategy interface. + /// ----------------------------------------- + + virtual SUnit *pickNode(bool &IsTopNode) { + SUnit *SU; + if (IsTopDown) { + do { + if (TopQ.empty()) return NULL; + SU = TopQ.top(); + TopQ.pop(); + } while (SU->isScheduled); + IsTopNode = true; + } + else { + do { + if (BottomQ.empty()) return NULL; + SU = BottomQ.top(); + BottomQ.pop(); + } while (SU->isScheduled); + IsTopNode = false; + } + if (IsAlternating) + IsTopDown = !IsTopDown; + return SU; + } + + virtual void releaseTopNode(SUnit *SU) { + TopQ.push(SU); + } + virtual void releaseBottomNode(SUnit *SU) { + BottomQ.push(SU); + } +}; +} // namespace + +static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) { + bool Alternate = !ForceTopDown && !ForceBottomUp; + bool TopDown = !ForceBottomUp; + assert((TopDown || !ForceTopDown) && + "-misched-topdown incompatible with -misched-bottomup"); + return new ScheduleDAGMI(C, new InstructionShuffler(Alternate, TopDown)); +} +static MachineSchedRegistry ShufflerRegistry( + "shuffle", "Shuffle machine instructions alternating directions", + createInstructionShuffler); +#endif // !NDEBUG diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index 29cfb49953b9..1ce546b578ad 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -32,7 +32,7 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; -static cl::opt<bool> +static cl::opt<bool> SplitEdges("machine-sink-split", cl::desc("Split critical edges during machine sinking"), cl::init(true), cl::Hidden); @@ -90,12 +90,19 @@ namespace { bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB, MachineBasicBlock *DefMBB, bool &BreakPHIEdge, bool &LocalUse) const; + MachineBasicBlock *FindSuccToSinkTo(MachineInstr *MI, MachineBasicBlock *MBB, + bool &BreakPHIEdge); + bool isProfitableToSinkTo(unsigned Reg, MachineInstr *MI, + MachineBasicBlock *MBB, + MachineBasicBlock *SuccToSinkTo); + bool PerformTrivialForwardCoalescing(MachineInstr *MI, MachineBasicBlock *MBB); }; } // end anonymous namespace char MachineSinking::ID = 0; +char &llvm::MachineSinkingID = MachineSinking::ID; INITIALIZE_PASS_BEGIN(MachineSinking, "machine-sink", "Machine code sinking", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) @@ -104,8 +111,6 @@ INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(MachineSinking, "machine-sink", "Machine code sinking", false, false) -FunctionPass *llvm::createMachineSinkingPass() { return new MachineSinking(); } - bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr *MI, MachineBasicBlock *MBB) { if (!MI->isCopy()) @@ -147,14 +152,10 @@ MachineSinking::AllUsesDominatedByBlock(unsigned Reg, assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Only makes sense for vregs"); + // Ignore 
debug uses because debug info doesn't affect the code. if (MRI->use_nodbg_empty(Reg)) return true; - // Ignoring debug uses is necessary so debug info doesn't affect the code. - // This may leave a referencing dbg_value in the original block, before - // the definition of the vreg. Dwarf generator handles this although the - // user might not get the right info at runtime. - // BreakPHIEdge is true if all the uses are in the successor MBB being sunken // into and they are all PHI nodes. In this case, machine-sink must break // the critical edge first. e.g. @@ -291,7 +292,7 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr *MI, if (!CEBCandidates.insert(std::make_pair(From, To))) return true; - if (!MI->isCopy() && !MI->getDesc().isAsCheapAsAMove()) + if (!MI->isCopy() && !MI->isAsCheapAsAMove()) return true; // MI is cheap, we probably don't want to break the critical edge for it. @@ -382,9 +383,9 @@ static bool AvoidsSinking(MachineInstr *MI, MachineRegisterInfo *MRI) { return MI->isInsertSubreg() || MI->isSubregToReg() || MI->isRegSequence(); } -/// collectDebgValues - Scan instructions following MI and collect any +/// collectDebgValues - Scan instructions following MI and collect any /// matching DBG_VALUEs. -static void collectDebugValues(MachineInstr *MI, +static void collectDebugValues(MachineInstr *MI, SmallVector<MachineInstr *, 2> & DbgValues) { DbgValues.clear(); if (!MI->getOperand(0).isReg()) @@ -401,35 +402,76 @@ static void collectDebugValues(MachineInstr *MI, } } -/// SinkInstruction - Determine whether it is safe to sink the specified machine -/// instruction out of its current block into a successor. -bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { - // Don't sink insert_subreg, subreg_to_reg, reg_sequence. These are meant to - // be close to the source to make it easier to coalesce. - if (AvoidsSinking(MI, MRI)) +/// isPostDominatedBy - Return true if A is post dominated by B. +static bool isPostDominatedBy(MachineBasicBlock *A, MachineBasicBlock *B) { + + // FIXME - Use real post dominator. + if (A->succ_size() != 2) + return false; + MachineBasicBlock::succ_iterator I = A->succ_begin(); + if (B == *I) + ++I; + MachineBasicBlock *OtherSuccBlock = *I; + if (OtherSuccBlock->succ_size() != 1 || + *(OtherSuccBlock->succ_begin()) != B) return false; - // Check if it's safe to move the instruction. - if (!MI->isSafeToMove(TII, AA, SawStore)) + return true; +} + +/// isProfitableToSinkTo - Return true if it is profitable to sink MI. +bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr *MI, + MachineBasicBlock *MBB, + MachineBasicBlock *SuccToSinkTo) { + assert (MI && "Invalid MachineInstr!"); + assert (SuccToSinkTo && "Invalid SinkTo Candidate BB"); + + if (MBB == SuccToSinkTo) return false; - // FIXME: This should include support for sinking instructions within the - // block they are currently in to shorten the live ranges. We often get - // instructions sunk into the top of a large block, but it would be better to - // also sink them down before their first use in the block. This xform has to - // be careful not to *increase* register pressure though, e.g. sinking - // "x = y + z" down if it kills y and z would increase the live ranges of y - // and z and only shrink the live range of x. + // It is profitable if SuccToSinkTo does not post dominate current block. + if (!isPostDominatedBy(MBB, SuccToSinkTo)) + return true; + + // Check if only use in post dominated block is PHI instruction. 
+ bool NonPHIUse = false; + for (MachineRegisterInfo::use_nodbg_iterator + I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); + I != E; ++I) { + MachineInstr *UseInst = &*I; + MachineBasicBlock *UseBlock = UseInst->getParent(); + if (UseBlock == SuccToSinkTo && !UseInst->isPHI()) + NonPHIUse = true; + } + if (!NonPHIUse) + return true; + + // If SuccToSinkTo post dominates then also it may be profitable if MI + // can further profitably sinked into another block in next round. + bool BreakPHIEdge = false; + // FIXME - If finding successor is compile time expensive then catch results. + if (MachineBasicBlock *MBB2 = FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge)) + return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2); + + // If SuccToSinkTo is final destination and it is a post dominator of current + // block then it is not profitable to sink MI into SuccToSinkTo block. + return false; +} + +/// FindSuccToSinkTo - Find a successor to sink this instruction to. +MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI, + MachineBasicBlock *MBB, + bool &BreakPHIEdge) { + + assert (MI && "Invalid MachineInstr!"); + assert (MBB && "Invalid MachineBasicBlock!"); // Loop over all the operands of the specified instruction. If there is // anything we can't handle, bail out. - MachineBasicBlock *ParentBlock = MI->getParent(); // SuccToSinkTo - This is the successor to sink this instruction to, once we // decide. MachineBasicBlock *SuccToSinkTo = 0; - - bool BreakPHIEdge = false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; // Ignore non-register operands. @@ -442,24 +484,11 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // If the physreg has no defs anywhere, it's just an ambient register // and we can freely move its uses. Alternatively, if it's allocatable, // it could get allocated to something with a def during allocation. - if (!MRI->def_empty(Reg)) - return false; - - if (AllocatableSet.test(Reg)) - return false; - - // Check for a def among the register's aliases too. - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { - unsigned AliasReg = *Alias; - if (!MRI->def_empty(AliasReg)) - return false; - - if (AllocatableSet.test(AliasReg)) - return false; - } + if (!MRI->isConstantPhysReg(Reg, *MBB->getParent())) + return NULL; } else if (!MO.isDead()) { // A def that isn't dead. We can't move it. - return false; + return NULL; } } else { // Virtual register uses are always safe to sink. @@ -467,7 +496,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // If it's not safe to move defs of the register class, then abort. if (!TII->isSafeToMoveRegClassDefs(MRI->getRegClass(Reg))) - return false; + return NULL; // FIXME: This picks a successor to sink into based on having one // successor that dominates all the uses. However, there are cases where @@ -488,48 +517,79 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // If a previous operand picked a block to sink to, then this operand // must be sinkable to the same block. bool LocalUse = false; - if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, ParentBlock, + if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB, BreakPHIEdge, LocalUse)) - return false; + return NULL; continue; } // Otherwise, we should look at all the successors and decide which one // we should sink to. 
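// Standalone illustrative sketch, not part of the patch: the approximate
// post-dominance test that isProfitableToSinkTo leans on above, over a toy
// CFG given as successor lists. It only recognizes the shape the FIXME
// mentions: A has exactly two successors, and the one that is not B falls
// through to B alone.
#include <vector>

typedef std::vector<std::vector<unsigned>> CFG; // Succs[block] = successor ids

bool isPostDominatedBy(const CFG &Succs, unsigned A, unsigned B) {
  if (Succs[A].size() != 2)
    return false;
  unsigned Other = (Succs[A][0] == B) ? Succs[A][1] : Succs[A][0];
  return Succs[Other].size() == 1 && Succs[Other][0] == B;
}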
- for (MachineBasicBlock::succ_iterator SI = ParentBlock->succ_begin(), - E = ParentBlock->succ_end(); SI != E; ++SI) { + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + E = MBB->succ_end(); SI != E; ++SI) { + MachineBasicBlock *SuccBlock = *SI; bool LocalUse = false; - if (AllUsesDominatedByBlock(Reg, *SI, ParentBlock, + if (AllUsesDominatedByBlock(Reg, SuccBlock, MBB, BreakPHIEdge, LocalUse)) { - SuccToSinkTo = *SI; + SuccToSinkTo = SuccBlock; break; } if (LocalUse) // Def is used locally, it's never safe to move this def. - return false; + return NULL; } // If we couldn't find a block to sink to, ignore this instruction. if (SuccToSinkTo == 0) - return false; + return NULL; + else if (!isProfitableToSinkTo(Reg, MI, MBB, SuccToSinkTo)) + return NULL; } } - // If there are no outputs, it must have side-effects. - if (SuccToSinkTo == 0) - return false; + // It is not possible to sink an instruction into its own block. This can + // happen with loops. + if (MBB == SuccToSinkTo) + return NULL; // It's not safe to sink instructions to EH landing pad. Control flow into // landing pad is implicitly defined. - if (SuccToSinkTo->isLandingPad()) + if (SuccToSinkTo && SuccToSinkTo->isLandingPad()) + return NULL; + + return SuccToSinkTo; +} + +/// SinkInstruction - Determine whether it is safe to sink the specified machine +/// instruction out of its current block into a successor. +bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { + // Don't sink insert_subreg, subreg_to_reg, reg_sequence. These are meant to + // be close to the source to make it easier to coalesce. + if (AvoidsSinking(MI, MRI)) return false; - // It is not possible to sink an instruction into its own block. This can - // happen with loops. - if (MI->getParent() == SuccToSinkTo) + // Check if it's safe to move the instruction. + if (!MI->isSafeToMove(TII, AA, SawStore)) return false; + // FIXME: This should include support for sinking instructions within the + // block they are currently in to shorten the live ranges. We often get + // instructions sunk into the top of a large block, but it would be better to + // also sink them down before their first use in the block. This xform has to + // be careful not to *increase* register pressure though, e.g. sinking + // "x = y + z" down if it kills y and z would increase the live ranges of y + // and z and only shrink the live range of x. + + bool BreakPHIEdge = false; + MachineBasicBlock *ParentBlock = MI->getParent(); + MachineBasicBlock *SuccToSinkTo = FindSuccToSinkTo(MI, ParentBlock, BreakPHIEdge); + + // If there are no outputs, it must have side-effects. + if (SuccToSinkTo == 0) + return false; + + // If the instruction to move defines a dead physical register which is live // when leaving the basic block, don't move it because it could turn into a // "zombie" define of that preg. E.g., EFLAGS. 
(<rdar://problem/8030636>) diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 26847d39e7ad..74ba94d1fcc0 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -28,6 +28,7 @@ #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" @@ -69,14 +70,17 @@ namespace { unsigned foundErrors; typedef SmallVector<unsigned, 16> RegVector; + typedef SmallVector<const uint32_t*, 4> RegMaskVector; typedef DenseSet<unsigned> RegSet; typedef DenseMap<unsigned, const MachineInstr*> RegMap; const MachineInstr *FirstTerminator; BitVector regsReserved; + BitVector regsAllocatable; RegSet regsLive; RegVector regsDefined, regsDead, regsKilled; + RegMaskVector regMasks; RegSet regsLiveInButUnused; SlotIndex lastIndex; @@ -85,7 +89,7 @@ namespace { void addRegWithSubRegs(RegVector &RV, unsigned Reg) { RV.push_back(Reg); if (TargetRegisterInfo::isPhysicalRegister(Reg)) - for (const unsigned *R = TRI->getSubRegisters(Reg); *R; R++) + for (const uint16_t *R = TRI->getSubRegisters(Reg); *R; R++) RV.push_back(*R); } @@ -175,6 +179,10 @@ namespace { return Reg < regsReserved.size() && regsReserved.test(Reg); } + bool isAllocatable(unsigned Reg) { + return Reg < regsAllocatable.size() && regsAllocatable.test(Reg); + } + // Analysis information if available LiveVariables *LiveVars; LiveIntervals *LiveInts; @@ -194,6 +202,7 @@ namespace { void report(const char *msg, const MachineInstr *MI); void report(const char *msg, const MachineOperand *MO, unsigned MONum); + void checkLiveness(const MachineOperand *MO, unsigned MONum); void markReachable(const MachineBasicBlock *MBB); void calcRegsPassed(); void checkPHIOps(const MachineBasicBlock *MBB); @@ -279,13 +288,17 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { for (MachineFunction::const_iterator MFI = MF.begin(), MFE = MF.end(); MFI!=MFE; ++MFI) { visitMachineBasicBlockBefore(MFI); - for (MachineBasicBlock::const_iterator MBBI = MFI->begin(), - MBBE = MFI->end(); MBBI != MBBE; ++MBBI) { + for (MachineBasicBlock::const_instr_iterator MBBI = MFI->instr_begin(), + MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI) { if (MBBI->getParent() != MFI) { report("Bad instruction parent pointer", MFI); *OS << "Instruction: " << *MBBI; continue; } + // Skip BUNDLE instruction for now. FIXME: We should add code to verify + // the BUNDLE's specifically. 
+ if (MBBI->isBundle()) + continue; visitMachineInstrBefore(MBBI); for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) visitMachineOperand(&MBBI->getOperand(I), I); @@ -305,6 +318,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { regsDefined.clear(); regsDead.clear(); regsKilled.clear(); + regMasks.clear(); regsLiveInButUnused.clear(); MBBInfoMap.clear(); @@ -320,7 +334,7 @@ void MachineVerifier::report(const char *msg, const MachineFunction *MF) { MF->print(*OS, Indexes); } *OS << "*** Bad machine code: " << msg << " ***\n" - << "- function: " << MF->getFunction()->getNameStr() << "\n"; + << "- function: " << MF->getFunction()->getName() << "\n"; } void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) { @@ -370,12 +384,15 @@ void MachineVerifier::visitMachineFunctionBefore() { // A sub-register of a reserved register is also reserved for (int Reg = regsReserved.find_first(); Reg>=0; Reg = regsReserved.find_next(Reg)) { - for (const unsigned *Sub = TRI->getSubRegisters(Reg); *Sub; ++Sub) { + for (const uint16_t *Sub = TRI->getSubRegisters(Reg); *Sub; ++Sub) { // FIXME: This should probably be: // assert(regsReserved.test(*Sub) && "Non-reserved sub-register"); regsReserved.set(*Sub); } } + + regsAllocatable = TRI->getAllocatableSet(*MF); + markReachable(&MF->front()); } @@ -393,6 +410,20 @@ void MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { FirstTerminator = 0; + if (MRI->isSSA()) { + // If this block has allocatable physical registers live-in, check that + // it is an entry block or landing pad. + for (MachineBasicBlock::livein_iterator LI = MBB->livein_begin(), + LE = MBB->livein_end(); + LI != LE; ++LI) { + unsigned reg = *LI; + if (isAllocatable(reg) && !MBB->isLandingPad() && + MBB != MBB->getParent()->begin()) { + report("MBB has allocable live-in, but isn't entry or landing-pad.", MBB); + } + } + } + // Count the number of landing pad successors. 
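// A standalone sketch of the new SSA live-in rule the verifier enforces above:
// while the function is still in SSA form, an allocatable physical register may
// only be live into the entry block or a landing pad.  BitVector-style state is
// modelled with std::vector<bool>; the names are illustrative, not LLVM's.
#include <cstdio>
#include <vector>

struct BlockInfo {
  std::vector<unsigned> LiveIns;   // physical registers live into the block
  bool IsEntry = false;
  bool IsLandingPad = false;
};

void checkAllocatableLiveIns(const BlockInfo &MBB,
                             const std::vector<bool> &Allocatable) {
  for (unsigned Reg : MBB.LiveIns) {
    bool IsAllocatable = Reg < Allocatable.size() && Allocatable[Reg];
    if (IsAllocatable && !MBB.IsEntry && !MBB.IsLandingPad)
      std::printf("MBB has allocatable live-in %u but isn't entry or "
                  "landing-pad\n", Reg);
  }
}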
SmallPtrSet<MachineBasicBlock*, 4> LandingPadSuccs; for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), @@ -435,7 +466,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { report("MBB exits via unconditional fall-through but its successor " "differs from its CFG successor!", MBB); } - if (!MBB->empty() && MBB->back().getDesc().isBarrier() && + if (!MBB->empty() && MBB->back().isBarrier() && !TII->isPredicated(&MBB->back())) { report("MBB exits via unconditional fall-through but ends with a " "barrier instruction!", MBB); @@ -456,10 +487,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { if (MBB->empty()) { report("MBB exits via unconditional branch but doesn't contain " "any instructions!", MBB); - } else if (!MBB->back().getDesc().isBarrier()) { + } else if (!MBB->back().isBarrier()) { report("MBB exits via unconditional branch but doesn't end with a " "barrier instruction!", MBB); - } else if (!MBB->back().getDesc().isTerminator()) { + } else if (!MBB->back().isTerminator()) { report("MBB exits via unconditional branch but the branch isn't a " "terminator instruction!", MBB); } @@ -479,10 +510,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { if (MBB->empty()) { report("MBB exits via conditional branch/fall-through but doesn't " "contain any instructions!", MBB); - } else if (MBB->back().getDesc().isBarrier()) { + } else if (MBB->back().isBarrier()) { report("MBB exits via conditional branch/fall-through but ends with a " "barrier instruction!", MBB); - } else if (!MBB->back().getDesc().isTerminator()) { + } else if (!MBB->back().isTerminator()) { report("MBB exits via conditional branch/fall-through but the branch " "isn't a terminator instruction!", MBB); } @@ -499,10 +530,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { if (MBB->empty()) { report("MBB exits via conditional branch/branch but doesn't " "contain any instructions!", MBB); - } else if (!MBB->back().getDesc().isBarrier()) { + } else if (!MBB->back().isBarrier()) { report("MBB exits via conditional branch/branch but doesn't end with a " "barrier instruction!", MBB); - } else if (!MBB->back().getDesc().isTerminator()) { + } else if (!MBB->back().isTerminator()) { report("MBB exits via conditional branch/branch but the branch " "isn't a terminator instruction!", MBB); } @@ -523,7 +554,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { continue; } regsLive.insert(*I); - for (const unsigned *R = TRI->getSubRegisters(*I); *R; R++) + for (const uint16_t *R = TRI->getSubRegisters(*I); *R; R++) regsLive.insert(*R); } regsLiveInButUnused = regsLive; @@ -533,7 +564,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { BitVector PR = MFI->getPristineRegs(MBB); for (int I = PR.find_first(); I>0; I = PR.find_next(I)) { regsLive.insert(I); - for (const unsigned *R = TRI->getSubRegisters(I); *R; R++) + for (const uint16_t *R = TRI->getSubRegisters(I); *R; R++) regsLive.insert(*R); } @@ -555,19 +586,22 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { // Check the MachineMemOperands for basic consistency. 
for (MachineInstr::mmo_iterator I = MI->memoperands_begin(), E = MI->memoperands_end(); I != E; ++I) { - if ((*I)->isLoad() && !MCID.mayLoad()) + if ((*I)->isLoad() && !MI->mayLoad()) report("Missing mayLoad flag", MI); - if ((*I)->isStore() && !MCID.mayStore()) + if ((*I)->isStore() && !MI->mayStore()) report("Missing mayStore flag", MI); } // Debug values must not have a slot index. - // Other instructions must have one. + // Other instructions must have one, unless they are inside a bundle. if (LiveInts) { bool mapped = !LiveInts->isNotInMIMap(MI); if (MI->isDebugValue()) { if (mapped) report("Debug instruction has a slot index", MI); + } else if (MI->isInsideBundle()) { + if (mapped) + report("Instruction inside bundle has a slot index", MI); } else { if (!mapped) report("Missing slot index", MI); @@ -575,7 +609,9 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { } // Ensure non-terminators don't follow terminators. - if (MCID.isTerminator()) { + // Ignore predicated terminators formed by if conversion. + // FIXME: If conversion shouldn't need to violate this rule. + if (MI->isTerminator() && !TII->isPredicated(MI)) { if (!FirstTerminator) FirstTerminator = MI; } else if (FirstTerminator) { @@ -606,7 +642,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { // Don't check if it's the last operand in a variadic instruction. See, // e.g., LDM_RET in the arm back end. if (MO->isReg() && - !(MCID.isVariadic() && MONum == MCID.getNumOperands()-1)) { + !(MI->isVariadic() && MONum == MCID.getNumOperands()-1)) { if (MO->isDef() && !MCOI.isOptionalDef()) report("Explicit operand marked as def", MO, MONum); if (MO->isImplicit()) @@ -614,7 +650,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } } else { // ARM adds %reg0 operands to indicate predicates. We'll allow that. - if (MO->isReg() && !MO->isImplicit() && !MCID.isVariadic() && MO->getReg()) + if (MO->isReg() && !MO->isImplicit() && !MI->isVariadic() && MO->getReg()) report("Extra explicit operand on non-variadic instruction", MO, MONum); } @@ -623,112 +659,9 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { const unsigned Reg = MO->getReg(); if (!Reg) return; + if (MRI->tracksLiveness() && !MI->isDebugValue()) + checkLiveness(MO, MONum); - // Check Live Variables. - if (MI->isDebugValue()) { - // Liveness checks are not valid for debug values. - } else if (MO->isUse() && !MO->isUndef()) { - regsLiveInButUnused.erase(Reg); - - bool isKill = false; - unsigned defIdx; - if (MI->isRegTiedToDefOperand(MONum, &defIdx)) { - // A two-addr use counts as a kill if use and def are the same. - unsigned DefReg = MI->getOperand(defIdx).getReg(); - if (Reg == DefReg) - isKill = true; - else if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - report("Two-address instruction operands must be identical", - MO, MONum); - } - } else - isKill = MO->isKill(); - - if (isKill) - addRegWithSubRegs(regsKilled, Reg); - - // Check that LiveVars knows this kill. - if (LiveVars && TargetRegisterInfo::isVirtualRegister(Reg) && - MO->isKill()) { - LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg); - if (std::find(VI.Kills.begin(), - VI.Kills.end(), MI) == VI.Kills.end()) - report("Kill missing from LiveVariables", MO, MONum); - } - - // Check LiveInts liveness and kill. 
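// A self-contained sketch of the terminator-ordering rule as relaxed above:
// once the first unpredicated terminator is seen, every later instruction in
// the block must also be a terminator, and predicated terminators produced by
// if conversion are ignored when picking that "first terminator".  The Inst
// struct is a stand-in for MachineInstr, not the real class.
#include <cstdio>
#include <vector>

struct Inst {
  bool IsTerminator = false;
  bool IsPredicated = false;
};

void checkTerminatorOrder(const std::vector<Inst> &Block) {
  const Inst *FirstTerminator = nullptr;
  for (const Inst &MI : Block) {
    if (MI.IsTerminator && !MI.IsPredicated) {
      if (!FirstTerminator)
        FirstTerminator = &MI;
    } else if (FirstTerminator) {
      std::printf("non-terminator follows terminator\n");
    }
  }
}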
- if (TargetRegisterInfo::isVirtualRegister(Reg) && - LiveInts && !LiveInts->isNotInMIMap(MI)) { - SlotIndex UseIdx = LiveInts->getInstructionIndex(MI).getUseIndex(); - if (LiveInts->hasInterval(Reg)) { - const LiveInterval &LI = LiveInts->getInterval(Reg); - if (!LI.liveAt(UseIdx)) { - report("No live range at use", MO, MONum); - *OS << UseIdx << " is not live in " << LI << '\n'; - } - // Check for extra kill flags. - // Note that we allow missing kill flags for now. - if (MO->isKill() && !LI.killedAt(UseIdx.getDefIndex())) { - report("Live range continues after kill flag", MO, MONum); - *OS << "Live range: " << LI << '\n'; - } - } else { - report("Virtual register has no Live interval", MO, MONum); - } - } - - // Use of a dead register. - if (!regsLive.count(Reg)) { - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - // Reserved registers may be used even when 'dead'. - if (!isReserved(Reg)) - report("Using an undefined physical register", MO, MONum); - } else { - BBInfo &MInfo = MBBInfoMap[MI->getParent()]; - // We don't know which virtual registers are live in, so only complain - // if vreg was killed in this MBB. Otherwise keep track of vregs that - // must be live in. PHI instructions are handled separately. - if (MInfo.regsKilled.count(Reg)) - report("Using a killed virtual register", MO, MONum); - else if (!MI->isPHI()) - MInfo.vregsLiveIn.insert(std::make_pair(Reg, MI)); - } - } - } else if (MO->isDef()) { - // Register defined. - // TODO: verify that earlyclobber ops are not used. - if (MO->isDead()) - addRegWithSubRegs(regsDead, Reg); - else - addRegWithSubRegs(regsDefined, Reg); - - // Verify SSA form. - if (MRI->isSSA() && TargetRegisterInfo::isVirtualRegister(Reg) && - llvm::next(MRI->def_begin(Reg)) != MRI->def_end()) - report("Multiple virtual register defs in SSA form", MO, MONum); - - // Check LiveInts for a live range, but only for virtual registers. - if (LiveInts && TargetRegisterInfo::isVirtualRegister(Reg) && - !LiveInts->isNotInMIMap(MI)) { - SlotIndex DefIdx = LiveInts->getInstructionIndex(MI).getDefIndex(); - if (LiveInts->hasInterval(Reg)) { - const LiveInterval &LI = LiveInts->getInterval(Reg); - if (const VNInfo *VNI = LI.getVNInfoAt(DefIdx)) { - assert(VNI && "NULL valno is not allowed"); - if (VNI->def != DefIdx && !MO->isEarlyClobber()) { - report("Inconsistent valno->def", MO, MONum); - *OS << "Valno " << VNI->id << " is not defined at " - << DefIdx << " in " << LI << '\n'; - } - } else { - report("No live range at def", MO, MONum); - *OS << DefIdx << " is not live in " << LI << '\n'; - } - } else { - report("Virtual register has no Live interval", MO, MONum); - } - } - } // Check register classes. 
if (MONum < MCID.getNumOperands() && !MO->isImplicit()) { @@ -790,6 +723,10 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { break; } + case MachineOperand::MO_RegisterMask: + regMasks.push_back(MO->getRegMask()); + break; + case MachineOperand::MO_MachineBasicBlock: if (MI->isPHI() && !MO->getMBB()->isSuccessor(MI->getParent())) report("PHI operand is not in the CFG", MO, MONum); @@ -800,11 +737,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { LiveInts && !LiveInts->isNotInMIMap(MI)) { LiveInterval &LI = LiveStks->getInterval(MO->getIndex()); SlotIndex Idx = LiveInts->getInstructionIndex(MI); - if (MCID.mayLoad() && !LI.liveAt(Idx.getUseIndex())) { + if (MI->mayLoad() && !LI.liveAt(Idx.getRegSlot(true))) { report("Instruction loads from dead spill slot", MO, MONum); *OS << "Live stack: " << LI << '\n'; } - if (MCID.mayStore() && !LI.liveAt(Idx.getDefIndex())) { + if (MI->mayStore() && !LI.liveAt(Idx.getRegSlot())) { report("Instruction stores to dead spill slot", MO, MONum); *OS << "Live stack: " << LI << '\n'; } @@ -816,10 +753,127 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } } +void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { + const MachineInstr *MI = MO->getParent(); + const unsigned Reg = MO->getReg(); + + // Both use and def operands can read a register. + if (MO->readsReg()) { + regsLiveInButUnused.erase(Reg); + + bool isKill = false; + unsigned defIdx; + if (MI->isRegTiedToDefOperand(MONum, &defIdx)) { + // A two-addr use counts as a kill if use and def are the same. + unsigned DefReg = MI->getOperand(defIdx).getReg(); + if (Reg == DefReg) + isKill = true; + else if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + report("Two-address instruction operands must be identical", MO, MONum); + } + } else + isKill = MO->isKill(); + + if (isKill) + addRegWithSubRegs(regsKilled, Reg); + + // Check that LiveVars knows this kill. + if (LiveVars && TargetRegisterInfo::isVirtualRegister(Reg) && + MO->isKill()) { + LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg); + if (std::find(VI.Kills.begin(), VI.Kills.end(), MI) == VI.Kills.end()) + report("Kill missing from LiveVariables", MO, MONum); + } + + // Check LiveInts liveness and kill. + if (TargetRegisterInfo::isVirtualRegister(Reg) && + LiveInts && !LiveInts->isNotInMIMap(MI)) { + SlotIndex UseIdx = LiveInts->getInstructionIndex(MI).getRegSlot(true); + if (LiveInts->hasInterval(Reg)) { + const LiveInterval &LI = LiveInts->getInterval(Reg); + if (!LI.liveAt(UseIdx)) { + report("No live range at use", MO, MONum); + *OS << UseIdx << " is not live in " << LI << '\n'; + } + // Check for extra kill flags. + // Note that we allow missing kill flags for now. + if (MO->isKill() && !LI.killedAt(UseIdx.getRegSlot())) { + report("Live range continues after kill flag", MO, MONum); + *OS << "Live range: " << LI << '\n'; + } + } else { + report("Virtual register has no Live interval", MO, MONum); + } + } + + // Use of a dead register. + if (!regsLive.count(Reg)) { + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + // Reserved registers may be used even when 'dead'. + if (!isReserved(Reg)) + report("Using an undefined physical register", MO, MONum); + } else { + BBInfo &MInfo = MBBInfoMap[MI->getParent()]; + // We don't know which virtual registers are live in, so only complain + // if vreg was killed in this MBB. Otherwise keep track of vregs that + // must be live in. 
PHI instructions are handled separately. + if (MInfo.regsKilled.count(Reg)) + report("Using a killed virtual register", MO, MONum); + else if (!MI->isPHI()) + MInfo.vregsLiveIn.insert(std::make_pair(Reg, MI)); + } + } + } + + if (MO->isDef()) { + // Register defined. + // TODO: verify that earlyclobber ops are not used. + if (MO->isDead()) + addRegWithSubRegs(regsDead, Reg); + else + addRegWithSubRegs(regsDefined, Reg); + + // Verify SSA form. + if (MRI->isSSA() && TargetRegisterInfo::isVirtualRegister(Reg) && + llvm::next(MRI->def_begin(Reg)) != MRI->def_end()) + report("Multiple virtual register defs in SSA form", MO, MONum); + + // Check LiveInts for a live range, but only for virtual registers. + if (LiveInts && TargetRegisterInfo::isVirtualRegister(Reg) && + !LiveInts->isNotInMIMap(MI)) { + SlotIndex DefIdx = LiveInts->getInstructionIndex(MI).getRegSlot(); + if (LiveInts->hasInterval(Reg)) { + const LiveInterval &LI = LiveInts->getInterval(Reg); + if (const VNInfo *VNI = LI.getVNInfoAt(DefIdx)) { + assert(VNI && "NULL valno is not allowed"); + if (VNI->def != DefIdx && !MO->isEarlyClobber()) { + report("Inconsistent valno->def", MO, MONum); + *OS << "Valno " << VNI->id << " is not defined at " + << DefIdx << " in " << LI << '\n'; + } + } else { + report("No live range at def", MO, MONum); + *OS << DefIdx << " is not live in " << LI << '\n'; + } + } else { + report("Virtual register has no Live interval", MO, MONum); + } + } + } +} + void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) { BBInfo &MInfo = MBBInfoMap[MI->getParent()]; set_union(MInfo.regsKilled, regsKilled); set_subtract(regsLive, regsKilled); regsKilled.clear(); + // Kill any masked registers. + while (!regMasks.empty()) { + const uint32_t *Mask = regMasks.pop_back_val(); + for (RegSet::iterator I = regsLive.begin(), E = regsLive.end(); I != E; ++I) + if (TargetRegisterInfo::isPhysicalRegister(*I) && + MachineOperand::clobbersPhysReg(Mask, *I)) + regsDead.push_back(*I); + } set_subtract(regsLive, regsDead); regsDead.clear(); set_union(regsLive, regsDefined); regsDefined.clear(); @@ -855,7 +909,7 @@ MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) { void MachineVerifier::calcRegsPassed() { // First push live-out regs to successors' vregsPassed. Remember the MBBs that // have any vregsPassed. - DenseSet<const MachineBasicBlock*> todo; + SmallPtrSet<const MachineBasicBlock*, 8> todo; for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); MFI != MFE; ++MFI) { const MachineBasicBlock &MBB(*MFI); @@ -892,7 +946,7 @@ void MachineVerifier::calcRegsPassed() { // similar to calcRegsPassed, only backwards. void MachineVerifier::calcRegsRequired() { // First push live-in regs to predecessors' vregsRequired. - DenseSet<const MachineBasicBlock*> todo; + SmallPtrSet<const MachineBasicBlock*, 8> todo; for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); MFI != MFE; ++MFI) { const MachineBasicBlock &MBB(*MFI); @@ -925,9 +979,10 @@ void MachineVerifier::calcRegsRequired() { // Check PHI instructions at the beginning of MBB. It is assumed that // calcRegsPassed has been run so BBInfo::isLiveOut is valid. 
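// A standalone sketch of how the collected register-mask operands are applied
// in visitMachineInstrAfter above.  A regmask is a packed bit vector with one
// bit per physical register; in LLVM a *set* bit means the register is
// preserved across the instruction (typically a call), so a clear bit marks a
// clobber, mirroring MachineOperand::clobbersPhysReg.  The live-set handling
// below is simplified and is not the verifier's real code.
#include <cstdint>
#include <set>
#include <vector>

inline bool clobbersPhysReg(const uint32_t *Mask, unsigned PhysReg) {
  return !(Mask[PhysReg / 32] & (1u << (PhysReg % 32)));
}

// Remove every live physical register that the mask does not preserve.
void applyRegMask(std::set<unsigned> &LiveRegs, const uint32_t *Mask) {
  std::vector<unsigned> Dead;
  for (unsigned Reg : LiveRegs)
    if (clobbersPhysReg(Mask, Reg))
      Dead.push_back(Reg);
  for (unsigned Reg : Dead)
    LiveRegs.erase(Reg);
}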
void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) { + SmallPtrSet<const MachineBasicBlock*, 8> seen; for (MachineBasicBlock::const_iterator BBI = MBB->begin(), BBE = MBB->end(); BBI != BBE && BBI->isPHI(); ++BBI) { - DenseSet<const MachineBasicBlock*> seen; + seen.clear(); for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) { unsigned Reg = BBI->getOperand(i).getReg(); @@ -968,8 +1023,17 @@ void MachineVerifier::visitMachineFunctionAfter() { } // Now check liveness info if available - if (LiveVars || LiveInts) - calcRegsRequired(); + calcRegsRequired(); + + if (MRI->isSSA() && !MF->empty()) { + BBInfo &MInfo = MBBInfoMap[&MF->front()]; + for (RegSet::iterator + I = MInfo.vregsRequired.begin(), E = MInfo.vregsRequired.end(); I != E; + ++I) + report("Virtual register def doesn't dominate all uses.", + MRI->getVRegDef(*I)); + } + if (LiveVars) verifyLiveVariables(); if (LiveInts) @@ -1065,33 +1129,43 @@ void MachineVerifier::verifyLiveIntervals() { report("No instruction at def index", MF); *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << " in " << LI << '\n'; - } else if (!MI->modifiesRegister(LI.reg, TRI)) { - report("Defining instruction does not modify register", MI); - *OS << "Valno #" << VNI->id << " in " << LI << '\n'; + continue; } + bool hasDef = false; bool isEarlyClobber = false; - if (MI) { - for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(), - MOE = MI->operands_end(); MOI != MOE; ++MOI) { - if (MOI->isReg() && MOI->getReg() == LI.reg && MOI->isDef() && - MOI->isEarlyClobber()) { - isEarlyClobber = true; - break; - } + for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) { + if (!MOI->isReg() || !MOI->isDef()) + continue; + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { + if (MOI->getReg() != LI.reg) + continue; + } else { + if (!TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) || + !TRI->regsOverlap(LI.reg, MOI->getReg())) + continue; } + hasDef = true; + if (MOI->isEarlyClobber()) + isEarlyClobber = true; + } + + if (!hasDef) { + report("Defining instruction does not modify register", MI); + *OS << "Valno #" << VNI->id << " in " << LI << '\n'; } // Early clobber defs begin at USE slots, but other defs must begin at // DEF slots. if (isEarlyClobber) { - if (!VNI->def.isUse()) { - report("Early clobber def must be at a USE slot", MF); + if (!VNI->def.isEarlyClobber()) { + report("Early clobber def must be at an early-clobber slot", MF); *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << " in " << LI << '\n'; } - } else if (!VNI->def.isDef()) { - report("Non-PHI, non-early clobber def must be at a DEF slot", MF); + } else if (!VNI->def.isRegister()) { + report("Non-PHI, non-early clobber def must be at a register slot", + MF); *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << " in " << LI << '\n'; } @@ -1137,32 +1211,76 @@ void MachineVerifier::verifyLiveIntervals() { *OS << " in " << LI << '\n'; continue; } - if (I->end != LiveInts->getMBBEndIdx(EndMBB)) { - // The live segment is ending inside EndMBB - const MachineInstr *MI = - LiveInts->getInstructionFromIndex(I->end.getPrevSlot()); - if (!MI) { - report("Live segment doesn't end at a valid instruction", EndMBB); + + // No more checks for live-out segments. 
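// A compact sketch of the slot layout the new value-number checks rely on.
// Each instruction position carries several consecutive slots; early-clobber
// defs live at the early-clobber slot, ordinary (non-PHI) defs at the register
// slot.  This enum-based model only illustrates the rule verified above and is
// not the real SlotIndex implementation.
enum SlotKind { BlockSlot, EarlyClobberSlot, RegisterSlot, DeadSlot };

struct Slot {
  unsigned InstrIdx;
  SlotKind Kind;
};

// Verify a def slot against the operand flags, as verifyLiveIntervals now does.
bool defSlotIsConsistent(const Slot &Def, bool IsEarlyClobberDef) {
  if (IsEarlyClobberDef)
    return Def.Kind == EarlyClobberSlot;  // EC defs must sit at the EC slot
  return Def.Kind == RegisterSlot;        // other defs at the register slot
}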
+ if (I->end == LiveInts->getMBBEndIdx(EndMBB)) + continue; + + // The live segment is ending inside EndMBB + const MachineInstr *MI = + LiveInts->getInstructionFromIndex(I->end.getPrevSlot()); + if (!MI) { + report("Live segment doesn't end at a valid instruction", EndMBB); I->print(*OS); *OS << " in " << LI << '\n' << "Basic block starts at " - << MBBStartIdx << '\n'; - } else if (TargetRegisterInfo::isVirtualRegister(LI.reg) && - !MI->readsVirtualRegister(LI.reg)) { - // A live range can end with either a redefinition, a kill flag on a - // use, or a dead flag on a def. - // FIXME: Should we check for each of these? - bool hasDeadDef = false; - for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(), - MOE = MI->operands_end(); MOI != MOE; ++MOI) { - if (MOI->isReg() && MOI->getReg() == LI.reg && MOI->isDef() && MOI->isDead()) { - hasDeadDef = true; - break; - } - } + << MBBStartIdx << '\n'; + continue; + } + + // The block slot must refer to a basic block boundary. + if (I->end.isBlock()) { + report("Live segment ends at B slot of an instruction", MI); + I->print(*OS); + *OS << " in " << LI << '\n'; + } + + if (I->end.isDead()) { + // Segment ends on the dead slot. + // That means there must be a dead def. + if (!SlotIndex::isSameInstr(I->start, I->end)) { + report("Live segment ending at dead slot spans instructions", MI); + I->print(*OS); + *OS << " in " << LI << '\n'; + } + } + + // A live segment can only end at an early-clobber slot if it is being + // redefined by an early-clobber def. + if (I->end.isEarlyClobber()) { + if (I+1 == E || (I+1)->start != I->end) { + report("Live segment ending at early clobber slot must be " + "redefined by an EC def in the same instruction", MI); + I->print(*OS); + *OS << " in " << LI << '\n'; + } + } + + // The following checks only apply to virtual registers. Physreg liveness + // is too weird to check. + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { + // A live range can end with either a redefinition, a kill flag on a + // use, or a dead flag on a def. + bool hasRead = false; + bool hasDeadDef = false; + for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) { + if (!MOI->isReg() || MOI->getReg() != LI.reg) + continue; + if (MOI->readsReg()) + hasRead = true; + if (MOI->isDef() && MOI->isDead()) + hasDeadDef = true; + } + if (I->end.isDead()) { if (!hasDeadDef) { - report("Instruction killing live segment neither defines nor reads " - "register", MI); + report("Instruction doesn't have a dead def operand", MI); + I->print(*OS); + *OS << " in " << LI << '\n'; + } + } else { + if (!hasRead) { + report("Instruction ending live range doesn't read the register", + MI); I->print(*OS); *OS << " in " << LI << '\n'; } @@ -1192,8 +1310,8 @@ void MachineVerifier::verifyLiveIntervals() { // Check that VNI is live-out of all predecessors. 
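// A self-contained restatement of the end-of-segment rules added above for
// virtual registers: a live segment may not end at a block-boundary slot, a
// segment ending at a dead slot must be a single-instruction dead def, and
// otherwise the ending instruction must read the register.  The flags struct
// is illustrative only; the real checks walk ConstMIBundleOperands.
#include <cstdio>

struct EndCheck {
  bool EndsAtBlockSlot;     // I->end.isBlock()
  bool EndsAtDeadSlot;      // I->end.isDead()
  bool SameInstrAsStart;    // SlotIndex::isSameInstr(I->start, I->end)
  bool HasDeadDef;          // ending instruction has a dead def of the register
  bool HasRead;             // ending instruction reads the register
};

void checkSegmentEnd(const EndCheck &C) {
  if (C.EndsAtBlockSlot)
    std::printf("live segment ends at a block (B) slot\n");
  if (C.EndsAtDeadSlot) {
    if (!C.SameInstrAsStart)
      std::printf("segment ending at dead slot spans instructions\n");
    if (!C.HasDeadDef)
      std::printf("instruction doesn't have a dead def operand\n");
  } else if (!C.HasRead) {
    std::printf("instruction ending live range doesn't read the register\n");
  }
}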
for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(), PE = MFI->pred_end(); PI != PE; ++PI) { - SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI).getPrevSlot(); - const VNInfo *PVNI = LI.getVNInfoAt(PEnd); + SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI); + const VNInfo *PVNI = LI.getVNInfoBefore(PEnd); if (VNI->isPHIDef() && VNI->def == LiveInts->getMBBStartIdx(MFI)) continue; @@ -1201,7 +1319,7 @@ void MachineVerifier::verifyLiveIntervals() { if (!PVNI) { report("Register not marked live out of predecessor", *PI); *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber() - << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live at " + << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before " << PEnd << " in " << LI << '\n'; continue; } diff --git a/lib/CodeGen/ObjectCodeEmitter.cpp b/lib/CodeGen/ObjectCodeEmitter.cpp deleted file mode 100644 index cf05275d7a31..000000000000 --- a/lib/CodeGen/ObjectCodeEmitter.cpp +++ /dev/null @@ -1,141 +0,0 @@ -//===-- llvm/CodeGen/ObjectCodeEmitter.cpp -------------------- -*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/CodeGen/BinaryObject.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineRelocation.h" -#include "llvm/CodeGen/ObjectCodeEmitter.h" - -//===----------------------------------------------------------------------===// -// ObjectCodeEmitter Implementation -//===----------------------------------------------------------------------===// - -namespace llvm { - -ObjectCodeEmitter::ObjectCodeEmitter() : BO(0) {} -ObjectCodeEmitter::ObjectCodeEmitter(BinaryObject *bo) : BO(bo) {} -ObjectCodeEmitter::~ObjectCodeEmitter() {} - -/// setBinaryObject - set the BinaryObject we are writting to -void ObjectCodeEmitter::setBinaryObject(BinaryObject *bo) { BO = bo; } - -/// emitByte - This callback is invoked when a byte needs to be -/// written to the data stream, without buffer overflow testing. -void ObjectCodeEmitter::emitByte(uint8_t B) { - BO->emitByte(B); -} - -/// emitWordLE - This callback is invoked when a 32-bit word needs to be -/// written to the data stream in little-endian format. -void ObjectCodeEmitter::emitWordLE(uint32_t W) { - BO->emitWordLE(W); -} - -/// emitWordBE - This callback is invoked when a 32-bit word needs to be -/// written to the data stream in big-endian format. -void ObjectCodeEmitter::emitWordBE(uint32_t W) { - BO->emitWordBE(W); -} - -/// emitDWordLE - This callback is invoked when a 64-bit word needs to be -/// written to the data stream in little-endian format. -void ObjectCodeEmitter::emitDWordLE(uint64_t W) { - BO->emitDWordLE(W); -} - -/// emitDWordBE - This callback is invoked when a 64-bit word needs to be -/// written to the data stream in big-endian format. -void ObjectCodeEmitter::emitDWordBE(uint64_t W) { - BO->emitDWordBE(W); -} - -/// emitAlignment - Align 'BO' to the necessary alignment boundary. -void ObjectCodeEmitter::emitAlignment(unsigned Alignment /* 0 */, - uint8_t fill /* 0 */) { - BO->emitAlignment(Alignment, fill); -} - -/// emitULEB128Bytes - This callback is invoked when a ULEB128 needs to be -/// written to the data stream. 
-void ObjectCodeEmitter::emitULEB128Bytes(uint64_t Value) { - BO->emitULEB128Bytes(Value); -} - -/// emitSLEB128Bytes - This callback is invoked when a SLEB128 needs to be -/// written to the data stream. -void ObjectCodeEmitter::emitSLEB128Bytes(uint64_t Value) { - BO->emitSLEB128Bytes(Value); -} - -/// emitString - This callback is invoked when a String needs to be -/// written to the data stream. -void ObjectCodeEmitter::emitString(const std::string &String) { - BO->emitString(String); -} - -/// getCurrentPCValue - This returns the address that the next emitted byte -/// will be output to. -uintptr_t ObjectCodeEmitter::getCurrentPCValue() const { - return BO->getCurrentPCOffset(); -} - -/// getCurrentPCOffset - Return the offset from the start of the emitted -/// buffer that we are currently writing to. -uintptr_t ObjectCodeEmitter::getCurrentPCOffset() const { - return BO->getCurrentPCOffset(); -} - -/// addRelocation - Whenever a relocatable address is needed, it should be -/// noted with this interface. -void ObjectCodeEmitter::addRelocation(const MachineRelocation& relocation) { - BO->addRelocation(relocation); -} - -/// StartMachineBasicBlock - This should be called by the target when a new -/// basic block is about to be emitted. This way the MCE knows where the -/// start of the block is, and can implement getMachineBasicBlockAddress. -void ObjectCodeEmitter::StartMachineBasicBlock(MachineBasicBlock *MBB) { - if (MBBLocations.size() <= (unsigned)MBB->getNumber()) - MBBLocations.resize((MBB->getNumber()+1)*2); - MBBLocations[MBB->getNumber()] = getCurrentPCOffset(); -} - -/// getMachineBasicBlockAddress - Return the address of the specified -/// MachineBasicBlock, only usable after the label for the MBB has been -/// emitted. -uintptr_t -ObjectCodeEmitter::getMachineBasicBlockAddress(MachineBasicBlock *MBB) const { - assert(MBBLocations.size() > (unsigned)MBB->getNumber() && - MBBLocations[MBB->getNumber()] && "MBB not emitted!"); - return MBBLocations[MBB->getNumber()]; -} - -/// getJumpTableEntryAddress - Return the address of the jump table with index -/// 'Index' in the function that last called initJumpTableInfo. -uintptr_t ObjectCodeEmitter::getJumpTableEntryAddress(unsigned Index) const { - assert(JTLocations.size() > Index && "JT not emitted!"); - return JTLocations[Index]; -} - -/// getConstantPoolEntryAddress - Return the address of the 'Index' entry in -/// the constant pool that was last emitted with the emitConstantPool method. -uintptr_t ObjectCodeEmitter::getConstantPoolEntryAddress(unsigned Index) const { - assert(CPLocations.size() > Index && "CP not emitted!"); - return CPLocations[Index]; -} - -/// getConstantPoolEntrySection - Return the section of the 'Index' entry in -/// the constant pool that was last emitted with the emitConstantPool method. 
-uintptr_t ObjectCodeEmitter::getConstantPoolEntrySection(unsigned Index) const { - assert(CPSections.size() > Index && "CP not emitted!"); - return CPSections[Index]; -} - -} // end namespace llvm - diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp index c05be130ec61..6da313e632af 100644 --- a/lib/CodeGen/OptimizePHIs.cpp +++ b/lib/CodeGen/OptimizePHIs.cpp @@ -56,11 +56,10 @@ namespace { } char OptimizePHIs::ID = 0; +char &llvm::OptimizePHIsID = OptimizePHIs::ID; INITIALIZE_PASS(OptimizePHIs, "opt-phis", "Optimize machine instruction PHIs", false, false) -FunctionPass *llvm::createOptimizePHIsPass() { return new OptimizePHIs(); } - bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) { MRI = &Fn.getRegInfo(); TII = Fn.getTarget().getInstrInfo(); @@ -165,7 +164,11 @@ bool OptimizePHIs::OptimizeBB(MachineBasicBlock &MBB) { InstrSet PHIsInCycle; if (IsSingleValuePHICycle(MI, SingleValReg, PHIsInCycle) && SingleValReg != 0) { - MRI->replaceRegWith(MI->getOperand(0).getReg(), SingleValReg); + unsigned OldReg = MI->getOperand(0).getReg(); + if (!MRI->constrainRegClass(SingleValReg, MRI->getRegClass(OldReg))) + continue; + + MRI->replaceRegWith(OldReg, SingleValReg); MI->eraseFromParent(); ++NumPHICycles; Changed = true; diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index 6994aa58fbd5..0ed4c34bb105 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -92,11 +92,15 @@ STATISTIC(NumCriticalEdgesSplit, "Number of critical edges split"); STATISTIC(NumReused, "Number of reused lowered phis"); char PHIElimination::ID = 0; -INITIALIZE_PASS(PHIElimination, "phi-node-elimination", - "Eliminate PHI nodes for register allocation", false, false) - char& llvm::PHIEliminationID = PHIElimination::ID; +INITIALIZE_PASS_BEGIN(PHIElimination, "phi-node-elimination", + "Eliminate PHI nodes for register allocation", + false, false) +INITIALIZE_PASS_DEPENDENCY(LiveVariables) +INITIALIZE_PASS_END(PHIElimination, "phi-node-elimination", + "Eliminate PHI nodes for register allocation", false, false) + void PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<LiveVariables>(); AU.addPreserved<MachineDominatorTree>(); @@ -241,7 +245,6 @@ void PHIElimination::LowerAtomicPHINode( LiveVariables::VarInfo &VI = LV->getVarInfo(IncomingReg); // Increment use count of the newly created virtual register. - VI.NumUses++; LV->setPHIJoin(IncomingReg); // When we are reusing the incoming register, it may already have been @@ -410,7 +413,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, return false; // Quick exit for basic blocks without PHIs. 
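// A standalone model of the guard added to OptimizePHIs above: before rewriting
// all uses of the PHI's result to the single incoming register, make sure that
// register's class can be constrained to satisfy the old register's class.
// Here a "register class" is just a set of allowed registers and constraining
// means intersecting; the real MRI->constrainRegClass works on the
// TargetRegisterClass hierarchy, so this is only an illustration.
#include <algorithm>
#include <iterator>
#include <set>

typedef std::set<unsigned> RegClass;

// Returns false (and leaves RC untouched) when the classes are incompatible.
bool constrainRegClass(RegClass &RC, const RegClass &Required) {
  RegClass Common;
  std::set_intersection(RC.begin(), RC.end(), Required.begin(), Required.end(),
                        std::inserter(Common, Common.begin()));
  if (Common.empty())
    return false;
  RC = Common;
  return true;
}

// Usage mirroring the pass: only replace OldReg by SingleValReg when the
// constraint succeeds; otherwise leave the PHI cycle alone.
bool tryReplace(RegClass &SingleValClass, const RegClass &OldRegClass) {
  if (!constrainRegClass(SingleValClass, OldRegClass))
    return false;   // incompatible classes: skip this PHI
  // ... replaceRegWith(OldReg, SingleValReg) and erase the PHI would go here ...
  return true;
}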
bool Changed = false; - for (MachineBasicBlock::const_iterator BBI = MBB.begin(), BBE = MBB.end(); + for (MachineBasicBlock::iterator BBI = MBB.begin(), BBE = MBB.end(); BBI != BBE && BBI->isPHI(); ++BBI) { for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) { unsigned Reg = BBI->getOperand(i).getReg(); diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index 315aedddb9ef..53d1fcf7377a 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -12,62 +12,617 @@ // //===---------------------------------------------------------------------===// -#include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/PassManager.h" +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Assembly/PrintModulePass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" using namespace llvm; +static cl::opt<bool> DisablePostRA("disable-post-ra", cl::Hidden, + cl::desc("Disable Post Regalloc")); +static cl::opt<bool> DisableBranchFold("disable-branch-fold", cl::Hidden, + cl::desc("Disable branch folding")); +static cl::opt<bool> DisableTailDuplicate("disable-tail-duplicate", cl::Hidden, + cl::desc("Disable tail duplication")); +static cl::opt<bool> DisableEarlyTailDup("disable-early-taildup", cl::Hidden, + cl::desc("Disable pre-register allocation tail duplication")); +static cl::opt<bool> EnableBlockPlacement("enable-block-placement", + cl::Hidden, cl::desc("Enable probability-driven block placement")); +static cl::opt<bool> EnableBlockPlacementStats("enable-block-placement-stats", + cl::Hidden, cl::desc("Collect probability-driven block placement stats")); +static cl::opt<bool> DisableCodePlace("disable-code-place", cl::Hidden, + cl::desc("Disable code placement")); +static cl::opt<bool> DisableSSC("disable-ssc", cl::Hidden, + cl::desc("Disable Stack Slot Coloring")); +static cl::opt<bool> DisableMachineDCE("disable-machine-dce", cl::Hidden, + cl::desc("Disable Machine Dead Code Elimination")); +static cl::opt<bool> DisableMachineLICM("disable-machine-licm", cl::Hidden, + cl::desc("Disable Machine LICM")); +static cl::opt<bool> DisableMachineCSE("disable-machine-cse", cl::Hidden, + cl::desc("Disable Machine Common Subexpression Elimination")); +static cl::opt<cl::boolOrDefault> +OptimizeRegAlloc("optimize-regalloc", cl::Hidden, + cl::desc("Enable optimized register allocation compilation path.")); +static cl::opt<cl::boolOrDefault> +EnableMachineSched("enable-misched", cl::Hidden, + cl::desc("Enable the machine instruction scheduling pass.")); +static cl::opt<bool> EnableStrongPHIElim("strong-phi-elim", cl::Hidden, + cl::desc("Use strong PHI elimination.")); +static cl::opt<bool> DisablePostRAMachineLICM("disable-postra-machine-licm", + cl::Hidden, + cl::desc("Disable Machine LICM")); +static cl::opt<bool> DisableMachineSink("disable-machine-sink", cl::Hidden, + cl::desc("Disable Machine Sinking")); +static cl::opt<bool> DisableLSR("disable-lsr", cl::Hidden, + cl::desc("Disable Loop Strength Reduction Pass")); +static cl::opt<bool> DisableCGP("disable-cgp", cl::Hidden, + cl::desc("Disable Codegen Prepare")); +static cl::opt<bool> DisableCopyProp("disable-copyprop", cl::Hidden, + cl::desc("Disable Copy Propagation 
pass")); +static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden, + cl::desc("Print LLVM IR produced by the loop-reduce pass")); +static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden, + cl::desc("Print LLVM IR input to isel pass")); +static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden, + cl::desc("Dump garbage collector data")); +static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden, + cl::desc("Verify generated machine code"), + cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL)); + +/// Allow standard passes to be disabled by command line options. This supports +/// simple binary flags that either suppress the pass or do nothing. +/// i.e. -disable-mypass=false has no effect. +/// These should be converted to boolOrDefault in order to use applyOverride. +static AnalysisID applyDisable(AnalysisID ID, bool Override) { + if (Override) + return &NoPassID; + return ID; +} + +/// Allow Pass selection to be overriden by command line options. This supports +/// flags with ternary conditions. TargetID is passed through by default. The +/// pass is suppressed when the option is false. When the option is true, the +/// StandardID is selected if the target provides no default. +static AnalysisID applyOverride(AnalysisID TargetID, cl::boolOrDefault Override, + AnalysisID StandardID) { + switch (Override) { + case cl::BOU_UNSET: + return TargetID; + case cl::BOU_TRUE: + if (TargetID != &NoPassID) + return TargetID; + if (StandardID == &NoPassID) + report_fatal_error("Target cannot enable pass"); + return StandardID; + case cl::BOU_FALSE: + return &NoPassID; + } + llvm_unreachable("Invalid command line option state"); +} + +/// Allow standard passes to be disabled by the command line, regardless of who +/// is adding the pass. +/// +/// StandardID is the pass identified in the standard pass pipeline and provided +/// to addPass(). It may be a target-specific ID in the case that the target +/// directly adds its own pass, but in that case we harmlessly fall through. +/// +/// TargetID is the pass that the target has configured to override StandardID. +/// +/// StandardID may be a pseudo ID. In that case TargetID is the name of the real +/// pass to run. This allows multiple options to control a single pass depending +/// on where in the pipeline that pass is added. 
+static AnalysisID overridePass(AnalysisID StandardID, AnalysisID TargetID) { + if (StandardID == &PostRASchedulerID) + return applyDisable(TargetID, DisablePostRA); + + if (StandardID == &BranchFolderPassID) + return applyDisable(TargetID, DisableBranchFold); + + if (StandardID == &TailDuplicateID) + return applyDisable(TargetID, DisableTailDuplicate); + + if (StandardID == &TargetPassConfig::EarlyTailDuplicateID) + return applyDisable(TargetID, DisableEarlyTailDup); + + if (StandardID == &MachineBlockPlacementID) + return applyDisable(TargetID, DisableCodePlace); + + if (StandardID == &CodePlacementOptID) + return applyDisable(TargetID, DisableCodePlace); + + if (StandardID == &StackSlotColoringID) + return applyDisable(TargetID, DisableSSC); + + if (StandardID == &DeadMachineInstructionElimID) + return applyDisable(TargetID, DisableMachineDCE); + + if (StandardID == &MachineLICMID) + return applyDisable(TargetID, DisableMachineLICM); + + if (StandardID == &MachineCSEID) + return applyDisable(TargetID, DisableMachineCSE); + + if (StandardID == &MachineSchedulerID) + return applyOverride(TargetID, EnableMachineSched, StandardID); + + if (StandardID == &TargetPassConfig::PostRAMachineLICMID) + return applyDisable(TargetID, DisablePostRAMachineLICM); + + if (StandardID == &MachineSinkingID) + return applyDisable(TargetID, DisableMachineSink); + + if (StandardID == &MachineCopyPropagationID) + return applyDisable(TargetID, DisableCopyProp); + + return TargetID; +} + //===---------------------------------------------------------------------===// +/// TargetPassConfig +//===---------------------------------------------------------------------===// + +INITIALIZE_PASS(TargetPassConfig, "targetpassconfig", + "Target Pass Configuration", false, false) +char TargetPassConfig::ID = 0; + +static char NoPassIDAnchor = 0; +char &llvm::NoPassID = NoPassIDAnchor; + +// Pseudo Pass IDs. +char TargetPassConfig::EarlyTailDuplicateID = 0; +char TargetPassConfig::PostRAMachineLICMID = 0; + +namespace llvm { +class PassConfigImpl { +public: + // List of passes explicitly substituted by this target. Normally this is + // empty, but it is a convenient way to suppress or replace specific passes + // that are part of a standard pass pipeline without overridding the entire + // pipeline. This mechanism allows target options to inherit a standard pass's + // user interface. For example, a target may disable a standard pass by + // default by substituting NoPass, and the user may still enable that standard + // pass with an explicit command line option. + DenseMap<AnalysisID,AnalysisID> TargetPasses; +}; +} // namespace llvm + +// Out of line virtual method. +TargetPassConfig::~TargetPassConfig() { + delete Impl; +} + +// Out of line constructor provides default values for pass options and +// registers all common codegen passes. +TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) + : ImmutablePass(ID), TM(tm), PM(pm), Impl(0), Initialized(false), + DisableVerify(false), + EnableTailMerge(true) { + + Impl = new PassConfigImpl(); + + // Register all target independent codegen passes to activate their PassIDs, + // including this pass itself. + initializeCodeGen(*PassRegistry::getPassRegistry()); + + // Substitute Pseudo Pass IDs for real ones. + substitutePass(EarlyTailDuplicateID, TailDuplicateID); + substitutePass(PostRAMachineLICMID, MachineLICMID); + + // Temporarily disable experimental passes. 
+ substitutePass(MachineSchedulerID, NoPassID); +} + +/// createPassConfig - Create a pass configuration object to be used by +/// addPassToEmitX methods for generating a pipeline of CodeGen passes. +/// +/// Targets may override this to extend TargetPassConfig. +TargetPassConfig *LLVMTargetMachine::createPassConfig(PassManagerBase &PM) { + return new TargetPassConfig(this, PM); +} + +TargetPassConfig::TargetPassConfig() + : ImmutablePass(ID), PM(*(PassManagerBase*)0) { + llvm_unreachable("TargetPassConfig should not be constructed on-the-fly"); +} + +// Helper to verify the analysis is really immutable. +void TargetPassConfig::setOpt(bool &Opt, bool Val) { + assert(!Initialized && "PassConfig is immutable"); + Opt = Val; +} + +void TargetPassConfig::substitutePass(char &StandardID, char &TargetID) { + Impl->TargetPasses[&StandardID] = &TargetID; +} + +AnalysisID TargetPassConfig::getPassSubstitution(AnalysisID ID) const { + DenseMap<AnalysisID, AnalysisID>::const_iterator + I = Impl->TargetPasses.find(ID); + if (I == Impl->TargetPasses.end()) + return ID; + return I->second; +} + +/// Add a CodeGen pass at this point in the pipeline after checking for target +/// and command line overrides. +AnalysisID TargetPassConfig::addPass(char &ID) { + assert(!Initialized && "PassConfig is immutable"); + + AnalysisID TargetID = getPassSubstitution(&ID); + AnalysisID FinalID = overridePass(&ID, TargetID); + if (FinalID == &NoPassID) + return FinalID; + + Pass *P = Pass::createPass(FinalID); + if (!P) + llvm_unreachable("Pass ID not registered"); + PM.add(P); + return FinalID; +} + +void TargetPassConfig::printAndVerify(const char *Banner) const { + if (TM->shouldPrintMachineCode()) + PM.add(createMachineFunctionPrinterPass(dbgs(), Banner)); + + if (VerifyMachineCode) + PM.add(createMachineVerifierPass(Banner)); +} + +/// Add common target configurable passes that perform LLVM IR to IR transforms +/// following machine independent optimization. +void TargetPassConfig::addIRPasses() { + // Basic AliasAnalysis support. + // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that + // BasicAliasAnalysis wins if they disagree. This is intended to help + // support "obvious" type-punning idioms. + PM.add(createTypeBasedAliasAnalysisPass()); + PM.add(createBasicAliasAnalysisPass()); + + // Before running any passes, run the verifier to determine if the input + // coming from the front-end and/or optimizer is valid. + if (!DisableVerify) + PM.add(createVerifierPass()); + + // Run loop strength reduction before anything else. + if (getOptLevel() != CodeGenOpt::None && !DisableLSR) { + PM.add(createLoopStrengthReducePass(getTargetLowering())); + if (PrintLSR) + PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs())); + } + + PM.add(createGCLoweringPass()); + + // Make sure that no unreachable blocks are instruction selected. + PM.add(createUnreachableBlockEliminationPass()); +} + +/// Add common passes that perform LLVM IR to IR transforms in preparation for +/// instruction selection. +void TargetPassConfig::addISelPrepare() { + if (getOptLevel() != CodeGenOpt::None && !DisableCGP) + PM.add(createCodeGenPreparePass(getTargetLowering())); + + PM.add(createStackProtectorPass(getTargetLowering())); + + addPreISel(); + + if (PrintISelInput) + PM.add(createPrintFunctionPass("\n\n" + "*** Final LLVM Code input to ISel ***\n", + &dbgs())); + + // All passes which modify the LLVM IR are now complete; run the verifier + // to ensure that the IR is valid. 
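// A compact, standalone model of the addPass flow shown above: the requested
// standard ID is first mapped through the target's substitution table, then
// through the command-line override layer, and only a surviving ID is turned
// into a pass and handed to the pass manager.  The map, callbacks, and sentinel
// below are placeholders, not the real PassRegistry/PassManagerBase machinery.
#include <functional>
#include <map>

typedef const void *PassID;
static const char NoPassTag = 0;
static const PassID NoPass = &NoPassTag;   // stands in for &NoPassID

struct PassConfigSketch {
  std::map<PassID, PassID> TargetPasses;            // substitutePass table
  std::function<PassID(PassID, PassID)> Override;   // command-line layer
  std::function<void(PassID)> AddToManager;         // PM.add(createPass(ID))

  void substitutePass(PassID StandardID, PassID TargetID) {
    TargetPasses[StandardID] = TargetID;
  }

  PassID getPassSubstitution(PassID ID) const {
    std::map<PassID, PassID>::const_iterator I = TargetPasses.find(ID);
    return I == TargetPasses.end() ? ID : I->second;
  }

  PassID addPass(PassID ID) {
    PassID TargetID = getPassSubstitution(ID);
    PassID FinalID = Override(ID, TargetID);
    if (FinalID == NoPass)
      return FinalID;        // disabled: nothing reaches the manager
    AddToManager(FinalID);
    return FinalID;          // callers compare against the sentinel to decide
  }                          // whether to printAndVerify after this pass
};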
+ if (!DisableVerify) + PM.add(createVerifierPass()); +} + +/// Add the complete set of target-independent postISel code generator passes. /// -/// RegisterRegAlloc class - Track the registration of register allocators. +/// This can be read as the standard order of major LLVM CodeGen stages. Stages +/// with nontrivial configuration or multiple passes are broken out below in +/// add%Stage routines. /// +/// Any TargetPassConfig::addXX routine may be overriden by the Target. The +/// addPre/Post methods with empty header implementations allow injecting +/// target-specific fixups just before or after major stages. Additionally, +/// targets have the flexibility to change pass order within a stage by +/// overriding default implementation of add%Stage routines below. Each +/// technique has maintainability tradeoffs because alternate pass orders are +/// not well supported. addPre/Post works better if the target pass is easily +/// tied to a common pass. But if it has subtle dependencies on multiple passes, +/// the target should override the stage instead. +/// +/// TODO: We could use a single addPre/Post(ID) hook to allow pass injection +/// before/after any target-independent pass. But it's currently overkill. +void TargetPassConfig::addMachinePasses() { + // Print the instruction selected machine code... + printAndVerify("After Instruction Selection"); + + // Expand pseudo-instructions emitted by ISel. + addPass(ExpandISelPseudosID); + + // Add passes that optimize machine instructions in SSA form. + if (getOptLevel() != CodeGenOpt::None) { + addMachineSSAOptimization(); + } + else { + // If the target requests it, assign local variables to stack slots relative + // to one another and simplify frame index references where possible. + addPass(LocalStackSlotAllocationID); + } + + // Run pre-ra passes. + if (addPreRegAlloc()) + printAndVerify("After PreRegAlloc passes"); + + // Run register allocation and passes that are tightly coupled with it, + // including phi elimination and scheduling. + if (getOptimizeRegAlloc()) + addOptimizedRegAlloc(createRegAllocPass(true)); + else + addFastRegAlloc(createRegAllocPass(false)); + + // Run post-ra passes. + if (addPostRegAlloc()) + printAndVerify("After PostRegAlloc passes"); + + // Insert prolog/epilog code. Eliminate abstract frame index references... + addPass(PrologEpilogCodeInserterID); + printAndVerify("After PrologEpilogCodeInserter"); + + /// Add passes that optimize machine instructions after register allocation. + if (getOptLevel() != CodeGenOpt::None) + addMachineLateOptimization(); + + // Expand pseudo instructions before second scheduling pass. + addPass(ExpandPostRAPseudosID); + printAndVerify("After ExpandPostRAPseudos"); + + // Run pre-sched2 passes. + if (addPreSched2()) + printAndVerify("After PreSched2 passes"); + + // Second pass scheduler. + if (getOptLevel() != CodeGenOpt::None) { + addPass(PostRASchedulerID); + printAndVerify("After PostRAScheduler"); + } + + // GC + addPass(GCMachineCodeAnalysisID); + if (PrintGCInfo) + PM.add(createGCInfoPrinter(dbgs())); + + // Basic block placement. + if (getOptLevel() != CodeGenOpt::None) + addBlockPlacement(); + + if (addPreEmitPass()) + printAndVerify("After PreEmit passes"); +} + +/// Add passes that optimize machine instructions in SSA form. +void TargetPassConfig::addMachineSSAOptimization() { + // Pre-ra tail duplication. 
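// A sketch of how a backend is expected to plug into the add%Stage and
// addPre*/addPost* hooks described above.  These are simplified stand-in
// classes, not the real TargetPassConfig interface; "MyTargetPassConfig" and
// the pass it claims to insert are hypothetical.
#include <cstdio>

struct PassConfigBase {
  virtual ~PassConfigBase() {}
  // Hook points with empty default implementations; returning true tells the
  // driver something was added so it can print/verify afterwards.
  virtual bool addPreRegAlloc()  { return false; }
  virtual bool addPostRegAlloc() { return false; }
  virtual bool addPreSched2()    { return false; }
  virtual bool addPreEmitPass()  { return false; }
};

// A hypothetical target that schedules one extra fix-up pass before register
// allocation and otherwise keeps the standard pipeline.
struct MyTargetPassConfig : PassConfigBase {
  bool addPreRegAlloc() override {
    std::printf("adding MyTargetLoadStoreFixup before regalloc\n");
    return true;   // driver will emit "After PreRegAlloc passes"
  }
};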
+ if (addPass(EarlyTailDuplicateID) != &NoPassID) + printAndVerify("After Pre-RegAlloc TailDuplicate"); + + // Optimize PHIs before DCE: removing dead PHI cycles may make more + // instructions dead. + addPass(OptimizePHIsID); + + // If the target requests it, assign local variables to stack slots relative + // to one another and simplify frame index references where possible. + addPass(LocalStackSlotAllocationID); + + // With optimization, dead code should already be eliminated. However + // there is one known exception: lowered code for arguments that are only + // used by tail calls, where the tail calls reuse the incoming stack + // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll). + addPass(DeadMachineInstructionElimID); + printAndVerify("After codegen DCE pass"); + + addPass(MachineLICMID); + addPass(MachineCSEID); + addPass(MachineSinkingID); + printAndVerify("After Machine LICM, CSE and Sinking passes"); + + addPass(PeepholeOptimizerID); + printAndVerify("After codegen peephole optimization pass"); +} + +//===---------------------------------------------------------------------===// +/// Register Allocation Pass Configuration //===---------------------------------------------------------------------===// + +bool TargetPassConfig::getOptimizeRegAlloc() const { + switch (OptimizeRegAlloc) { + case cl::BOU_UNSET: return getOptLevel() != CodeGenOpt::None; + case cl::BOU_TRUE: return true; + case cl::BOU_FALSE: return false; + } + llvm_unreachable("Invalid optimize-regalloc state"); +} + +/// RegisterRegAlloc's global Registry tracks allocator registration. MachinePassRegistry RegisterRegAlloc::Registry; -static FunctionPass *createDefaultRegisterAllocator() { return 0; } +/// A dummy default pass factory indicates whether the register allocator is +/// overridden on the command line. +static FunctionPass *useDefaultRegisterAllocator() { return 0; } static RegisterRegAlloc defaultRegAlloc("default", "pick register allocator based on -O option", - createDefaultRegisterAllocator); + useDefaultRegisterAllocator); -//===---------------------------------------------------------------------===// -/// -/// RegAlloc command line options. -/// -//===---------------------------------------------------------------------===// +/// -regalloc=... command line option. static cl::opt<RegisterRegAlloc::FunctionPassCtor, false, RegisterPassParser<RegisterRegAlloc> > RegAlloc("regalloc", - cl::init(&createDefaultRegisterAllocator), + cl::init(&useDefaultRegisterAllocator), cl::desc("Register allocator to use")); -//===---------------------------------------------------------------------===// +/// Instantiate the default register allocator pass for this target for either +/// the optimized or unoptimized allocation path. This will be added to the pass +/// manager by addFastRegAlloc in the unoptimized case or addOptimizedRegAlloc +/// in the optimized case. /// -/// createRegisterAllocator - choose the appropriate register allocator. +/// A target that uses the standard regalloc pass order for fast or optimized +/// allocation may still override this for per-target regalloc +/// selection. But -regalloc=... always takes precedence. +FunctionPass *TargetPassConfig::createTargetRegisterAllocator(bool Optimized) { + if (Optimized) + return createGreedyRegisterAllocator(); + else + return createFastRegisterAllocator(); +} + +/// Find and instantiate the register allocation pass requested by this target +/// at the current optimization level. 
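// A standalone sketch of the allocator-selection logic above: the command-line
// registry holds a factory, and the dummy "use the default allocator" factory
// is how the code detects that -regalloc= was not given, in which case the
// target supplies its own greedy/fast choice.  Plain function pointers replace
// the real RegisterRegAlloc registry here.
typedef int Allocator;                         // stand-in for FunctionPass*
typedef Allocator (*AllocFactory)(bool Optimized);

static Allocator useDefault(bool)  { return 0; }   // sentinel, never selected
static Allocator greedyAlloc(bool) { return 1; }   // stand-ins for the real
static Allocator fastAlloc(bool)   { return 2; }   // allocator factories

Allocator createRegAllocPass(AllocFactory CommandLineCtor, bool Optimized) {
  if (CommandLineCtor && CommandLineCtor != &useDefault)
    return CommandLineCtor(Optimized);   // explicit -regalloc=... always wins
  // No override: ask the "target" for its default at this optimization level.
  return Optimized ? greedyAlloc(Optimized) : fastAlloc(Optimized);
}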
Different register allocators are +/// defined as separate passes because they may require different analysis. /// -//===---------------------------------------------------------------------===// -FunctionPass *llvm::createRegisterAllocator(CodeGenOpt::Level OptLevel) { +/// This helper ensures that the regalloc= option is always available, +/// even for targets that override the default allocator. +/// +/// FIXME: When MachinePassRegistry register pass IDs instead of function ptrs, +/// this can be folded into addPass. +FunctionPass *TargetPassConfig::createRegAllocPass(bool Optimized) { RegisterRegAlloc::FunctionPassCtor Ctor = RegisterRegAlloc::getDefault(); + // Initialize the global default. if (!Ctor) { Ctor = RegAlloc; RegisterRegAlloc::setDefault(RegAlloc); } + if (Ctor != useDefaultRegisterAllocator) + return Ctor(); - // This forces linking of the linear scan register allocator, - // so -regalloc=linearscan still works in clang. - if (Ctor == createLinearScanRegisterAllocator) - return createLinearScanRegisterAllocator(); + // With no -regalloc= override, ask the target for a regalloc pass. + return createTargetRegisterAllocator(Optimized); +} - if (Ctor != createDefaultRegisterAllocator) - return Ctor(); +/// Add the minimum set of target-independent passes that are required for +/// register allocation. No coalescing or scheduling. +void TargetPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) { + addPass(PHIEliminationID); + addPass(TwoAddressInstructionPassID); - // When the 'default' allocator is requested, pick one based on OptLevel. - switch (OptLevel) { - case CodeGenOpt::None: - return createFastRegisterAllocator(); - default: - return createGreedyRegisterAllocator(); + PM.add(RegAllocPass); + printAndVerify("After Register Allocation"); +} + +/// Add standard target-independent passes that are tightly coupled with +/// optimized register allocation, including coalescing, machine instruction +/// scheduling, and register allocation itself. +void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { + // LiveVariables currently requires pure SSA form. + // + // FIXME: Once TwoAddressInstruction pass no longer uses kill flags, + // LiveVariables can be removed completely, and LiveIntervals can be directly + // computed. (We still either need to regenerate kill flags after regalloc, or + // preferably fix the scavenger to not depend on them). + addPass(LiveVariablesID); + + // Add passes that move from transformed SSA into conventional SSA. This is a + // "copy coalescing" problem. + // + if (!EnableStrongPHIElim) { + // Edge splitting is smarter with machine loop info. + addPass(MachineLoopInfoID); + addPass(PHIEliminationID); + } + addPass(TwoAddressInstructionPassID); + + // FIXME: Either remove this pass completely, or fix it so that it works on + // SSA form. We could modify LiveIntervals to be independent of this pass, But + // it would be even better to simply eliminate *all* IMPLICIT_DEFs before + // leaving SSA. + addPass(ProcessImplicitDefsID); + + if (EnableStrongPHIElim) + addPass(StrongPHIEliminationID); + + addPass(RegisterCoalescerID); + + // PreRA instruction scheduling. + if (addPass(MachineSchedulerID) != &NoPassID) + printAndVerify("After Machine Scheduling"); + + // Add the selected register allocation pass. 
+ PM.add(RegAllocPass); + printAndVerify("After Register Allocation"); + + // FinalizeRegAlloc is convenient until MachineInstrBundles is more mature, + // but eventually, all users of it should probably be moved to addPostRA and + // it can go away. Currently, it's the intended place for targets to run + // FinalizeMachineBundles, because passes other than MachineScheduling an + // RegAlloc itself may not be aware of bundles. + if (addFinalizeRegAlloc()) + printAndVerify("After RegAlloc finalization"); + + // Perform stack slot coloring and post-ra machine LICM. + // + // FIXME: Re-enable coloring with register when it's capable of adding + // kill markers. + addPass(StackSlotColoringID); + + // Run post-ra machine LICM to hoist reloads / remats. + // + // FIXME: can this move into MachineLateOptimization? + addPass(PostRAMachineLICMID); + + printAndVerify("After StackSlotColoring and postra Machine LICM"); +} + +//===---------------------------------------------------------------------===// +/// Post RegAlloc Pass Configuration +//===---------------------------------------------------------------------===// + +/// Add passes that optimize machine instructions after register allocation. +void TargetPassConfig::addMachineLateOptimization() { + // Branch folding must be run after regalloc and prolog/epilog insertion. + if (addPass(BranchFolderPassID) != &NoPassID) + printAndVerify("After BranchFolding"); + + // Tail duplication. + if (addPass(TailDuplicateID) != &NoPassID) + printAndVerify("After TailDuplicate"); + + // Copy propagation. + if (addPass(MachineCopyPropagationID) != &NoPassID) + printAndVerify("After copy propagation pass"); +} + +/// Add standard basic block placement passes. +void TargetPassConfig::addBlockPlacement() { + AnalysisID ID = &NoPassID; + if (EnableBlockPlacement) { + // MachineBlockPlacement is an experimental pass which is disabled by + // default currently. Eventually it should subsume CodePlacementOpt, so + // when enabled, the other is disabled. + ID = addPass(MachineBlockPlacementID); + } else { + ID = addPass(CodePlacementOptID); + } + if (ID != &NoPassID) { + // Run a separate pass to collect block placement statistics. 
+ if (EnableBlockPlacementStats) + addPass(MachineBlockPlacementStatsID); + + printAndVerify("After machine block placement."); } } diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index bbc7ce2d0a42..9c5c029000c0 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -39,7 +39,7 @@ // => // v1 = bitcast v0 // = v0 -// +// //===----------------------------------------------------------------------===// #define DEBUG_TYPE "peephole-opt" @@ -68,7 +68,7 @@ DisablePeephole("disable-peephole", cl::Hidden, cl::init(false), STATISTIC(NumReuse, "Number of extension results reused"); STATISTIC(NumBitcasts, "Number of bitcasts eliminated"); STATISTIC(NumCmps, "Number of compares eliminated"); -STATISTIC(NumImmFold, "Number of move immediate foled"); +STATISTIC(NumImmFold, "Number of move immediate folded"); namespace { class PeepholeOptimizer : public MachineFunctionPass { @@ -109,22 +109,19 @@ namespace { } char PeepholeOptimizer::ID = 0; +char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID; INITIALIZE_PASS_BEGIN(PeepholeOptimizer, "peephole-opts", "Peephole Optimizations", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts", "Peephole Optimizations", false, false) -FunctionPass *llvm::createPeepholeOptimizerPass() { - return new PeepholeOptimizer(); -} - /// OptimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads /// a single register and writes a single register and it does not modify the /// source, and if the source value is preserved as a sub-register of the /// result, then replace all reachable uses of the source with the subreg of the /// result. -/// +/// /// Do not generate an EXTRACT that is used only in a debug use, as this changes /// the code. Since this code does not currently share EXTRACTs, just ignore all /// debug uses. @@ -134,7 +131,7 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, unsigned SrcReg, DstReg, SubIdx; if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx)) return false; - + if (TargetRegisterInfo::isPhysicalRegister(DstReg) || TargetRegisterInfo::isPhysicalRegister(SrcReg)) return false; @@ -240,6 +237,10 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, if (PHIBBs.count(UseMBB)) continue; + // About to add uses of DstReg, clear DstReg's kill flags. 
+ if (!Changed) + MRI->clearKillFlags(DstReg); + unsigned NewVR = MRI->createVirtualRegister(RC); BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(), TII->get(TargetOpcode::COPY), NewVR) @@ -292,7 +293,7 @@ bool PeepholeOptimizer::OptimizeBitcastInstr(MachineInstr *MI, assert(Def && Src && "Malformed bitcast instruction!"); MachineInstr *DefMI = MRI->getVRegDef(Src); - if (!DefMI || !DefMI->getDesc().isBitcast()) + if (!DefMI || !DefMI->isBitcast()) return false; unsigned SrcSrc = 0; @@ -353,7 +354,7 @@ bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI, SmallSet<unsigned, 4> &ImmDefRegs, DenseMap<unsigned, MachineInstr*> &ImmDefMIs) { const MCInstrDesc &MCID = MI->getDesc(); - if (!MCID.isMoveImmediate()) + if (!MI->isMoveImmediate()) return false; if (MCID.getNumDefs() != 1) return false; @@ -363,7 +364,7 @@ bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI, ImmDefRegs.insert(Reg); return true; } - + return false; } @@ -395,7 +396,7 @@ bool PeepholeOptimizer::FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { if (DisablePeephole) return false; - + TM = &MF.getTarget(); TII = TM->getInstrInfo(); MRI = &MF.getRegInfo(); @@ -408,7 +409,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { DenseMap<unsigned, MachineInstr*> ImmDefMIs; for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { MachineBasicBlock *MBB = &*I; - + bool SeenMoveImm = false; LocalMIs.clear(); ImmDefRegs.clear(); @@ -428,17 +429,15 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { continue; } - const MCInstrDesc &MCID = MI->getDesc(); - - if (MCID.isBitcast()) { + if (MI->isBitcast()) { if (OptimizeBitcastInstr(MI, MBB)) { // MI is deleted. LocalMIs.erase(MI); Changed = true; MII = First ? I->begin() : llvm::next(PMII); continue; - } - } else if (MCID.isCompare()) { + } + } else if (MI->isCompare()) { if (OptimizeCmpInstr(MI, MBB)) { // MI is deleted. 
LocalMIs.erase(MI); diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index c73e87733cb4..24d3e5ab0c9d 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -23,7 +23,6 @@ #include "AggressiveAntiDepBreaker.h" #include "CriticalAntiDepBreaker.h" #include "RegisterClassInfo.h" -#include "ScheduleDAGInstrs.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/LatencyPriorityQueue.h" #include "llvm/CodeGen/SchedulerRegistry.h" @@ -32,6 +31,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Target/TargetLowering.h" @@ -45,7 +45,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/Statistic.h" -#include <set> using namespace llvm; STATISTIC(NumNoops, "Number of noops inserted"); @@ -82,16 +81,15 @@ namespace { AliasAnalysis *AA; const TargetInstrInfo *TII; RegisterClassInfo RegClassInfo; - CodeGenOpt::Level OptLevel; public: static char ID; - PostRAScheduler(CodeGenOpt::Level ol) : - MachineFunctionPass(ID), OptLevel(ol) {} + PostRAScheduler() : MachineFunctionPass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired<AliasAnalysis>(); + AU.addRequired<TargetPassConfig>(); AU.addRequired<MachineDominatorTree>(); AU.addPreserved<MachineDominatorTree>(); AU.addRequired<MachineLoopInfo>(); @@ -99,10 +97,6 @@ namespace { MachineFunctionPass::getAnalysisUsage(AU); } - const char *getPassName() const { - return "Post RA top-down list latency scheduler"; - } - bool runOnMachineFunction(MachineFunction &Fn); }; char PostRAScheduler::ID = 0; @@ -130,36 +124,49 @@ namespace { /// AA - AliasAnalysis for making memory reference queries. AliasAnalysis *AA; - /// KillIndices - The index of the most recent kill (proceding bottom-up), - /// or ~0u if the register is not live. - std::vector<unsigned> KillIndices; + /// LiveRegs - true if the register is live. + BitVector LiveRegs; + + /// The schedule. Null SUnit*'s represent noop instructions. + std::vector<SUnit*> Sequence; public: SchedulePostRATDList( MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, AliasAnalysis *AA, const RegisterClassInfo&, TargetSubtargetInfo::AntiDepBreakMode AntiDepMode, - SmallVectorImpl<TargetRegisterClass*> &CriticalPathRCs); + SmallVectorImpl<const TargetRegisterClass*> &CriticalPathRCs); ~SchedulePostRATDList(); - /// StartBlock - Initialize register live-range state for scheduling in + /// startBlock - Initialize register live-range state for scheduling in /// this block. /// - void StartBlock(MachineBasicBlock *BB); + void startBlock(MachineBasicBlock *BB); + + /// Initialize the scheduler state for the next scheduling region. + virtual void enterRegion(MachineBasicBlock *bb, + MachineBasicBlock::iterator begin, + MachineBasicBlock::iterator end, + unsigned endcount); + + /// Notify that the scheduler has finished scheduling the current region. + virtual void exitRegion(); /// Schedule - Schedule the instruction range using list scheduling. /// - void Schedule(); + void schedule(); + + void EmitSchedule(); /// Observe - Update liveness information to account for the current /// instruction, which will not be scheduled. 
/// void Observe(MachineInstr *MI, unsigned Count); - /// FinishBlock - Clean up register live-range state. + /// finishBlock - Clean up register live-range state. /// - void FinishBlock(); + void finishBlock(); /// FixupKills - Fix register kill flags that have been made /// invalid due to scheduling @@ -177,16 +184,23 @@ namespace { // adjustments may be made to the instruction if necessary. Return // true if the operand has been deleted, false if not. bool ToggleKillFlag(MachineInstr *MI, MachineOperand &MO); + + void dumpSchedule() const; }; } +char &llvm::PostRASchedulerID = PostRAScheduler::ID; + +INITIALIZE_PASS(PostRAScheduler, "post-RA-sched", + "Post RA top-down list latency scheduler", false, false) + SchedulePostRATDList::SchedulePostRATDList( MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, AliasAnalysis *AA, const RegisterClassInfo &RCI, TargetSubtargetInfo::AntiDepBreakMode AntiDepMode, - SmallVectorImpl<TargetRegisterClass*> &CriticalPathRCs) - : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits), AA(AA), - KillIndices(TRI->getNumRegs()) + SmallVectorImpl<const TargetRegisterClass*> &CriticalPathRCs) + : ScheduleDAGInstrs(MF, MLI, MDT, /*IsPostRA=*/true), Topo(SUnits), AA(AA), + LiveRegs(TRI->getNumRegs()) { const TargetMachine &TM = MF.getTarget(); const InstrItineraryData *InstrItins = TM.getInstrItineraryData(); @@ -204,16 +218,48 @@ SchedulePostRATDList::~SchedulePostRATDList() { delete AntiDepBreak; } +/// Initialize state associated with the next scheduling region. +void SchedulePostRATDList::enterRegion(MachineBasicBlock *bb, + MachineBasicBlock::iterator begin, + MachineBasicBlock::iterator end, + unsigned endcount) { + ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount); + Sequence.clear(); +} + +/// Print the schedule before exiting the region. +void SchedulePostRATDList::exitRegion() { + DEBUG({ + dbgs() << "*** Final schedule ***\n"; + dumpSchedule(); + dbgs() << '\n'; + }); + ScheduleDAGInstrs::exitRegion(); +} + +/// dumpSchedule - dump the scheduled Sequence. +void SchedulePostRATDList::dumpSchedule() const { + for (unsigned i = 0, e = Sequence.size(); i != e; i++) { + if (SUnit *SU = Sequence[i]) + SU->dump(this); + else + dbgs() << "**** NOOP ****\n"; + } +} + bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { TII = Fn.getTarget().getInstrInfo(); MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>(); AliasAnalysis *AA = &getAnalysis<AliasAnalysis>(); + TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>(); + RegClassInfo.runOnMachineFunction(Fn); // Check for explicit enable/disable of post-ra scheduling. - TargetSubtargetInfo::AntiDepBreakMode AntiDepMode = TargetSubtargetInfo::ANTIDEP_NONE; - SmallVector<TargetRegisterClass*, 4> CriticalPathRCs; + TargetSubtargetInfo::AntiDepBreakMode AntiDepMode = + TargetSubtargetInfo::ANTIDEP_NONE; + SmallVector<const TargetRegisterClass*, 4> CriticalPathRCs; if (EnablePostRAScheduler.getPosition() > 0) { if (!EnablePostRAScheduler) return false; @@ -221,7 +267,8 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { // Check that post-RA scheduling is enabled for this target. // This may upgrade the AntiDepMode. 
const TargetSubtargetInfo &ST = Fn.getTarget().getSubtarget<TargetSubtargetInfo>(); - if (!ST.enablePostRAScheduler(OptLevel, AntiDepMode, CriticalPathRCs)) + if (!ST.enablePostRAScheduler(PassConfig->getOptLevel(), AntiDepMode, + CriticalPathRCs)) return false; } @@ -248,13 +295,13 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { static int bbcnt = 0; if (bbcnt++ % DebugDiv != DebugMod) continue; - dbgs() << "*** DEBUG scheduling " << Fn.getFunction()->getNameStr() << - ":BB#" << MBB->getNumber() << " ***\n"; + dbgs() << "*** DEBUG scheduling " << Fn.getFunction()->getName() + << ":BB#" << MBB->getNumber() << " ***\n"; } #endif // Initialize register live-range state for scheduling in this block. - Scheduler.StartBlock(MBB); + Scheduler.startBlock(MBB); // Schedule each sequence of instructions not interrupted by a label // or anything else that effectively needs to shut down scheduling. @@ -262,8 +309,13 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { unsigned Count = MBB->size(), CurrentCount = Count; for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) { MachineInstr *MI = llvm::prior(I); - if (TII->isSchedulingBoundary(MI, MBB, Fn)) { - Scheduler.Run(MBB, I, Current, CurrentCount); + // Calls are not scheduling boundaries before register allocation, but + // post-ra we don't gain anything by scheduling across calls since we + // don't need to worry about register pressure. + if (MI->isCall() || TII->isSchedulingBoundary(MI, MBB, Fn)) { + Scheduler.enterRegion(MBB, I, Current, CurrentCount); + Scheduler.schedule(); + Scheduler.exitRegion(); Scheduler.EmitSchedule(); Current = MI; CurrentCount = Count - 1; @@ -271,15 +323,19 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { } I = MI; --Count; + if (MI->isBundle()) + Count -= MI->getBundleSize(); } assert(Count == 0 && "Instruction count mismatch!"); assert((MBB->begin() == Current || CurrentCount != 0) && "Instruction count mismatch!"); - Scheduler.Run(MBB, MBB->begin(), Current, CurrentCount); + Scheduler.enterRegion(MBB, MBB->begin(), Current, CurrentCount); + Scheduler.schedule(); + Scheduler.exitRegion(); Scheduler.EmitSchedule(); // Clean up register live-range state. - Scheduler.FinishBlock(); + Scheduler.finishBlock(); // Update register kills Scheduler.FixupKills(MBB); @@ -291,9 +347,9 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { /// StartBlock - Initialize register live-range state for scheduling in /// this block. /// -void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) { +void SchedulePostRATDList::startBlock(MachineBasicBlock *BB) { // Call the superclass. - ScheduleDAGInstrs::StartBlock(BB); + ScheduleDAGInstrs::startBlock(BB); // Reset the hazard recognizer and anti-dep breaker. HazardRec->Reset(); @@ -303,14 +359,14 @@ void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) { /// Schedule - Schedule the instruction range using list scheduling. /// -void SchedulePostRATDList::Schedule() { +void SchedulePostRATDList::schedule() { // Build the scheduling graph. - BuildSchedGraph(AA); + buildSchedGraph(AA); if (AntiDepBreak != NULL) { unsigned Broken = - AntiDepBreak->BreakAntiDependencies(SUnits, Begin, InsertPos, - InsertPosIndex, DbgValues); + AntiDepBreak->BreakAntiDependencies(SUnits, RegionBegin, RegionEnd, + EndIndex, DbgValues); if (Broken != 0) { // We made changes. Update the dependency graph. 
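The rewritten PostRAScheduler::runOnMachineFunction above walks each block bottom-up and closes a scheduling region at every call or target scheduling boundary, running enterRegion/schedule/exitRegion (and EmitSchedule) on each piece. A rough, self-contained illustration of that region-carving walk over a plain list of opcode strings; the instruction encoding and boundary test are invented for the sketch:

// Toy illustration of carving a block into scheduling regions, scanning
// bottom-up and cutting at "boundary" instructions such as calls.
#include <iostream>
#include <string>
#include <vector>

static bool isBoundary(const std::string &Op) {
  return Op == "call" || Op == "label";
}

int main() {
  std::vector<std::string> Block = {"load", "add", "call", "mul", "store"};
  size_t End = Block.size();
  // Scan from the bottom; every boundary closes the region above it, and the
  // boundary instruction itself is left out of the scheduled region.
  for (size_t I = Block.size(); I > 0; --I) {
    if (isBoundary(Block[I - 1])) {
      std::cout << "schedule region [" << I << ", " << End << ")\n";
      End = I - 1;
    }
  }
  std::cout << "schedule region [0, " << End << ")\n";
  return 0;
}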
@@ -319,11 +375,8 @@ void SchedulePostRATDList::Schedule() { // the def's anti-dependence *and* output-dependence edges due to // that register, and add new anti-dependence and output-dependence // edges based on the next live range of the register. - SUnits.clear(); - Sequence.clear(); - EntrySU = SUnit(); - ExitSU = SUnit(); - BuildSchedGraph(AA); + ScheduleDAG::clearDAG(); + buildSchedGraph(AA); NumFixedAnti += Broken; } @@ -343,38 +396,36 @@ void SchedulePostRATDList::Schedule() { /// void SchedulePostRATDList::Observe(MachineInstr *MI, unsigned Count) { if (AntiDepBreak != NULL) - AntiDepBreak->Observe(MI, Count, InsertPosIndex); + AntiDepBreak->Observe(MI, Count, EndIndex); } /// FinishBlock - Clean up register live-range state. /// -void SchedulePostRATDList::FinishBlock() { +void SchedulePostRATDList::finishBlock() { if (AntiDepBreak != NULL) AntiDepBreak->FinishBlock(); // Call the superclass. - ScheduleDAGInstrs::FinishBlock(); + ScheduleDAGInstrs::finishBlock(); } /// StartBlockForKills - Initialize register live-range state for updating kills /// void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) { - // Initialize the indices to indicate that no registers are live. - for (unsigned i = 0; i < TRI->getNumRegs(); ++i) - KillIndices[i] = ~0u; + // Start with no live registers. + LiveRegs.reset(); // Determine the live-out physregs for this block. - if (!BB->empty() && BB->back().getDesc().isReturn()) { + if (!BB->empty() && BB->back().isReturn()) { // In a return block, examine the function live-out regs. for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(), E = MRI.liveout_end(); I != E; ++I) { unsigned Reg = *I; - KillIndices[Reg] = BB->size(); + LiveRegs.set(Reg); // Repeat, for all subregs. - for (const unsigned *Subreg = TRI->getSubRegisters(Reg); - *Subreg; ++Subreg) { - KillIndices[*Subreg] = BB->size(); - } + for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); + *Subreg; ++Subreg) + LiveRegs.set(*Subreg); } } else { @@ -384,12 +435,11 @@ void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) { for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), E = (*SI)->livein_end(); I != E; ++I) { unsigned Reg = *I; - KillIndices[Reg] = BB->size(); + LiveRegs.set(Reg); // Repeat, for all subregs. - for (const unsigned *Subreg = TRI->getSubRegisters(Reg); - *Subreg; ++Subreg) { - KillIndices[*Subreg] = BB->size(); - } + for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); + *Subreg; ++Subreg) + LiveRegs.set(*Subreg); } } } @@ -404,7 +454,7 @@ bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI, } // If MO itself is live, clear the kill flag... 
- if (KillIndices[MO.getReg()] != ~0u) { + if (LiveRegs.test(MO.getReg())) { MO.setIsKill(false); return false; } @@ -414,9 +464,9 @@ bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI, MO.setIsKill(false); bool AllDead = true; const unsigned SuperReg = MO.getReg(); - for (const unsigned *Subreg = TRI->getSubRegisters(SuperReg); + for (const uint16_t *Subreg = TRI->getSubRegisters(SuperReg); *Subreg; ++Subreg) { - if (KillIndices[*Subreg] != ~0u) { + if (LiveRegs.test(*Subreg)) { MI->addOperand(MachineOperand::CreateReg(*Subreg, true /*IsDef*/, true /*IsImp*/, @@ -437,7 +487,7 @@ bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI, void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n'); - std::set<unsigned> killedRegs; + BitVector killedRegs(TRI->getNumRegs()); BitVector ReservedRegs = TRI->getReservedRegs(MF); StartBlockForKills(MBB); @@ -455,6 +505,8 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { // are completely defined. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); + if (MO.isRegMask()) + LiveRegs.clearBitsNotInMask(MO.getRegMask()); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (Reg == 0) continue; @@ -462,19 +514,18 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { // Ignore two-addr defs. if (MI->isRegTiedToUseOperand(i)) continue; - KillIndices[Reg] = ~0u; + LiveRegs.reset(Reg); // Repeat for all subregs. - for (const unsigned *Subreg = TRI->getSubRegisters(Reg); - *Subreg; ++Subreg) { - KillIndices[*Subreg] = ~0u; - } + for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); + *Subreg; ++Subreg) + LiveRegs.reset(*Subreg); } // Examine all used registers and set/clear kill flag. When a // register is used multiple times we only set the kill flag on // the first use. - killedRegs.clear(); + killedRegs.reset(); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isUse()) continue; @@ -482,12 +533,12 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { if ((Reg == 0) || ReservedRegs.test(Reg)) continue; bool kill = false; - if (killedRegs.find(Reg) == killedRegs.end()) { + if (!killedRegs.test(Reg)) { kill = true; // A register is not killed if any subregs are live... 
- for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); *Subreg; ++Subreg) { - if (KillIndices[*Subreg] != ~0u) { + if (LiveRegs.test(*Subreg)) { kill = false; break; } @@ -496,7 +547,7 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { // If subreg is not live, then register is killed if it became // live in this instruction if (kill) - kill = (KillIndices[Reg] == ~0u); + kill = !LiveRegs.test(Reg); } if (MO.isKill() != kill) { @@ -506,7 +557,7 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { DEBUG(MI->dump()); } - killedRegs.insert(Reg); + killedRegs.set(Reg); } // Mark any used register (that is not using undef) and subregs as @@ -517,12 +568,11 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { unsigned Reg = MO.getReg(); if ((Reg == 0) || ReservedRegs.test(Reg)) continue; - KillIndices[Reg] = Count; + LiveRegs.set(Reg); - for (const unsigned *Subreg = TRI->getSubRegisters(Reg); - *Subreg; ++Subreg) { - KillIndices[*Subreg] = Count; - } + for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); + *Subreg; ++Subreg) + LiveRegs.set(*Subreg); } } } @@ -585,7 +635,7 @@ void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { ReleaseSuccessors(SU); SU->isScheduled = true; - AvailableQueue.ScheduledNode(SU); + AvailableQueue.scheduledNode(SU); } /// ListScheduleTopDown - The main loop of list scheduling for top-down @@ -699,14 +749,46 @@ void SchedulePostRATDList::ListScheduleTopDown() { } #ifndef NDEBUG - VerifySchedule(/*isBottomUp=*/false); -#endif + unsigned ScheduledNodes = VerifyScheduledDAG(/*isBottomUp=*/false); + unsigned Noops = 0; + for (unsigned i = 0, e = Sequence.size(); i != e; ++i) + if (!Sequence[i]) + ++Noops; + assert(Sequence.size() - Noops == ScheduledNodes && + "The number of nodes scheduled doesn't match the expected number!"); +#endif // NDEBUG } -//===----------------------------------------------------------------------===// -// Public Constructor Functions -//===----------------------------------------------------------------------===// +// EmitSchedule - Emit the machine code in scheduled order. +void SchedulePostRATDList::EmitSchedule() { + RegionBegin = RegionEnd; + + // If first instruction was a DBG_VALUE then put it back. + if (FirstDbgValue) + BB->splice(RegionEnd, BB, FirstDbgValue); + + // Then re-insert them according to the given schedule. + for (unsigned i = 0, e = Sequence.size(); i != e; i++) { + if (SUnit *SU = Sequence[i]) + BB->splice(RegionEnd, BB, SU->getInstr()); + else + // Null SUnit* is a noop. + TII->insertNoop(*BB, RegionEnd); + + // Update the Begin iterator, as the first instruction in the block + // may have been scheduled later. + if (i == 0) + RegionBegin = prior(RegionEnd); + } -FunctionPass *llvm::createPostRAScheduler(CodeGenOpt::Level OptLevel) { - return new PostRAScheduler(OptLevel); + // Reinsert any remaining debug_values. 
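The FixupKills hunks above replace the KillIndices vector and its ~0u "not live" sentinel with a BitVector of live registers maintained bottom-up: a def clears the bit, a use whose bit is still clear is the last use and deserves the kill flag, and the bit is then set. A self-contained sketch of that bookkeeping on toy instructions; the instruction and register encodings are invented, and subregisters are ignored:

// Toy bottom-up liveness walk: scanning from the bottom of a block, a use is
// the last use (a "kill") if its register is not already live below it.
// This mirrors the BitVector bookkeeping, not the real MachineInstr API.
#include <iostream>
#include <vector>

struct Inst {
  std::vector<unsigned> Defs;
  std::vector<unsigned> Uses;
};

int main() {
  const unsigned NumRegs = 8;
  std::vector<bool> LiveRegs(NumRegs, false); // nothing live out of the block

  // A tiny block, listed top to bottom: r1 = ...; r2 = r1 + r1; ... = r2
  std::vector<Inst> Block = { {{1}, {}}, {{2}, {1, 1}}, {{}, {2}} };

  for (unsigned I = Block.size(); I > 0; --I) {
    const Inst &MI = Block[I - 1];
    for (unsigned R : MI.Defs)
      LiveRegs[R] = false;   // the live range starts here; not live above the def
    for (unsigned R : MI.Uses) {
      if (!LiveRegs[R])
        std::cout << "inst " << I - 1 << ": kill of r" << R << "\n";
      LiveRegs[R] = true;    // live from this use up to its def
    }
  }
  return 0;
}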
+ for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator + DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) { + std::pair<MachineInstr *, MachineInstr *> P = *prior(DI); + MachineInstr *DbgValue = P.first; + MachineBasicBlock::iterator OrigPrivMI = P.second; + BB->splice(++OrigPrivMI, BB, DbgValue); + } + DbgValues.clear(); + FirstDbgValue = NULL; } diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp index b1d8c9760225..1ad3479afb4c 100644 --- a/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/lib/CodeGen/ProcessImplicitDefs.cpp @@ -26,6 +26,8 @@ using namespace llvm; char ProcessImplicitDefs::ID = 0; +char &llvm::ProcessImplicitDefsID = ProcessImplicitDefs::ID; + INITIALIZE_PASS_BEGIN(ProcessImplicitDefs, "processimpdefs", "Process Implicit Definitions", false, false) INITIALIZE_PASS_DEPENDENCY(LiveVariables) @@ -36,7 +38,6 @@ void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addPreserved<AliasAnalysis>(); AU.addPreserved<LiveVariables>(); - AU.addRequired<LiveVariables>(); AU.addPreservedID(MachineLoopInfoID); AU.addPreservedID(MachineDominatorsID); AU.addPreservedID(TwoAddressInstructionPassID); @@ -50,10 +51,10 @@ ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI, SmallSet<unsigned, 8> &ImpDefRegs) { switch(OpIdx) { case 1: - return MI->isCopy() && (MI->getOperand(0).getSubReg() == 0 || + return MI->isCopy() && (!MI->getOperand(0).readsReg() || ImpDefRegs.count(MI->getOperand(0).getReg())); case 2: - return MI->isSubregToReg() && (MI->getOperand(0).getSubReg() == 0 || + return MI->isSubregToReg() && (!MI->getOperand(0).readsReg() || ImpDefRegs.count(MI->getOperand(0).getReg())); default: return false; } @@ -66,7 +67,7 @@ static bool isUndefCopy(MachineInstr *MI, unsigned Reg, MachineOperand &MO1 = MI->getOperand(1); if (MO1.getReg() != Reg) return false; - if (!MO0.getSubReg() || ImpDefRegs.count(MO0.getReg())) + if (!MO0.readsReg() || ImpDefRegs.count(MO0.getReg())) return true; return false; } @@ -87,7 +88,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { TII = fn.getTarget().getInstrInfo(); TRI = fn.getTarget().getRegisterInfo(); MRI = &fn.getRegInfo(); - LV = &getAnalysis<LiveVariables>(); + LV = getAnalysisIfAvailable<LiveVariables>(); SmallSet<unsigned, 8> ImpDefRegs; SmallVector<MachineInstr*, 8> ImpDefMIs; @@ -105,23 +106,24 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { MachineInstr *MI = &*I; ++I; if (MI->isImplicitDef()) { - if (MI->getOperand(0).getSubReg()) + ImpDefMIs.push_back(MI); + // Is this a sub-register read-modify-write? + if (MI->getOperand(0).readsReg()) continue; unsigned Reg = MI->getOperand(0).getReg(); ImpDefRegs.insert(Reg); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - for (const unsigned *SS = TRI->getSubRegisters(Reg); *SS; ++SS) + for (const uint16_t *SS = TRI->getSubRegisters(Reg); *SS; ++SS) ImpDefRegs.insert(*SS); } - ImpDefMIs.push_back(MI); continue; } // Eliminate %reg1032:sub<def> = COPY undef. 
- if (MI->isCopy() && MI->getOperand(0).getSubReg()) { + if (MI->isCopy() && MI->getOperand(0).readsReg()) { MachineOperand &MO = MI->getOperand(1); if (MO.isUndef() || ImpDefRegs.count(MO.getReg())) { - if (MO.isKill()) { + if (LV && MO.isKill()) { LiveVariables::VarInfo& vi = LV->getVarInfo(MO.getReg()); vi.removeKill(MI); } @@ -140,7 +142,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { bool ChangedToImpDef = false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand& MO = MI->getOperand(i); - if (!MO.isReg() || (MO.isDef() && !MO.getSubReg()) || MO.isUndef()) + if (!MO.isReg() || !MO.readsReg()) continue; unsigned Reg = MO.getReg(); if (!Reg) @@ -155,8 +157,10 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { MI->RemoveOperand(j); if (isKill) { ImpDefRegs.erase(Reg); - LiveVariables::VarInfo& vi = LV->getVarInfo(Reg); - vi.removeKill(MI); + if (LV) { + LiveVariables::VarInfo& vi = LV->getVarInfo(Reg); + vi.removeKill(MI); + } } ChangedToImpDef = true; Changed = true; @@ -172,10 +176,10 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { continue; } if (MO.isKill() || MI->isRegTiedToDefOperand(i)) { - // Make sure other uses of + // Make sure other reads of Reg are also marked <undef>. for (unsigned j = i+1; j != e; ++j) { MachineOperand &MOJ = MI->getOperand(j); - if (MOJ.isReg() && MOJ.isUse() && MOJ.getReg() == Reg) + if (MOJ.isReg() && MOJ.getReg() == Reg && MOJ.readsReg()) MOJ.setIsUndef(); } ImpDefRegs.erase(Reg); @@ -265,7 +269,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { } // Update LiveVariables varinfo if the instruction is a kill. - if (isKill) { + if (LV && isKill) { LiveVariables::VarInfo& vi = LV->getVarInfo(Reg); vi.removeKill(RMI); } diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 32c932552bed..458915ea5d93 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -45,24 +45,22 @@ using namespace llvm; char PEI::ID = 0; +char &llvm::PrologEpilogCodeInserterID = PEI::ID; INITIALIZE_PASS_BEGIN(PEI, "prologepilog", "Prologue/Epilogue Insertion", false, false) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) INITIALIZE_PASS_END(PEI, "prologepilog", - "Prologue/Epilogue Insertion", false, false) + "Prologue/Epilogue Insertion & Frame Finalization", + false, false) STATISTIC(NumVirtualFrameRegs, "Number of virtual frame regs encountered"); STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged"); STATISTIC(NumBytesStackSpace, "Number of bytes used for stack in all functions"); -/// createPrologEpilogCodeInserter - This function returns a pass that inserts -/// prolog and epilog code, and eliminates abstract frame references. -/// -FunctionPass *llvm::createPrologEpilogCodeInserter() { return new PEI(); } - /// runOnMachineFunction - Insert prolog/epilog code and replace abstract /// frame indexes with appropriate references. /// @@ -71,6 +69,8 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering(); + assert(!Fn.getRegInfo().getNumVirtRegs() && "Regalloc must assign all vregs"); + RS = TRI->requiresRegisterScavenging(Fn) ? 
new RegScavenger() : NULL; FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn); @@ -125,6 +125,9 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging) scavengeFrameVirtualRegs(Fn); + // Clear any vregs created by virtual scavenging. + Fn.getRegInfo().clearVirtRegs(); + delete RS; clearAllSets(); return true; @@ -207,7 +210,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { MachineFrameInfo *MFI = Fn.getFrameInfo(); // Get the callee saved register list... - const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(&Fn); + const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&Fn); // These are used to keep track the callee-save area. Initialize them. MinCSFrameIndex = INT_MAX; @@ -224,17 +227,9 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { std::vector<CalleeSavedInfo> CSI; for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; - if (Fn.getRegInfo().isPhysRegUsed(Reg)) { + if (Fn.getRegInfo().isPhysRegOrOverlapUsed(Reg)) { // If the reg is modified, save it! CSI.push_back(CalleeSavedInfo(Reg)); - } else { - for (const unsigned *AliasSet = RegInfo->getAliasSet(Reg); - *AliasSet; ++AliasSet) { // Check alias registers too. - if (Fn.getRegInfo().isPhysRegUsed(*AliasSet)) { - CSI.push_back(CalleeSavedInfo(Reg)); - break; - } - } } } @@ -332,7 +327,7 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { // Skip over all terminator instructions, which are part of the return // sequence. MachineBasicBlock::iterator I2 = I; - while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator()) + while (I2 != MBB->begin() && (--I2)->isTerminator()) I = I2; bool AtStart = I == MBB->begin(); @@ -426,11 +421,11 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { // Skip over all terminator instructions, which are part of the // return sequence. - if (! I->getDesc().isTerminator()) { + if (! I->isTerminator()) { ++I; } else { MachineBasicBlock::iterator I2 = I; - while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator()) + while (I2 != MBB->begin() && (--I2)->isTerminator()) I = I2; } } @@ -698,7 +693,7 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) { // Add epilogue to restore the callee-save registers in each exiting block for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { // If last instruction is a return instruction, add an epilogue - if (!I->empty() && I->back().getDesc().isReturn()) + if (!I->empty() && I->back().isReturn()) TFI.emitEpilogue(Fn, *I); } @@ -706,7 +701,7 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) { // we've been asked for it. This, when linked with a runtime with support // for segmented stacks (libgcc is one), will result in allocating stack // space in small chunks instead of one large contiguous block. - if (EnableSegmentedStacks) + if (Fn.getTarget().Options.EnableSegmentedStacks) TFI.adjustForSegmentedStacks(Fn); } @@ -813,6 +808,10 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { /// scavengeFrameVirtualRegs - Replace all frame index virtual registers /// with physical registers. Use the register scavenger to find an /// appropriate register to use. +/// +/// FIXME: Iterating over the instruction stream is unnecessary. We can simply +/// iterate over the vreg use list, which at this point only contains machine +/// operands for which eliminateFrameIndex need a new scratch reg. 
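The calculateCalleeSavedRegisters hunk above folds the explicit alias loop into a single isPhysRegOrOverlapUsed() query: a callee-saved register needs a save/restore if the register itself or any overlapping register is used in the function. A small standalone model of that decision, with a hand-written overlap table standing in for TargetRegisterInfo::getOverlaps():

// Toy model: decide which callee-saved registers need saving, treating a
// register as used if it or any overlapping (sub/super) register is used.
// The register names and overlap table are invented for the example.
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

int main() {
  // Overlap sets include the register itself, like getOverlaps().
  std::map<std::string, std::vector<std::string> > Overlaps = {
    {"RBX", {"RBX", "EBX", "BX", "BL"}},
    {"R12", {"R12", "R12D", "R12W", "R12B"}},
  };
  std::set<std::string> UsedInFunction = {"EBX"}; // only the 32-bit half is written

  std::vector<std::string> CalleeSaved = {"RBX", "R12"};
  for (const std::string &CSR : CalleeSaved) {
    bool MustSave = false;
    for (const std::string &R : Overlaps[CSR])
      if (UsedInFunction.count(R))
        MustSave = true;
    std::cout << CSR << (MustSave ? " must be saved\n" : " is untouched\n");
  }
  return 0;
}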
void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { // Run through the instructions and find any virtual registers. for (MachineFunction::iterator BB = Fn.begin(), diff --git a/lib/CodeGen/PrologEpilogInserter.h b/lib/CodeGen/PrologEpilogInserter.h index e2391591ad06..0d140a9bb481 100644 --- a/lib/CodeGen/PrologEpilogInserter.h +++ b/lib/CodeGen/PrologEpilogInserter.h @@ -40,10 +40,6 @@ namespace llvm { initializePEIPass(*PassRegistry::getPassRegistry()); } - const char *getPassName() const { - return "Prolog/Epilog Insertion & Frame Finalization"; - } - virtual void getAnalysisUsage(AnalysisUsage &AU) const; /// runOnMachineFunction - Insert prolog/epilog code and replace abstract diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp index 73b66d868f3d..49599b3ab980 100644 --- a/lib/CodeGen/PseudoSourceValue.cpp +++ b/lib/CodeGen/PseudoSourceValue.cpp @@ -87,7 +87,6 @@ bool PseudoSourceValue::isConstant(const MachineFrameInfo *) const { this == getJumpTable()) return true; llvm_unreachable("Unknown PseudoSourceValue!"); - return false; } bool PseudoSourceValue::isAliased(const MachineFrameInfo *MFI) const { @@ -97,7 +96,6 @@ bool PseudoSourceValue::isAliased(const MachineFrameInfo *MFI) const { this == getJumpTable()) return false; llvm_unreachable("Unknown PseudoSourceValue!"); - return true; } bool PseudoSourceValue::mayAlias(const MachineFrameInfo *MFI) const { diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp new file mode 100644 index 000000000000..b00eceb17f11 --- /dev/null +++ b/lib/CodeGen/RegAllocBase.cpp @@ -0,0 +1,280 @@ +//===-- RegAllocBase.cpp - Register Allocator Base Class ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the RegAllocBase class which provides comon functionality +// for LiveIntervalUnion-based register allocators. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regalloc" +#include "RegAllocBase.h" +#include "Spiller.h" +#include "VirtRegMap.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#ifndef NDEBUG +#include "llvm/ADT/SparseBitVector.h" +#endif +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Timer.h" + +using namespace llvm; + +STATISTIC(NumAssigned , "Number of registers assigned"); +STATISTIC(NumUnassigned , "Number of registers unassigned"); +STATISTIC(NumNewQueued , "Number of new live ranges queued"); + +// Temporary verification option until we can put verification inside +// MachineVerifier. +static cl::opt<bool, true> +VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled), + cl::desc("Verify during register allocation")); + +const char *RegAllocBase::TimerGroupName = "Register Allocation"; +bool RegAllocBase::VerifyEnabled = false; + +#ifndef NDEBUG +// Verify each LiveIntervalUnion. 
+void RegAllocBase::verify() { + LiveVirtRegBitSet VisitedVRegs; + OwningArrayPtr<LiveVirtRegBitSet> + unionVRegs(new LiveVirtRegBitSet[PhysReg2LiveUnion.numRegs()]); + + // Verify disjoint unions. + for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) { + DEBUG(PhysReg2LiveUnion[PhysReg].print(dbgs(), TRI)); + LiveVirtRegBitSet &VRegs = unionVRegs[PhysReg]; + PhysReg2LiveUnion[PhysReg].verify(VRegs); + // Union + intersection test could be done efficiently in one pass, but + // don't add a method to SparseBitVector unless we really need it. + assert(!VisitedVRegs.intersects(VRegs) && "vreg in multiple unions"); + VisitedVRegs |= VRegs; + } + + // Verify vreg coverage. + for (LiveIntervals::iterator liItr = LIS->begin(), liEnd = LIS->end(); + liItr != liEnd; ++liItr) { + unsigned reg = liItr->first; + if (TargetRegisterInfo::isPhysicalRegister(reg)) continue; + if (!VRM->hasPhys(reg)) continue; // spilled? + unsigned PhysReg = VRM->getPhys(reg); + if (!unionVRegs[PhysReg].test(reg)) { + dbgs() << "LiveVirtReg " << reg << " not in union " << + TRI->getName(PhysReg) << "\n"; + llvm_unreachable("unallocated live vreg"); + } + } + // FIXME: I'm not sure how to verify spilled intervals. +} +#endif //!NDEBUG + +//===----------------------------------------------------------------------===// +// RegAllocBase Implementation +//===----------------------------------------------------------------------===// + +// Instantiate a LiveIntervalUnion for each physical register. +void RegAllocBase::LiveUnionArray::init(LiveIntervalUnion::Allocator &allocator, + unsigned NRegs) { + NumRegs = NRegs; + Array = + static_cast<LiveIntervalUnion*>(malloc(sizeof(LiveIntervalUnion)*NRegs)); + for (unsigned r = 0; r != NRegs; ++r) + new(Array + r) LiveIntervalUnion(r, allocator); +} + +void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis) { + NamedRegionTimer T("Initialize", TimerGroupName, TimePassesIsEnabled); + TRI = &vrm.getTargetRegInfo(); + MRI = &vrm.getRegInfo(); + VRM = &vrm; + LIS = &lis; + MRI->freezeReservedRegs(vrm.getMachineFunction()); + RegClassInfo.runOnMachineFunction(vrm.getMachineFunction()); + + const unsigned NumRegs = TRI->getNumRegs(); + if (NumRegs != PhysReg2LiveUnion.numRegs()) { + PhysReg2LiveUnion.init(UnionAllocator, NumRegs); + // Cache an interferece query for each physical reg + Queries.reset(new LiveIntervalUnion::Query[PhysReg2LiveUnion.numRegs()]); + } +} + +void RegAllocBase::LiveUnionArray::clear() { + if (!Array) + return; + for (unsigned r = 0; r != NumRegs; ++r) + Array[r].~LiveIntervalUnion(); + free(Array); + NumRegs = 0; + Array = 0; +} + +void RegAllocBase::releaseMemory() { + for (unsigned r = 0, e = PhysReg2LiveUnion.numRegs(); r != e; ++r) + PhysReg2LiveUnion[r].clear(); +} + +// Visit all the live registers. If they are already assigned to a physical +// register, unify them with the corresponding LiveIntervalUnion, otherwise push +// them on the priority queue for later assignment. 
+void RegAllocBase::seedLiveRegs() { + NamedRegionTimer T("Seed Live Regs", TimerGroupName, TimePassesIsEnabled); + for (LiveIntervals::iterator I = LIS->begin(), E = LIS->end(); I != E; ++I) { + unsigned RegNum = I->first; + LiveInterval &VirtReg = *I->second; + if (TargetRegisterInfo::isPhysicalRegister(RegNum)) + PhysReg2LiveUnion[RegNum].unify(VirtReg); + else + enqueue(&VirtReg); + } +} + +void RegAllocBase::assign(LiveInterval &VirtReg, unsigned PhysReg) { + DEBUG(dbgs() << "assigning " << PrintReg(VirtReg.reg, TRI) + << " to " << PrintReg(PhysReg, TRI) << '\n'); + assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment"); + VRM->assignVirt2Phys(VirtReg.reg, PhysReg); + MRI->setPhysRegUsed(PhysReg); + PhysReg2LiveUnion[PhysReg].unify(VirtReg); + ++NumAssigned; +} + +void RegAllocBase::unassign(LiveInterval &VirtReg, unsigned PhysReg) { + DEBUG(dbgs() << "unassigning " << PrintReg(VirtReg.reg, TRI) + << " from " << PrintReg(PhysReg, TRI) << '\n'); + assert(VRM->getPhys(VirtReg.reg) == PhysReg && "Inconsistent unassign"); + PhysReg2LiveUnion[PhysReg].extract(VirtReg); + VRM->clearVirt(VirtReg.reg); + ++NumUnassigned; +} + +// Top-level driver to manage the queue of unassigned VirtRegs and call the +// selectOrSplit implementation. +void RegAllocBase::allocatePhysRegs() { + seedLiveRegs(); + + // Continue assigning vregs one at a time to available physical registers. + while (LiveInterval *VirtReg = dequeue()) { + assert(!VRM->hasPhys(VirtReg->reg) && "Register already assigned"); + + // Unused registers can appear when the spiller coalesces snippets. + if (MRI->reg_nodbg_empty(VirtReg->reg)) { + DEBUG(dbgs() << "Dropping unused " << *VirtReg << '\n'); + LIS->removeInterval(VirtReg->reg); + continue; + } + + // Invalidate all interference queries, live ranges could have changed. + invalidateVirtRegs(); + + // selectOrSplit requests the allocator to return an available physical + // register if possible and populate a list of new live intervals that + // result from splitting. + DEBUG(dbgs() << "\nselectOrSplit " + << MRI->getRegClass(VirtReg->reg)->getName() + << ':' << *VirtReg << '\n'); + typedef SmallVector<LiveInterval*, 4> VirtRegVec; + VirtRegVec SplitVRegs; + unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs); + + if (AvailablePhysReg == ~0u) { + // selectOrSplit failed to find a register! + const char *Msg = "ran out of registers during register allocation"; + // Probably caused by an inline asm. + MachineInstr *MI; + for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(VirtReg->reg); + (MI = I.skipInstruction());) + if (MI->isInlineAsm()) + break; + if (MI) + MI->emitError(Msg); + else + report_fatal_error(Msg); + // Keep going after reporting the error. 
+ VRM->assignVirt2Phys(VirtReg->reg, + RegClassInfo.getOrder(MRI->getRegClass(VirtReg->reg)).front()); + continue; + } + + if (AvailablePhysReg) + assign(*VirtReg, AvailablePhysReg); + + for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end(); + I != E; ++I) { + LiveInterval *SplitVirtReg = *I; + assert(!VRM->hasPhys(SplitVirtReg->reg) && "Register already assigned"); + if (MRI->reg_nodbg_empty(SplitVirtReg->reg)) { + DEBUG(dbgs() << "not queueing unused " << *SplitVirtReg << '\n'); + LIS->removeInterval(SplitVirtReg->reg); + continue; + } + DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n"); + assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) && + "expect split value in virtual register"); + enqueue(SplitVirtReg); + ++NumNewQueued; + } + } +} + +// Check if this live virtual register interferes with a physical register. If +// not, then check for interference on each register that aliases with the +// physical register. Return the interfering register. +unsigned RegAllocBase::checkPhysRegInterference(LiveInterval &VirtReg, + unsigned PhysReg) { + for (const uint16_t *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) + if (query(VirtReg, *AliasI).checkInterference()) + return *AliasI; + return 0; +} + +// Add newly allocated physical registers to the MBB live in sets. +void RegAllocBase::addMBBLiveIns(MachineFunction *MF) { + NamedRegionTimer T("MBB Live Ins", TimerGroupName, TimePassesIsEnabled); + SlotIndexes *Indexes = LIS->getSlotIndexes(); + if (MF->size() <= 1) + return; + + LiveIntervalUnion::SegmentIter SI; + for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) { + LiveIntervalUnion &LiveUnion = PhysReg2LiveUnion[PhysReg]; + if (LiveUnion.empty()) + continue; + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " live-in:"); + MachineFunction::iterator MBB = llvm::next(MF->begin()); + MachineFunction::iterator MFE = MF->end(); + SlotIndex Start, Stop; + tie(Start, Stop) = Indexes->getMBBRange(MBB); + SI.setMap(LiveUnion.getMap()); + SI.find(Start); + while (SI.valid()) { + if (SI.start() <= Start) { + if (!MBB->isLiveIn(PhysReg)) + MBB->addLiveIn(PhysReg); + DEBUG(dbgs() << "\tBB#" << MBB->getNumber() << ':' + << PrintReg(SI.value()->reg, TRI)); + } else if (SI.start() > Stop) + MBB = Indexes->getMBBFromIndex(SI.start().getPrevIndex()); + if (++MBB == MFE) + break; + tie(Start, Stop) = Indexes->getMBBRange(MBB); + SI.advanceTo(Start); + } + DEBUG(dbgs() << '\n'); + } +} + diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h index 031642117efc..072fe2bdb656 100644 --- a/lib/CodeGen/RegAllocBase.h +++ b/lib/CodeGen/RegAllocBase.h @@ -49,11 +49,6 @@ class VirtRegMap; class LiveIntervals; class Spiller; -// Forward declare a priority queue of live virtual registers. If an -// implementation needs to prioritize by anything other than spill weight, then -// this will become an abstract base class with virtual calls to push/get. -class LiveVirtRegQueue; - /// RegAllocBase provides the register allocation driver and interface that can /// be extended to add interesting heuristics. /// @@ -67,7 +62,6 @@ class RegAllocBase { // registers may have changed. unsigned UserTag; -protected: // Array of LiveIntervalUnions indexed by physical register. 
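allocatePhysRegs() above is a generic driver: dequeue an unassigned virtual register, ask selectOrSplit() for a physical register, record the assignment if one comes back, and requeue whatever new intervals splitting produced. A stripped-down, self-contained sketch of that loop; the VReg type and the selection rule are stand-ins, not the RegAllocBase interface:

// Toy driver in the spirit of allocatePhysRegs(): keep assigning queued
// virtual registers; anything selectOrSplit() reports as "new" goes back on
// the queue. Types and the selection heuristic are invented.
#include <deque>
#include <iostream>
#include <vector>

struct VReg { int Id; };

// Pretend selector: even ids get a physical register immediately, odd ids are
// "split" into two new virtual registers once, which are then assignable.
static unsigned selectOrSplit(const VReg &V, std::vector<VReg> &NewVRegs) {
  if (V.Id % 2 == 0 || V.Id >= 100)
    return 1 + (V.Id % 4);            // some physical register number
  NewVRegs.push_back({V.Id + 100});   // model the products of a split
  NewVRegs.push_back({V.Id + 200});
  return 0;                           // nothing assigned; new vregs carry the value
}

int main() {
  std::deque<VReg> Queue = {{0}, {1}, {2}};
  while (!Queue.empty()) {
    VReg V = Queue.front();
    Queue.pop_front();
    std::vector<VReg> NewVRegs;
    unsigned PhysReg = selectOrSplit(V, NewVRegs);
    if (PhysReg)
      std::cout << "vreg " << V.Id << " -> physreg " << PhysReg << "\n";
    for (const VReg &NV : NewVRegs)
      Queue.push_back(NV);            // newly created intervals get queued
  }
  return 0;
}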
class LiveUnionArray { unsigned NumRegs; @@ -88,17 +82,19 @@ protected: } }; - const TargetRegisterInfo *TRI; - MachineRegisterInfo *MRI; - VirtRegMap *VRM; - LiveIntervals *LIS; - RegisterClassInfo RegClassInfo; LiveUnionArray PhysReg2LiveUnion; // Current queries, one per physreg. They must be reinitialized each time we // query on a new live virtual register. OwningArrayPtr<LiveIntervalUnion::Query> Queries; +protected: + const TargetRegisterInfo *TRI; + MachineRegisterInfo *MRI; + VirtRegMap *VRM; + LiveIntervals *LIS; + RegisterClassInfo RegClassInfo; + RegAllocBase(): UserTag(0), TRI(0), MRI(0), VRM(0), LIS(0) {} virtual ~RegAllocBase() {} @@ -115,16 +111,17 @@ protected: return Queries[PhysReg]; } + // Get direct access to the underlying LiveIntervalUnion for PhysReg. + LiveIntervalUnion &getLiveUnion(unsigned PhysReg) { + return PhysReg2LiveUnion[PhysReg]; + } + // Invalidate all cached information about virtual registers - live ranges may // have changed. void invalidateVirtRegs() { ++UserTag; } // The top-level driver. The output is a VirtRegMap that us updated with // physical register assignments. - // - // If an implementation wants to override the LiveInterval comparator, we - // should modify this interface to allow passing in an instance derived from - // LiveVirtRegQueue. void allocatePhysRegs(); // Get a temporary reference to a Spiller instance. @@ -160,12 +157,6 @@ protected: /// allocation is making progress. void unassign(LiveInterval &VirtReg, unsigned PhysReg); - // Helper for spilling all live virtual registers currently unified under preg - // that interfere with the most recently queried lvr. Return true if spilling - // was successful, and append any new spilled/split intervals to splitLVRs. - bool spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, - SmallVectorImpl<LiveInterval*> &SplitVRegs); - /// addMBBLiveIns - Add physreg liveins to basic blocks. 
void addMBBLiveIns(MachineFunction *); @@ -183,9 +174,6 @@ public: private: void seedLiveRegs(); - - void spillReg(LiveInterval &VirtReg, unsigned PhysReg, - SmallVectorImpl<LiveInterval*> &SplitVRegs); }; } // end namespace llvm diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp index 5496d69fd3df..77ee3148f31a 100644 --- a/lib/CodeGen/RegAllocBasic.cpp +++ b/lib/CodeGen/RegAllocBasic.cpp @@ -1,4 +1,4 @@ -//===-- RegAllocBasic.cpp - basic register allocator ----------------------===// +//===-- RegAllocBasic.cpp - Basic Register Allocator ----------------------===// // // The LLVM Compiler Infrastructure // @@ -15,18 +15,15 @@ #define DEBUG_TYPE "regalloc" #include "RegAllocBase.h" #include "LiveDebugVariables.h" -#include "LiveIntervalUnion.h" -#include "LiveRangeEdit.h" #include "RenderMachineFunction.h" #include "Spiller.h" #include "VirtRegMap.h" -#include "llvm/ADT/OwningPtr.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Function.h" #include "llvm/PassAnalysisSupport.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" @@ -37,35 +34,17 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" -#ifndef NDEBUG -#include "llvm/ADT/SparseBitVector.h" -#endif #include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/Timer.h" #include <cstdlib> #include <queue> using namespace llvm; -STATISTIC(NumAssigned , "Number of registers assigned"); -STATISTIC(NumUnassigned , "Number of registers unassigned"); -STATISTIC(NumNewQueued , "Number of new live ranges queued"); - static RegisterRegAlloc basicRegAlloc("basic", "basic register allocator", createBasicRegisterAllocator); -// Temporary verification option until we can put verification inside -// MachineVerifier. -static cl::opt<bool, true> -VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled), - cl::desc("Verify during register allocation")); - -const char *RegAllocBase::TimerGroupName = "Register Allocation"; -bool RegAllocBase::VerifyEnabled = false; - namespace { struct CompSpillWeight { bool operator()(LiveInterval *A, LiveInterval *B) const { @@ -93,6 +72,11 @@ class RABasic : public MachineFunctionPass, public RegAllocBase std::auto_ptr<Spiller> SpillerInstance; std::priority_queue<LiveInterval*, std::vector<LiveInterval*>, CompSpillWeight> Queue; + + // Scratch space. Allocated here to avoid repeated malloc calls in + // selectOrSplit(). + BitVector UsableRegs; + public: RABasic(); @@ -128,6 +112,15 @@ public: /// Perform register allocation. virtual bool runOnMachineFunction(MachineFunction &mf); + // Helper for spilling all live virtual registers currently unified under preg + // that interfere with the most recently queried lvr. Return true if spilling + // was successful, and append any new spilled/split intervals to splitLVRs. 
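RABasic keeps its work list in a std::priority_queue ordered by the CompSpillWeight comparator shown above, so the live interval with the largest spill weight is handed to selectOrSplit() first. A short standalone illustration of that ordering, with made-up interval ids and weights:

// Toy priority queue keyed by spill weight: a "less than" comparator makes
// top() the interval with the largest weight, which is allocated first.
#include <iostream>
#include <queue>
#include <vector>

struct Interval { int Id; float Weight; };

struct CompSpillWeight {
  bool operator()(const Interval &A, const Interval &B) const {
    return A.Weight < B.Weight;
  }
};

int main() {
  std::priority_queue<Interval, std::vector<Interval>, CompSpillWeight> Q;
  Q.push({10, 2.5f});
  Q.push({11, 0.5f});
  Q.push({12, 7.0f});
  while (!Q.empty()) {
    std::cout << "allocate vreg " << Q.top().Id
              << " (weight " << Q.top().Weight << ")\n";
    Q.pop();
  }
  return 0; // order: 12, 10, 11 -- heaviest first
}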
+ bool spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, + SmallVectorImpl<LiveInterval*> &SplitVRegs); + + void spillReg(LiveInterval &VirtReg, unsigned PhysReg, + SmallVectorImpl<LiveInterval*> &SplitVRegs); + static char ID; }; @@ -139,8 +132,8 @@ RABasic::RABasic(): MachineFunctionPass(ID) { initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry()); initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); - initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry()); initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); + initializeMachineSchedulerPass(*PassRegistry::getPassRegistry()); initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); initializeLiveStacksPass(*PassRegistry::getPassRegistry()); initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); @@ -157,9 +150,6 @@ void RABasic::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<SlotIndexes>(); AU.addRequired<LiveDebugVariables>(); AU.addPreserved<LiveDebugVariables>(); - if (StrongPHIElim) - AU.addRequiredID(StrongPHIEliminationID); - AU.addRequiredTransitiveID(RegisterCoalescerPassID); AU.addRequired<CalculateSpillWeights>(); AU.addRequired<LiveStacks>(); AU.addPreserved<LiveStacks>(); @@ -178,204 +168,10 @@ void RABasic::releaseMemory() { RegAllocBase::releaseMemory(); } -#ifndef NDEBUG -// Verify each LiveIntervalUnion. -void RegAllocBase::verify() { - LiveVirtRegBitSet VisitedVRegs; - OwningArrayPtr<LiveVirtRegBitSet> - unionVRegs(new LiveVirtRegBitSet[PhysReg2LiveUnion.numRegs()]); - - // Verify disjoint unions. - for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) { - DEBUG(PhysReg2LiveUnion[PhysReg].print(dbgs(), TRI)); - LiveVirtRegBitSet &VRegs = unionVRegs[PhysReg]; - PhysReg2LiveUnion[PhysReg].verify(VRegs); - // Union + intersection test could be done efficiently in one pass, but - // don't add a method to SparseBitVector unless we really need it. - assert(!VisitedVRegs.intersects(VRegs) && "vreg in multiple unions"); - VisitedVRegs |= VRegs; - } - - // Verify vreg coverage. - for (LiveIntervals::iterator liItr = LIS->begin(), liEnd = LIS->end(); - liItr != liEnd; ++liItr) { - unsigned reg = liItr->first; - if (TargetRegisterInfo::isPhysicalRegister(reg)) continue; - if (!VRM->hasPhys(reg)) continue; // spilled? - unsigned PhysReg = VRM->getPhys(reg); - if (!unionVRegs[PhysReg].test(reg)) { - dbgs() << "LiveVirtReg " << reg << " not in union " << - TRI->getName(PhysReg) << "\n"; - llvm_unreachable("unallocated live vreg"); - } - } - // FIXME: I'm not sure how to verify spilled intervals. -} -#endif //!NDEBUG - -//===----------------------------------------------------------------------===// -// RegAllocBase Implementation -//===----------------------------------------------------------------------===// - -// Instantiate a LiveIntervalUnion for each physical register. 
-void RegAllocBase::LiveUnionArray::init(LiveIntervalUnion::Allocator &allocator, - unsigned NRegs) { - NumRegs = NRegs; - Array = - static_cast<LiveIntervalUnion*>(malloc(sizeof(LiveIntervalUnion)*NRegs)); - for (unsigned r = 0; r != NRegs; ++r) - new(Array + r) LiveIntervalUnion(r, allocator); -} - -void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis) { - NamedRegionTimer T("Initialize", TimerGroupName, TimePassesIsEnabled); - TRI = &vrm.getTargetRegInfo(); - MRI = &vrm.getRegInfo(); - VRM = &vrm; - LIS = &lis; - RegClassInfo.runOnMachineFunction(vrm.getMachineFunction()); - - const unsigned NumRegs = TRI->getNumRegs(); - if (NumRegs != PhysReg2LiveUnion.numRegs()) { - PhysReg2LiveUnion.init(UnionAllocator, NumRegs); - // Cache an interferece query for each physical reg - Queries.reset(new LiveIntervalUnion::Query[PhysReg2LiveUnion.numRegs()]); - } -} - -void RegAllocBase::LiveUnionArray::clear() { - if (!Array) - return; - for (unsigned r = 0; r != NumRegs; ++r) - Array[r].~LiveIntervalUnion(); - free(Array); - NumRegs = 0; - Array = 0; -} - -void RegAllocBase::releaseMemory() { - for (unsigned r = 0, e = PhysReg2LiveUnion.numRegs(); r != e; ++r) - PhysReg2LiveUnion[r].clear(); -} - -// Visit all the live registers. If they are already assigned to a physical -// register, unify them with the corresponding LiveIntervalUnion, otherwise push -// them on the priority queue for later assignment. -void RegAllocBase::seedLiveRegs() { - NamedRegionTimer T("Seed Live Regs", TimerGroupName, TimePassesIsEnabled); - for (LiveIntervals::iterator I = LIS->begin(), E = LIS->end(); I != E; ++I) { - unsigned RegNum = I->first; - LiveInterval &VirtReg = *I->second; - if (TargetRegisterInfo::isPhysicalRegister(RegNum)) - PhysReg2LiveUnion[RegNum].unify(VirtReg); - else - enqueue(&VirtReg); - } -} - -void RegAllocBase::assign(LiveInterval &VirtReg, unsigned PhysReg) { - DEBUG(dbgs() << "assigning " << PrintReg(VirtReg.reg, TRI) - << " to " << PrintReg(PhysReg, TRI) << '\n'); - assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment"); - VRM->assignVirt2Phys(VirtReg.reg, PhysReg); - MRI->setPhysRegUsed(PhysReg); - PhysReg2LiveUnion[PhysReg].unify(VirtReg); - ++NumAssigned; -} - -void RegAllocBase::unassign(LiveInterval &VirtReg, unsigned PhysReg) { - DEBUG(dbgs() << "unassigning " << PrintReg(VirtReg.reg, TRI) - << " from " << PrintReg(PhysReg, TRI) << '\n'); - assert(VRM->getPhys(VirtReg.reg) == PhysReg && "Inconsistent unassign"); - PhysReg2LiveUnion[PhysReg].extract(VirtReg); - VRM->clearVirt(VirtReg.reg); - ++NumUnassigned; -} - -// Top-level driver to manage the queue of unassigned VirtRegs and call the -// selectOrSplit implementation. -void RegAllocBase::allocatePhysRegs() { - seedLiveRegs(); - - // Continue assigning vregs one at a time to available physical registers. - while (LiveInterval *VirtReg = dequeue()) { - assert(!VRM->hasPhys(VirtReg->reg) && "Register already assigned"); - - // Unused registers can appear when the spiller coalesces snippets. - if (MRI->reg_nodbg_empty(VirtReg->reg)) { - DEBUG(dbgs() << "Dropping unused " << *VirtReg << '\n'); - LIS->removeInterval(VirtReg->reg); - continue; - } - - // Invalidate all interference queries, live ranges could have changed. - invalidateVirtRegs(); - - // selectOrSplit requests the allocator to return an available physical - // register if possible and populate a list of new live intervals that - // result from splitting. 
- DEBUG(dbgs() << "\nselectOrSplit " - << MRI->getRegClass(VirtReg->reg)->getName() - << ':' << *VirtReg << '\n'); - typedef SmallVector<LiveInterval*, 4> VirtRegVec; - VirtRegVec SplitVRegs; - unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs); - - if (AvailablePhysReg == ~0u) { - // selectOrSplit failed to find a register! - const char *Msg = "ran out of registers during register allocation"; - // Probably caused by an inline asm. - MachineInstr *MI; - for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(VirtReg->reg); - (MI = I.skipInstruction());) - if (MI->isInlineAsm()) - break; - if (MI) - MI->emitError(Msg); - else - report_fatal_error(Msg); - // Keep going after reporting the error. - VRM->assignVirt2Phys(VirtReg->reg, - RegClassInfo.getOrder(MRI->getRegClass(VirtReg->reg)).front()); - continue; - } - - if (AvailablePhysReg) - assign(*VirtReg, AvailablePhysReg); - - for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end(); - I != E; ++I) { - LiveInterval *SplitVirtReg = *I; - assert(!VRM->hasPhys(SplitVirtReg->reg) && "Register already assigned"); - if (MRI->reg_nodbg_empty(SplitVirtReg->reg)) { - DEBUG(dbgs() << "not queueing unused " << *SplitVirtReg << '\n'); - LIS->removeInterval(SplitVirtReg->reg); - continue; - } - DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n"); - assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) && - "expect split value in virtual register"); - enqueue(SplitVirtReg); - ++NumNewQueued; - } - } -} - -// Check if this live virtual register interferes with a physical register. If -// not, then check for interference on each register that aliases with the -// physical register. Return the interfering register. -unsigned RegAllocBase::checkPhysRegInterference(LiveInterval &VirtReg, - unsigned PhysReg) { - for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) - if (query(VirtReg, *AliasI).checkInterference()) - return *AliasI; - return 0; -} - -// Helper for spillInteferences() that spills all interfering vregs currently +// Helper for spillInterferences() that spills all interfering vregs currently // assigned to this physical register. -void RegAllocBase::spillReg(LiveInterval& VirtReg, unsigned PhysReg, - SmallVectorImpl<LiveInterval*> &SplitVRegs) { +void RABasic::spillReg(LiveInterval& VirtReg, unsigned PhysReg, + SmallVectorImpl<LiveInterval*> &SplitVRegs) { LiveIntervalUnion::Query &Q = query(VirtReg, PhysReg); assert(Q.seenAllInterferences() && "need collectInterferences()"); const SmallVectorImpl<LiveInterval*> &PendingSpills = Q.interferingVRegs(); @@ -391,7 +187,7 @@ void RegAllocBase::spillReg(LiveInterval& VirtReg, unsigned PhysReg, unassign(SpilledVReg, PhysReg); // Spill the extracted interval. - LiveRangeEdit LRE(SpilledVReg, SplitVRegs, 0, &PendingSpills); + LiveRangeEdit LRE(SpilledVReg, SplitVRegs, *MF, *LIS, VRM); spiller().spill(LRE); } // After extracting segments, the query's results are invalid. But keep the @@ -402,14 +198,13 @@ void RegAllocBase::spillReg(LiveInterval& VirtReg, unsigned PhysReg, // Spill or split all live virtual registers currently unified under PhysReg // that interfere with VirtReg. The newly spilled or split live intervals are // returned by appending them to SplitVRegs. 
-bool -RegAllocBase::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, +bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, SmallVectorImpl<LiveInterval*> &SplitVRegs) { // Record each interference and determine if all are spillable before mutating // either the union or live intervals. unsigned NumInterferences = 0; // Collect interferences assigned to any alias of the physical register. - for (const unsigned *asI = TRI->getOverlaps(PhysReg); *asI; ++asI) { + for (const uint16_t *asI = TRI->getOverlaps(PhysReg); *asI; ++asI) { LiveIntervalUnion::Query &QAlias = query(VirtReg, *asI); NumInterferences += QAlias.collectInterferingVRegs(); if (QAlias.seenUnspillableVReg()) { @@ -421,52 +216,11 @@ RegAllocBase::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, assert(NumInterferences > 0 && "expect interference"); // Spill each interfering vreg allocated to PhysReg or an alias. - for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) + for (const uint16_t *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) spillReg(VirtReg, *AliasI, SplitVRegs); return true; } -// Add newly allocated physical registers to the MBB live in sets. -void RegAllocBase::addMBBLiveIns(MachineFunction *MF) { - NamedRegionTimer T("MBB Live Ins", TimerGroupName, TimePassesIsEnabled); - SlotIndexes *Indexes = LIS->getSlotIndexes(); - if (MF->size() <= 1) - return; - - LiveIntervalUnion::SegmentIter SI; - for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) { - LiveIntervalUnion &LiveUnion = PhysReg2LiveUnion[PhysReg]; - if (LiveUnion.empty()) - continue; - DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " live-in:"); - MachineFunction::iterator MBB = llvm::next(MF->begin()); - MachineFunction::iterator MFE = MF->end(); - SlotIndex Start, Stop; - tie(Start, Stop) = Indexes->getMBBRange(MBB); - SI.setMap(LiveUnion.getMap()); - SI.find(Start); - while (SI.valid()) { - if (SI.start() <= Start) { - if (!MBB->isLiveIn(PhysReg)) - MBB->addLiveIn(PhysReg); - DEBUG(dbgs() << "\tBB#" << MBB->getNumber() << ':' - << PrintReg(SI.value()->reg, TRI)); - } else if (SI.start() > Stop) - MBB = Indexes->getMBBFromIndex(SI.start().getPrevIndex()); - if (++MBB == MFE) - break; - tie(Start, Stop) = Indexes->getMBBRange(MBB); - SI.advanceTo(Start); - } - DEBUG(dbgs() << '\n'); - } -} - - -//===----------------------------------------------------------------------===// -// RABasic Implementation -//===----------------------------------------------------------------------===// - // Driver for the register assignment and splitting heuristics. // Manages iteration over the LiveIntervalUnions. // @@ -481,6 +235,10 @@ void RegAllocBase::addMBBLiveIns(MachineFunction *MF) { // selectOrSplit(). unsigned RABasic::selectOrSplit(LiveInterval &VirtReg, SmallVectorImpl<LiveInterval*> &SplitVRegs) { + // Check for register mask interference. When live ranges cross calls, the + // set of usable registers is reduced to the callee-saved ones. + bool CrossRegMasks = LIS->checkRegMaskInterference(VirtReg, UsableRegs); + // Populate a list of physical register spill candidates. SmallVector<unsigned, 8> PhysRegSpillCands; @@ -491,6 +249,11 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg, ++I) { unsigned PhysReg = *I; + // If PhysReg is clobbered by a register mask, it isn't useful for + // allocation or spilling. 
+ if (CrossRegMasks && !UsableRegs.test(PhysReg)) + continue; + // Check interference and as a side effect, intialize queries for this // VirtReg and its aliases. unsigned interfReg = checkPhysRegInterference(VirtReg, PhysReg); @@ -498,9 +261,9 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg, // Found an available register. return PhysReg; } - Queries[interfReg].collectInterferingVRegs(1); - LiveInterval *interferingVirtReg = - Queries[interfReg].interferingVRegs().front(); + LiveIntervalUnion::Query &IntfQ = query(VirtReg, interfReg); + IntfQ.collectInterferingVRegs(1); + LiveInterval *interferingVirtReg = IntfQ.interferingVRegs().front(); // The current VirtReg must either be spillable, or one of its interferences // must have less spill weight. @@ -524,7 +287,7 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg, DEBUG(dbgs() << "spilling: " << VirtReg << '\n'); if (!VirtReg.isSpillable()) return ~0u; - LiveRangeEdit LRE(VirtReg, SplitVRegs); + LiveRangeEdit LRE(VirtReg, SplitVRegs, *MF, *LIS, VRM); spiller().spill(LRE); // The live virtual register requesting allocation was spilled, so tell @@ -579,7 +342,10 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) { // Write out new DBG_VALUE instructions. getAnalysis<LiveDebugVariables>().emitDebugValues(VRM); - // The pass output is in VirtRegMap. Release all the transient data. + // All machine operands and other references to virtual registers have been + // replaced. Remove the virtual registers and release all the transient data. + VRM->clearAllVirt(); + MRI->clearVirtRegs(); releaseMemory(); return true; diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index b36a445291b7..e09b7f8d26be 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -32,6 +32,7 @@ #include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/SparseSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include <algorithm> @@ -49,10 +50,7 @@ namespace { public: static char ID; RAFast() : MachineFunctionPass(ID), StackSlotForVirtReg(-1), - isBulkSpilling(false) { - initializePHIEliminationPass(*PassRegistry::getPassRegistry()); - initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry()); - } + isBulkSpilling(false) {} private: const TargetMachine *TM; MachineFunction *MF; @@ -71,16 +69,20 @@ namespace { // Everything we know about a live virtual register. struct LiveReg { MachineInstr *LastUse; // Last instr to use reg. + unsigned VirtReg; // Virtual register number. unsigned PhysReg; // Currently held here. unsigned short LastOpNum; // OpNum on LastUse. bool Dirty; // Register needs spill. - LiveReg(unsigned p=0) : LastUse(0), PhysReg(p), LastOpNum(0), - Dirty(false) {} + explicit LiveReg(unsigned v) + : LastUse(0), VirtReg(v), PhysReg(0), LastOpNum(0), Dirty(false) {} + + unsigned getSparseSetKey() const { + return TargetRegisterInfo::virtReg2Index(VirtReg); + } }; - typedef DenseMap<unsigned, LiveReg> LiveRegMap; - typedef LiveRegMap::value_type LiveRegEntry; + typedef SparseSet<LiveReg> LiveRegMap; // LiveVirtRegs - This map contains entries for each virtual register // that is currently available in a physical register. 
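The RegAllocFast hunks above replace the DenseMap-based LiveVirtRegs with a SparseSet keyed by the virtual register index (see LiveReg::getSparseSetKey here and the LiveVirtRegs.setUniverse(MRI->getNumVirtRegs()) call in runOnMachineFunction further down). As a minimal, self-contained sketch of the Briggs-Torczon sparse-set idea behind that container — the class name, member names, and the universe size below are hypothetical and simplified, not LLVM's actual SparseSet API:

// Hypothetical, simplified sketch of a sparse set over keys in [0, Universe).
// Illustrates the idea only; it is not llvm::SparseSet.
#include <cassert>
#include <cstdint>
#include <vector>

class SparseSetSketch {
  std::vector<uint32_t> Sparse; // Key -> index into Dense (may be stale).
  std::vector<uint32_t> Dense;  // Keys currently in the set, unordered.
public:
  // Must be called before use, e.g. with the number of virtual registers.
  void setUniverse(uint32_t U) { Sparse.assign(U, 0); Dense.clear(); }

  bool contains(uint32_t Key) const {
    uint32_t Idx = Sparse[Key];
    return Idx < Dense.size() && Dense[Idx] == Key;
  }

  void insert(uint32_t Key) {
    if (contains(Key))
      return;
    Sparse[Key] = static_cast<uint32_t>(Dense.size());
    Dense.push_back(Key);
  }

  void erase(uint32_t Key) {
    if (!contains(Key))
      return;
    uint32_t Idx = Sparse[Key];
    Dense[Idx] = Dense.back();   // Move the last element into the hole.
    Sparse[Dense[Idx]] = Idx;    // Fix its sparse slot.
    Dense.pop_back();
  }

  // O(1): stale Sparse entries are harmless because contains() re-validates.
  void clear() { Dense.clear(); }
};

int main() {
  SparseSetSketch LiveVirt;
  LiveVirt.setUniverse(1024);    // e.g. number of virtual registers
  LiveVirt.insert(5);
  LiveVirt.insert(7);
  assert(LiveVirt.contains(7) && !LiveVirt.contains(6));
  LiveVirt.erase(5);
  assert(!LiveVirt.contains(5));
  LiveVirt.clear();
  assert(!LiveVirt.contains(7));
  return 0;
}

The property the patch relies on is that membership tests, insertion, and clearing are constant time over a fixed universe of virtual register indices, which is why the allocator sets the universe once per function and can cheaply reset the map between basic blocks.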
@@ -137,8 +139,6 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); - AU.addRequiredID(PHIEliminationID); - AU.addRequiredID(TwoAddressInstructionPassID); MachineFunctionPass::getAnalysisUsage(AU); } @@ -159,14 +159,23 @@ namespace { void usePhysReg(MachineOperand&); void definePhysReg(MachineInstr *MI, unsigned PhysReg, RegState NewState); unsigned calcSpillCost(unsigned PhysReg) const; - void assignVirtToPhysReg(LiveRegEntry &LRE, unsigned PhysReg); - void allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint); + void assignVirtToPhysReg(LiveReg&, unsigned PhysReg); + LiveRegMap::iterator findLiveVirtReg(unsigned VirtReg) { + return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg)); + } + LiveRegMap::const_iterator findLiveVirtReg(unsigned VirtReg) const { + return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg)); + } + LiveRegMap::iterator assignVirtToPhysReg(unsigned VReg, unsigned PhysReg); + LiveRegMap::iterator allocVirtReg(MachineInstr *MI, LiveRegMap::iterator, + unsigned Hint); LiveRegMap::iterator defineVirtReg(MachineInstr *MI, unsigned OpNum, unsigned VirtReg, unsigned Hint); LiveRegMap::iterator reloadVirtReg(MachineInstr *MI, unsigned OpNum, unsigned VirtReg, unsigned Hint); void spillAll(MachineInstr *MI); bool setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg); + void addRetOperands(MachineBasicBlock *MBB); }; char RAFast::ID = 0; } @@ -222,10 +231,10 @@ void RAFast::addKillFlag(const LiveReg &LR) { /// killVirtReg - Mark virtreg as no longer available. void RAFast::killVirtReg(LiveRegMap::iterator LRI) { - addKillFlag(LRI->second); - const LiveReg &LR = LRI->second; - assert(PhysRegState[LR.PhysReg] == LRI->first && "Broken RegState mapping"); - PhysRegState[LR.PhysReg] = regFree; + addKillFlag(*LRI); + assert(PhysRegState[LRI->PhysReg] == LRI->VirtReg && + "Broken RegState mapping"); + PhysRegState[LRI->PhysReg] = regFree; // Erase from LiveVirtRegs unless we're spilling in bulk. if (!isBulkSpilling) LiveVirtRegs.erase(LRI); @@ -235,7 +244,7 @@ void RAFast::killVirtReg(LiveRegMap::iterator LRI) { void RAFast::killVirtReg(unsigned VirtReg) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "killVirtReg needs a virtual register"); - LiveRegMap::iterator LRI = LiveVirtRegs.find(VirtReg); + LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); if (LRI != LiveVirtRegs.end()) killVirtReg(LRI); } @@ -245,7 +254,7 @@ void RAFast::killVirtReg(unsigned VirtReg) { void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Spilling a physical register is illegal!"); - LiveRegMap::iterator LRI = LiveVirtRegs.find(VirtReg); + LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); assert(LRI != LiveVirtRegs.end() && "Spilling unmapped virtual register"); spillVirtReg(MI, LRI); } @@ -253,18 +262,18 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg) { /// spillVirtReg - Do the actual work of spilling. void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, LiveRegMap::iterator LRI) { - LiveReg &LR = LRI->second; - assert(PhysRegState[LR.PhysReg] == LRI->first && "Broken RegState mapping"); + LiveReg &LR = *LRI; + assert(PhysRegState[LR.PhysReg] == LRI->VirtReg && "Broken RegState mapping"); if (LR.Dirty) { // If this physreg is used by the instruction, we want to kill it on the // instruction, not on the spill. 
bool SpillKill = LR.LastUse != MI; LR.Dirty = false; - DEBUG(dbgs() << "Spilling " << PrintReg(LRI->first, TRI) + DEBUG(dbgs() << "Spilling " << PrintReg(LRI->VirtReg, TRI) << " in " << PrintReg(LR.PhysReg, TRI)); - const TargetRegisterClass *RC = MRI->getRegClass(LRI->first); - int FI = getStackSpaceFor(LRI->first, RC); + const TargetRegisterClass *RC = MRI->getRegClass(LRI->VirtReg); + int FI = getStackSpaceFor(LRI->VirtReg, RC); DEBUG(dbgs() << " to stack slot #" << FI << "\n"); TII->storeRegToStackSlot(*MBB, MI, LR.PhysReg, SpillKill, FI, RC, TRI); ++NumStores; // Update statistics @@ -272,7 +281,8 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, // If this register is used by DBG_VALUE then insert new DBG_VALUE to // identify spilled location as the place to find corresponding variable's // value. - SmallVector<MachineInstr *, 4> &LRIDbgValues = LiveDbgValueMap[LRI->first]; + SmallVector<MachineInstr *, 4> &LRIDbgValues = + LiveDbgValueMap[LRI->VirtReg]; for (unsigned li = 0, le = LRIDbgValues.size(); li != le; ++li) { MachineInstr *DBG = LRIDbgValues[li]; const MDNode *MDPtr = @@ -295,8 +305,9 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, DEBUG(dbgs() << "Inserting debug info due to spill:" << "\n" << *NewDV); } } - // Now this register is spilled there is should not be any DBG_VALUE pointing - // to this register because they are all pointing to spilled value now. + // Now that this register is spilled, there should not be any DBG_VALUE + // pointing to this register because they are all pointing to the spilled + // value now. LRIDbgValues.clear(); if (SpillKill) LR.LastUse = 0; // Don't kill register again @@ -343,7 +354,7 @@ void RAFast::usePhysReg(MachineOperand &MO) { } // Maybe a superregister is reserved? - for (const unsigned *AS = TRI->getAliasSet(PhysReg); + for (const uint16_t *AS = TRI->getAliasSet(PhysReg); unsigned Alias = *AS; ++AS) { switch (PhysRegState[Alias]) { case regDisabled: @@ -397,7 +408,7 @@ void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg, // This is a disabled register, disable all aliases. PhysRegState[PhysReg] = NewState; - for (const unsigned *AS = TRI->getAliasSet(PhysReg); + for (const uint16_t *AS = TRI->getAliasSet(PhysReg); unsigned Alias = *AS; ++AS) { switch (unsigned VirtReg = PhysRegState[Alias]) { case regDisabled: @@ -435,14 +446,17 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const { DEBUG(dbgs() << PrintReg(VirtReg, TRI) << " corresponding " << PrintReg(PhysReg, TRI) << " is reserved already.\n"); return spillImpossible; - default: - return LiveVirtRegs.lookup(VirtReg).Dirty ? spillDirty : spillClean; + default: { + LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg); + assert(I != LiveVirtRegs.end() && "Missing VirtReg entry"); + return I->Dirty ? spillDirty : spillClean; + } } // This is a disabled register, add up cost of aliases. DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is disabled.\n"); unsigned Cost = 0; - for (const unsigned *AS = TRI->getAliasSet(PhysReg); + for (const uint16_t *AS = TRI->getAliasSet(PhysReg); unsigned Alias = *AS; ++AS) { if (UsedInInstr.test(Alias)) return spillImpossible; @@ -454,10 +468,13 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const { break; case regReserved: return spillImpossible; - default: - Cost += LiveVirtRegs.lookup(VirtReg).Dirty ? spillDirty : spillClean; + default: { + LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg); + assert(I != LiveVirtRegs.end() && "Missing VirtReg entry"); + Cost += I->Dirty ?
spillDirty : spillClean; break; } + } } return Cost; } @@ -467,17 +484,27 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const { /// that PhysReg is the proper container for VirtReg now. The physical /// register must not be used for anything else when this is called. /// -void RAFast::assignVirtToPhysReg(LiveRegEntry &LRE, unsigned PhysReg) { - DEBUG(dbgs() << "Assigning " << PrintReg(LRE.first, TRI) << " to " +void RAFast::assignVirtToPhysReg(LiveReg &LR, unsigned PhysReg) { + DEBUG(dbgs() << "Assigning " << PrintReg(LR.VirtReg, TRI) << " to " << PrintReg(PhysReg, TRI) << "\n"); - PhysRegState[PhysReg] = LRE.first; - assert(!LRE.second.PhysReg && "Already assigned a physreg"); - LRE.second.PhysReg = PhysReg; + PhysRegState[PhysReg] = LR.VirtReg; + assert(!LR.PhysReg && "Already assigned a physreg"); + LR.PhysReg = PhysReg; +} + +RAFast::LiveRegMap::iterator +RAFast::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) { + LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); + assert(LRI != LiveVirtRegs.end() && "VirtReg disappeared"); + assignVirtToPhysReg(*LRI, PhysReg); + return LRI; } /// allocVirtReg - Allocate a physical register for VirtReg. -void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) { - const unsigned VirtReg = LRE.first; +RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI, + LiveRegMap::iterator LRI, + unsigned Hint) { + const unsigned VirtReg = LRI->VirtReg; assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Can only allocate virtual registers"); @@ -496,7 +523,9 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) { if (Cost < spillDirty) { if (Cost) definePhysReg(MI, Hint, regFree); - return assignVirtToPhysReg(LRE, Hint); + // definePhysReg may kill virtual registers and modify LiveVirtRegs. + // That invalidates LRI, so run a new lookup for VirtReg. + return assignVirtToPhysReg(VirtReg, Hint); } } @@ -505,8 +534,10 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) { // First try to find a completely free register. for (ArrayRef<unsigned>::iterator I = AO.begin(), E = AO.end(); I != E; ++I) { unsigned PhysReg = *I; - if (PhysRegState[PhysReg] == regFree && !UsedInInstr.test(PhysReg)) - return assignVirtToPhysReg(LRE, PhysReg); + if (PhysRegState[PhysReg] == regFree && !UsedInInstr.test(PhysReg)) { + assignVirtToPhysReg(*LRI, PhysReg); + return LRI; + } } DEBUG(dbgs() << "Allocating " << PrintReg(VirtReg) << " from " @@ -519,21 +550,25 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) { DEBUG(dbgs() << "\tCost: " << Cost << "\n"); DEBUG(dbgs() << "\tBestCost: " << BestCost << "\n"); // Cost is 0 when all aliases are already disabled. - if (Cost == 0) - return assignVirtToPhysReg(LRE, *I); + if (Cost == 0) { + assignVirtToPhysReg(*LRI, *I); + return LRI; + } if (Cost < BestCost) BestReg = *I, BestCost = Cost; } if (BestReg) { definePhysReg(MI, BestReg, regFree); - return assignVirtToPhysReg(LRE, BestReg); + // definePhysReg may kill virtual registers and modify LiveVirtRegs. + // That invalidates LRI, so run a new lookup for VirtReg. + return assignVirtToPhysReg(VirtReg, BestReg); } // Nothing we can do. Report an error and keep going with a bad allocation. 
MI->emitError("ran out of registers during register allocation"); definePhysReg(MI, *AO.begin(), regFree); - assignVirtToPhysReg(LRE, *AO.begin()); + return assignVirtToPhysReg(VirtReg, *AO.begin()); } /// defineVirtReg - Allocate a register for VirtReg and mark it as dirty. @@ -544,8 +579,7 @@ RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum, "Not a virtual register"); LiveRegMap::iterator LRI; bool New; - tie(LRI, New) = LiveVirtRegs.insert(std::make_pair(VirtReg, LiveReg())); - LiveReg &LR = LRI->second; + tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg)); if (New) { // If there is no hint, peek at the only use of this register. if ((!Hint || !TargetRegisterInfo::isPhysicalRegister(Hint)) && @@ -555,18 +589,18 @@ RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum, if (UseMI.isCopyLike()) Hint = UseMI.getOperand(0).getReg(); } - allocVirtReg(MI, *LRI, Hint); - } else if (LR.LastUse) { + LRI = allocVirtReg(MI, LRI, Hint); + } else if (LRI->LastUse) { // Redefining a live register - kill at the last use, unless it is this // instruction defining VirtReg multiple times. - if (LR.LastUse != MI || LR.LastUse->getOperand(LR.LastOpNum).isUse()) - addKillFlag(LR); + if (LRI->LastUse != MI || LRI->LastUse->getOperand(LRI->LastOpNum).isUse()) + addKillFlag(*LRI); } - assert(LR.PhysReg && "Register not assigned"); - LR.LastUse = MI; - LR.LastOpNum = OpNum; - LR.Dirty = true; - UsedInInstr.set(LR.PhysReg); + assert(LRI->PhysReg && "Register not assigned"); + LRI->LastUse = MI; + LRI->LastOpNum = OpNum; + LRI->Dirty = true; + UsedInInstr.set(LRI->PhysReg); return LRI; } @@ -578,18 +612,17 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum, "Not a virtual register"); LiveRegMap::iterator LRI; bool New; - tie(LRI, New) = LiveVirtRegs.insert(std::make_pair(VirtReg, LiveReg())); - LiveReg &LR = LRI->second; + tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg)); MachineOperand &MO = MI->getOperand(OpNum); if (New) { - allocVirtReg(MI, *LRI, Hint); + LRI = allocVirtReg(MI, LRI, Hint); const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); int FrameIndex = getStackSpaceFor(VirtReg, RC); DEBUG(dbgs() << "Reloading " << PrintReg(VirtReg, TRI) << " into " - << PrintReg(LR.PhysReg, TRI) << "\n"); - TII->loadRegFromStackSlot(*MBB, MI, LR.PhysReg, FrameIndex, RC, TRI); + << PrintReg(LRI->PhysReg, TRI) << "\n"); + TII->loadRegFromStackSlot(*MBB, MI, LRI->PhysReg, FrameIndex, RC, TRI); ++NumLoads; - } else if (LR.Dirty) { + } else if (LRI->Dirty) { if (isLastUseOfLocalReg(MO)) { DEBUG(dbgs() << "Killing last use: " << MO << "\n"); if (MO.isUse()) @@ -614,10 +647,10 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum, DEBUG(dbgs() << "Clearing clean dead: " << MO << "\n"); MO.setIsDead(false); } - assert(LR.PhysReg && "Register not assigned"); - LR.LastUse = MI; - LR.LastOpNum = OpNum; - UsedInInstr.set(LR.PhysReg); + assert(LRI->PhysReg && "Register not assigned"); + LRI->LastUse = MI; + LRI->LastOpNum = OpNum; + UsedInInstr.set(LRI->PhysReg); return LRI; } @@ -674,7 +707,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI, UsedInInstr.set(Reg); if (ThroughRegs.count(PhysRegState[Reg])) definePhysReg(MI, Reg, regFree); - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) { + for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) { UsedInInstr.set(*AS); if (ThroughRegs.count(PhysRegState[*AS])) definePhysReg(MI, *AS, regFree); @@ -682,7 +715,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI, } SmallVector<unsigned, 8> PartialDefs; - DEBUG(dbgs() << 
"Allocating tied uses and early clobbers.\n"); + DEBUG(dbgs() << "Allocating tied uses.\n"); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; @@ -694,7 +727,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI, DEBUG(dbgs() << "Operand " << i << "("<< MO << ") is tied to operand " << DefIdx << ".\n"); LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0); - unsigned PhysReg = LRI->second.PhysReg; + unsigned PhysReg = LRI->PhysReg; setPhysReg(MI, i, PhysReg); // Note: we don't update the def operand yet. That would cause the normal // def-scan to attempt spilling. @@ -703,16 +736,25 @@ void RAFast::handleThroughOperands(MachineInstr *MI, // Reload the register, but don't assign to the operand just yet. // That would confuse the later phys-def processing pass. LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0); - PartialDefs.push_back(LRI->second.PhysReg); - } else if (MO.isEarlyClobber()) { - // Note: defineVirtReg may invalidate MO. - LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, 0); - unsigned PhysReg = LRI->second.PhysReg; - if (setPhysReg(MI, i, PhysReg)) - VirtDead.push_back(Reg); + PartialDefs.push_back(LRI->PhysReg); } } + DEBUG(dbgs() << "Allocating early clobbers.\n"); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; + if (!MO.isEarlyClobber()) + continue; + // Note: defineVirtReg may invalidate MO. + LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, 0); + unsigned PhysReg = LRI->PhysReg; + if (setPhysReg(MI, i, PhysReg)) + VirtDead.push_back(Reg); + } + // Restore UsedInInstr to a state usable for allocating normal virtual uses. UsedInInstr.reset(); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { @@ -730,32 +772,66 @@ void RAFast::handleThroughOperands(MachineInstr *MI, UsedInInstr.set(PartialDefs[i]); } -void RAFast::AllocateBasicBlock() { - DEBUG(dbgs() << "\nAllocating " << *MBB); +/// addRetOperand - ensure that a return instruction has an operand for each +/// value live out of the function. +/// +/// Things marked both call and return are tail calls; do not do this for them. +/// The tail callee need not take the same registers as input that it produces +/// as output, and there are dependencies for its input registers elsewhere. +/// +/// FIXME: This should be done as part of instruction selection, and this helper +/// should be deleted. Until then, we use custom logic here to create the proper +/// operand under all circumstances. We can't use addRegisterKilled because that +/// doesn't make sense for undefined values. We can't simply avoid calling it +/// for undefined values, because we must ensure that the operand always exists. +void RAFast::addRetOperands(MachineBasicBlock *MBB) { + if (MBB->empty() || !MBB->back().isReturn() || MBB->back().isCall()) + return; + + MachineInstr *MI = &MBB->back(); + + for (MachineRegisterInfo::liveout_iterator + I = MBB->getParent()->getRegInfo().liveout_begin(), + E = MBB->getParent()->getRegInfo().liveout_end(); I != E; ++I) { + unsigned Reg = *I; + assert(TargetRegisterInfo::isPhysicalRegister(Reg) && + "Cannot have a live-out virtual register."); + + bool hasDef = PhysRegState[Reg] == regReserved; + + // Check if this register already has an operand. 
+ bool Found = false; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse()) + continue; + + unsigned OperReg = MO.getReg(); + if (!TargetRegisterInfo::isPhysicalRegister(OperReg)) + continue; - // FIXME: This should probably be added by instruction selection instead? - // If the last instruction in the block is a return, make sure to mark it as - // using all of the live-out values in the function. Things marked both call - // and return are tail calls; do not do this for them. The tail callee need - // not take the same registers as input that it produces as output, and there - // are dependencies for its input registers elsewhere. - if (!MBB->empty() && MBB->back().getDesc().isReturn() && - !MBB->back().getDesc().isCall()) { - MachineInstr *Ret = &MBB->back(); - - for (MachineRegisterInfo::liveout_iterator - I = MF->getRegInfo().liveout_begin(), - E = MF->getRegInfo().liveout_end(); I != E; ++I) { - assert(TargetRegisterInfo::isPhysicalRegister(*I) && - "Cannot have a live-out virtual register."); - - // Add live-out registers as implicit uses. - Ret->addRegisterKilled(*I, TRI, true); + if (OperReg == Reg || TRI->isSuperRegister(OperReg, Reg)) { + // If the ret already has an operand for this physreg or a superset, + // don't duplicate it. Set the kill flag if the value is defined. + if (hasDef && !MO.isKill()) + MO.setIsKill(); + Found = true; + break; + } } + if (!Found) + MI->addOperand(MachineOperand::CreateReg(Reg, + false /*IsDef*/, + true /*IsImp*/, + hasDef/*IsKill*/)); } +} + +void RAFast::AllocateBasicBlock() { + DEBUG(dbgs() << "\nAllocating " << *MBB); PhysRegState.assign(TRI->getNumRegs(), regDisabled); - assert(LiveVirtRegs.empty() && "Mapping not cleared form last block?"); + assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?"); MachineBasicBlock::iterator MII = MBB->begin(); @@ -783,25 +859,26 @@ void RAFast::AllocateBasicBlock() { case regReserved: dbgs() << "*"; break; - default: + default: { dbgs() << '=' << PrintReg(PhysRegState[Reg]); - if (LiveVirtRegs[PhysRegState[Reg]].Dirty) + LiveRegMap::iterator I = findLiveVirtReg(PhysRegState[Reg]); + assert(I != LiveVirtRegs.end() && "Missing VirtReg entry"); + if (I->Dirty) dbgs() << "*"; - assert(LiveVirtRegs[PhysRegState[Reg]].PhysReg == Reg && - "Bad inverse map"); + assert(I->PhysReg == Reg && "Bad inverse map"); break; } + } } dbgs() << '\n'; // Check that LiveVirtRegs is the inverse. 
for (LiveRegMap::iterator i = LiveVirtRegs.begin(), e = LiveVirtRegs.end(); i != e; ++i) { - assert(TargetRegisterInfo::isVirtualRegister(i->first) && + assert(TargetRegisterInfo::isVirtualRegister(i->VirtReg) && "Bad map key"); - assert(TargetRegisterInfo::isPhysicalRegister(i->second.PhysReg) && + assert(TargetRegisterInfo::isPhysicalRegister(i->PhysReg) && "Bad map value"); - assert(PhysRegState[i->second.PhysReg] == i->first && - "Bad inverse map"); + assert(PhysRegState[i->PhysReg] == i->VirtReg && "Bad inverse map"); } }); @@ -815,10 +892,9 @@ void RAFast::AllocateBasicBlock() { if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; - LiveDbgValueMap[Reg].push_back(MI); - LiveRegMap::iterator LRI = LiveVirtRegs.find(Reg); + LiveRegMap::iterator LRI = findLiveVirtReg(Reg); if (LRI != LiveVirtRegs.end()) - setPhysReg(MI, i, LRI->second.PhysReg); + setPhysReg(MI, i, LRI->PhysReg); else { int SS = StackSlotForVirtReg[Reg]; if (SS == -1) { @@ -849,6 +925,7 @@ void RAFast::AllocateBasicBlock() { } } } + LiveDbgValueMap[Reg].push_back(MI); } } // Next instruction. @@ -932,7 +1009,7 @@ void RAFast::AllocateBasicBlock() { if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; if (MO.isUse()) { LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, CopyDst); - unsigned PhysReg = LRI->second.PhysReg; + unsigned PhysReg = LRI->PhysReg; CopySrc = (CopySrc == Reg || CopySrc == PhysReg) ? PhysReg : 0; if (setPhysReg(MI, i, PhysReg)) killVirtReg(LRI); @@ -953,13 +1030,13 @@ void RAFast::AllocateBasicBlock() { // Look for physreg defs and tied uses. if (!MO.isDef() && !MI->isRegTiedToDefOperand(i)) continue; UsedInInstr.set(Reg); - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) + for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) UsedInInstr.set(*AS); } } unsigned DefOpEnd = MI->getNumOperands(); - if (MCID.isCall()) { + if (MI->isCall()) { // Spill all virtregs before a call. This serves two purposes: 1. If an // exception is thrown, the landing pad is going to expect to find // registers in their spill slots, and 2. we don't have to wade through @@ -988,7 +1065,7 @@ void RAFast::AllocateBasicBlock() { continue; } LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, CopySrc); - unsigned PhysReg = LRI->second.PhysReg; + unsigned PhysReg = LRI->PhysReg; if (setPhysReg(MI, i, PhysReg)) { VirtDead.push_back(Reg); CopyDst = 0; // cancel coalescing; @@ -1024,6 +1101,9 @@ void RAFast::AllocateBasicBlock() { MBB->erase(Coalesced[i]); NumCopies += Coalesced.size(); + // addRetOperands must run after we've seen all defs in this block. + addRetOperands(MBB); + DEBUG(MBB->dump()); } @@ -1038,12 +1118,16 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) { TM = &Fn.getTarget(); TRI = TM->getRegisterInfo(); TII = TM->getInstrInfo(); + MRI->freezeReservedRegs(Fn); RegClassInfo.runOnMachineFunction(Fn); UsedInInstr.resize(TRI->getNumRegs()); + assert(!MRI->isSSA() && "regalloc requires leaving SSA"); + // initialize the virtual->physical register map to have a 'null' // mapping for all virtual registers StackSlotForVirtReg.resize(MRI->getNumVirtRegs()); + LiveVirtRegs.setUniverse(MRI->getNumVirtRegs()); // Loop over all of the basic blocks, eliminating virtual register references for (MachineFunction::iterator MBBi = Fn.begin(), MBBe = Fn.end(); @@ -1052,16 +1136,17 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) { AllocateBasicBlock(); } - // Make sure the set of used physregs is closed under subreg operations. 
- MRI->closePhysRegsUsed(*TRI); - // Add the clobber lists for all the instructions we skipped earlier. for (SmallPtrSet<const MCInstrDesc*, 4>::const_iterator I = SkippedInstrs.begin(), E = SkippedInstrs.end(); I != E; ++I) - if (const unsigned *Defs = (*I)->getImplicitDefs()) + if (const uint16_t *Defs = (*I)->getImplicitDefs()) while (*Defs) MRI->setPhysRegUsed(*Defs++); + // All machine operands and other references to virtual registers have been + // replaced. Remove the virtual registers. + MRI->clearVirtRegs(); + SkippedInstrs.clear(); StackSlotForVirtReg.clear(); LiveDbgValueMap.clear(); diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index f54a2c85d100..3f2a617100c3 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -16,7 +16,6 @@ #include "AllocationOrder.h" #include "InterferenceCache.h" #include "LiveDebugVariables.h" -#include "LiveRangeEdit.h" #include "RegAllocBase.h" #include "Spiller.h" #include "SpillPlacement.h" @@ -29,6 +28,7 @@ #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/EdgeBundles.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -168,6 +168,19 @@ class RAGreedy : public MachineFunctionPass, } }; + // Register mask interference. The current VirtReg is checked for register + // mask interference on entry to selectOrSplit(). If there is no + // interference, UsableRegs is left empty. If there is interference, + // UsableRegs has a bit mask of registers that can be used without register + // mask interference. + BitVector UsableRegs; + + /// clobberedByRegMask - Returns true if PhysReg is not directly usable + /// because of register mask clobbers. + bool clobberedByRegMask(unsigned PhysReg) const { + return !UsableRegs.empty() && !UsableRegs.test(PhysReg); + } + // splitting state. 
std::auto_ptr<SplitAnalysis> SA; std::auto_ptr<SplitEditor> SE; @@ -248,7 +261,6 @@ public: static char ID; private: - void LRE_WillEraseInstruction(MachineInstr*); bool LRE_CanEraseVirtReg(unsigned); void LRE_WillShrinkVirtReg(unsigned); void LRE_DidCloneVirtReg(unsigned, unsigned); @@ -308,8 +320,8 @@ RAGreedy::RAGreedy(): MachineFunctionPass(ID) { initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); - initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry()); initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); + initializeMachineSchedulerPass(*PassRegistry::getPassRegistry()); initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); initializeLiveStacksPass(*PassRegistry::getPassRegistry()); initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); @@ -328,9 +340,6 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<SlotIndexes>(); AU.addRequired<LiveDebugVariables>(); AU.addPreserved<LiveDebugVariables>(); - if (StrongPHIElim) - AU.addRequiredID(StrongPHIEliminationID); - AU.addRequiredTransitiveID(RegisterCoalescerPassID); AU.addRequired<CalculateSpillWeights>(); AU.addRequired<LiveStacks>(); AU.addPreserved<LiveStacks>(); @@ -350,11 +359,6 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { // LiveRangeEdit delegate methods //===----------------------------------------------------------------------===// -void RAGreedy::LRE_WillEraseInstruction(MachineInstr *MI) { - // LRE itself will remove from SlotIndexes and parent basic block. - VRM->RemoveMachineInstrFromMaps(MI); -} - bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) { if (unsigned PhysReg = VRM->getPhys(VirtReg)) { unassign(LIS->getInterval(VirtReg), PhysReg); @@ -424,13 +428,13 @@ void RAGreedy::enqueue(LiveInterval *LI) { Prio |= (1u << 30); } - Queue.push(std::make_pair(Prio, Reg)); + Queue.push(std::make_pair(Prio, ~Reg)); } LiveInterval *RAGreedy::dequeue() { if (Queue.empty()) return 0; - LiveInterval *LI = &LIS->getInterval(Queue.top().second); + LiveInterval *LI = &LIS->getInterval(~Queue.top().second); Queue.pop(); return LI; } @@ -446,9 +450,12 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, SmallVectorImpl<LiveInterval*> &NewVRegs) { Order.rewind(); unsigned PhysReg; - while ((PhysReg = Order.next())) + while ((PhysReg = Order.next())) { + if (clobberedByRegMask(PhysReg)) + continue; if (!checkPhysRegInterference(VirtReg, PhysReg)) break; + } if (!PhysReg || Order.isHint(PhysReg)) return PhysReg; @@ -457,7 +464,7 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, // If we missed a simple hint, try to cheaply evict interference from the // preferred register. if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg)) - if (Order.isHint(Hint)) { + if (Order.isHint(Hint) && !clobberedByRegMask(Hint)) { DEBUG(dbgs() << "missed hint " << PrintReg(Hint, TRI) << '\n'); EvictionCost MaxCost(1); if (canEvictInterference(VirtReg, Hint, true, MaxCost)) { @@ -532,7 +539,7 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, Cascade = NextCascade; EvictionCost Cost; - for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) { + for (const uint16_t *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) { LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI); // If there is 10 or more interferences, chances are one is heavier. 
if (Q.collectInterferingVRegs(10) >= 10) @@ -590,7 +597,7 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg, DEBUG(dbgs() << "evicting " << PrintReg(PhysReg, TRI) << " interference: Cascade " << Cascade << '\n'); - for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) { + for (const uint16_t *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) { LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI); assert(Q.seenAllInterferences() && "Didn't check all interfererences."); for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) { @@ -629,6 +636,8 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, Order.rewind(); while (unsigned PhysReg = Order.next()) { + if (clobberedByRegMask(PhysReg)) + continue; if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit) continue; // The first use of a callee-saved register in a function has cost 1. @@ -1118,6 +1127,8 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, } --NumCands; GlobalCand[Worst] = GlobalCand[NumCands]; + if (BestCand == NumCands) + BestCand = Worst; } if (GlobalCand.size() <= NumCands) @@ -1172,7 +1183,7 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, return 0; // Prepare split editor. - LiveRangeEdit LREdit(VirtReg, NewVRegs, this); + LiveRangeEdit LREdit(VirtReg, NewVRegs, *MF, *LIS, VRM, this); SE->reset(LREdit, SplitSpillMode); // Assign all edge bundles to the preferred candidate, or NoCand. @@ -1220,7 +1231,7 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed"); unsigned Reg = VirtReg.reg; bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)); - LiveRangeEdit LREdit(VirtReg, NewVRegs, this); + LiveRangeEdit LREdit(VirtReg, NewVRegs, *MF, *LIS, VRM, this); SE->reset(LREdit, SplitSpillMode); ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); for (unsigned i = 0; i != UseBlocks.size(); ++i) { @@ -1268,7 +1279,7 @@ void RAGreedy::calcGapWeights(unsigned PhysReg, SmallVectorImpl<float> &GapWeight) { assert(SA->getUseBlocks().size() == 1 && "Not a local interval"); const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front(); - const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots; + ArrayRef<SlotIndex> Uses = SA->getUseSlots(); const unsigned NumGaps = Uses.size()-1; // Start and end points for the interference check. @@ -1280,7 +1291,7 @@ void RAGreedy::calcGapWeights(unsigned PhysReg, GapWeight.assign(NumGaps, 0.0f); // Add interference from each overlapping register. - for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) { + for (const uint16_t *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) { if (!query(const_cast<LiveInterval&>(SA->getParent()), *AI) .checkInterference()) continue; @@ -1292,7 +1303,7 @@ void RAGreedy::calcGapWeights(unsigned PhysReg, // surrounding the instruction. The exception is interference before // StartIdx and after StopIdx. // - LiveIntervalUnion::SegmentIter IntI = PhysReg2LiveUnion[*AI].find(StartIdx); + LiveIntervalUnion::SegmentIter IntI = getLiveUnion(*AI).find(StartIdx); for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) { // Skip the gaps before IntI. while (Uses[Gap+1].getBoundaryIndex() < IntI.start()) @@ -1329,7 +1340,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, // that the interval is continuous from FirstInstr to LastInstr. 
We should // make sure that we don't do anything illegal to such an interval, though. - const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots; + ArrayRef<SlotIndex> Uses = SA->getUseSlots(); if (Uses.size() <= 2) return 0; const unsigned NumGaps = Uses.size()-1; @@ -1337,10 +1348,40 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, DEBUG({ dbgs() << "tryLocalSplit: "; for (unsigned i = 0, e = Uses.size(); i != e; ++i) - dbgs() << ' ' << SA->UseSlots[i]; + dbgs() << ' ' << Uses[i]; dbgs() << '\n'; }); + // If VirtReg is live across any register mask operands, compute a list of + // gaps with register masks. + SmallVector<unsigned, 8> RegMaskGaps; + if (!UsableRegs.empty()) { + // Get regmask slots for the whole block. + ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber()); + DEBUG(dbgs() << RMS.size() << " regmasks in block:"); + // Constrain to VirtReg's live range. + unsigned ri = std::lower_bound(RMS.begin(), RMS.end(), + Uses.front().getRegSlot()) - RMS.begin(); + unsigned re = RMS.size(); + for (unsigned i = 0; i != NumGaps && ri != re; ++i) { + // Look for Uses[i] <= RMS <= Uses[i+1]. + assert(!SlotIndex::isEarlierInstr(RMS[ri], Uses[i])); + if (SlotIndex::isEarlierInstr(Uses[i+1], RMS[ri])) + continue; + // Skip a regmask on the same instruction as the last use. It doesn't + // overlap the live range. + if (SlotIndex::isSameInstr(Uses[i+1], RMS[ri]) && i+1 == NumGaps) + break; + DEBUG(dbgs() << ' ' << RMS[ri] << ':' << Uses[i] << '-' << Uses[i+1]); + RegMaskGaps.push_back(i); + // Advance ri to the next gap. A regmask on one of the uses counts in + // both gaps. + while (ri != re && SlotIndex::isEarlierInstr(RMS[ri], Uses[i+1])) + ++ri; + } + DEBUG(dbgs() << '\n'); + } + // Since we allow local split results to be split again, there is a risk of // creating infinite loops. It is tempting to require that the new live // ranges have less instructions than the original. That would guarantee @@ -1375,6 +1416,11 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, // order to make use of PhysReg between UseSlots[i] and UseSlots[i+1]. calcGapWeights(PhysReg, GapWeight); + // Remove any gaps with regmask clobbers. + if (clobberedByRegMask(PhysReg)) + for (unsigned i = 0, e = RegMaskGaps.size(); i != e; ++i) + GapWeight[RegMaskGaps[i]] = HUGE_VALF; + // Try to find the best sequence of gaps to close. // The new spill weight must be larger than any gap interference. @@ -1466,7 +1512,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, << '-' << Uses[BestAfter] << ", " << BestDiff << ", " << (BestAfter - BestBefore + 1) << " instrs\n"); - LiveRangeEdit LREdit(VirtReg, NewVRegs, this); + LiveRangeEdit LREdit(VirtReg, NewVRegs, *MF, *LIS, VRM, this); SE->reset(LREdit); SE->openIntv(); @@ -1553,6 +1599,11 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, SmallVectorImpl<LiveInterval*> &NewVRegs) { + // Check if VirtReg is live across any calls. + UsableRegs.clear(); + if (LIS->checkRegMaskInterference(VirtReg, UsableRegs)) + DEBUG(dbgs() << "Live across regmasks.\n"); + // First try assigning a free register. AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo); if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) @@ -1593,7 +1644,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, // Finally spill VirtReg itself. 
NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled); - LiveRangeEdit LRE(VirtReg, NewVRegs, this); + LiveRangeEdit LRE(VirtReg, NewVRegs, *MF, *LIS, VRM, this); spiller().spill(LRE); setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done); @@ -1628,7 +1679,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { ExtraRegInfo.clear(); ExtraRegInfo.resize(MRI->getNumVirtRegs()); NextCascade = 1; - IntfCache.init(MF, &PhysReg2LiveUnion[0], Indexes, TRI); + IntfCache.init(MF, &getLiveUnion(0), Indexes, LIS, TRI); GlobalCand.resize(32); // This will grow as needed. allocatePhysRegs(); @@ -1647,7 +1698,10 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { DebugVars->emitDebugValues(VRM); } - // The pass output is in VirtRegMap. Release all the transient data. + // All machine operands and other references to virtual registers have been + // replaced. Remove the virtual registers and release all the transient data. + VRM->clearAllVirt(); + MRI->clearVirtRegs(); releaseMemory(); return true; diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp deleted file mode 100644 index ce3fb90b1126..000000000000 --- a/lib/CodeGen/RegAllocLinearScan.cpp +++ /dev/null @@ -1,1543 +0,0 @@ -//===-- RegAllocLinearScan.cpp - Linear Scan register allocator -----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements a linear scan register allocator. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "regalloc" -#include "LiveDebugVariables.h" -#include "LiveRangeEdit.h" -#include "VirtRegMap.h" -#include "VirtRegRewriter.h" -#include "RegisterClassInfo.h" -#include "Spiller.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Function.h" -#include "llvm/CodeGen/CalcSpillWeights.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/RegAllocRegistry.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/ADT/EquivalenceClasses.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include <algorithm> -#include <queue> -#include <memory> -#include <cmath> - -using namespace llvm; - -STATISTIC(NumIters , "Number of iterations performed"); -STATISTIC(NumBacktracks, "Number of times we had to backtrack"); -STATISTIC(NumCoalesce, "Number of copies coalesced"); -STATISTIC(NumDowngrade, "Number of registers downgraded"); - -static cl::opt<bool> -NewHeuristic("new-spilling-heuristic", - cl::desc("Use new spilling heuristic"), - cl::init(false), cl::Hidden); - -static cl::opt<bool> -TrivCoalesceEnds("trivial-coalesce-ends", - cl::desc("Attempt trivial coalescing of interval ends"), - cl::init(false), cl::Hidden); - -static cl::opt<bool> -AvoidWAWHazard("avoid-waw-hazard", - cl::desc("Avoid write-write hazards for some register classes"), - cl::init(false), 
cl::Hidden); - -static RegisterRegAlloc -linearscanRegAlloc("linearscan", "linear scan register allocator", - createLinearScanRegisterAllocator); - -namespace { - // When we allocate a register, add it to a fixed-size queue of - // registers to skip in subsequent allocations. This trades a small - // amount of register pressure and increased spills for flexibility in - // the post-pass scheduler. - // - // Note that in a the number of registers used for reloading spills - // will be one greater than the value of this option. - // - // One big limitation of this is that it doesn't differentiate between - // different register classes. So on x86-64, if there is xmm register - // pressure, it can caused fewer GPRs to be held in the queue. - static cl::opt<unsigned> - NumRecentlyUsedRegs("linearscan-skip-count", - cl::desc("Number of registers for linearscan to remember" - "to skip."), - cl::init(0), - cl::Hidden); - - struct RALinScan : public MachineFunctionPass { - static char ID; - RALinScan() : MachineFunctionPass(ID) { - initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry()); - initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); - initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry()); - initializeRegisterCoalescerPass( - *PassRegistry::getPassRegistry()); - initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); - initializeLiveStacksPass(*PassRegistry::getPassRegistry()); - initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); - initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); - initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); - initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); - - // Initialize the queue to record recently-used registers. - if (NumRecentlyUsedRegs > 0) - RecentRegs.resize(NumRecentlyUsedRegs, 0); - RecentNext = RecentRegs.begin(); - avoidWAW_ = 0; - } - - typedef std::pair<LiveInterval*, LiveInterval::iterator> IntervalPtr; - typedef SmallVector<IntervalPtr, 32> IntervalPtrs; - private: - /// RelatedRegClasses - This structure is built the first time a function is - /// compiled, and keeps track of which register classes have registers that - /// belong to multiple classes or have aliases that are in other classes. - EquivalenceClasses<const TargetRegisterClass*> RelatedRegClasses; - DenseMap<unsigned, const TargetRegisterClass*> OneClassForEachPhysReg; - - // NextReloadMap - For each register in the map, it maps to the another - // register which is defined by a reload from the same stack slot and - // both reloads are in the same basic block. - DenseMap<unsigned, unsigned> NextReloadMap; - - // DowngradedRegs - A set of registers which are being "downgraded", i.e. - // un-favored for allocation. - SmallSet<unsigned, 8> DowngradedRegs; - - // DowngradeMap - A map from virtual registers to physical registers being - // downgraded for the virtual registers. - DenseMap<unsigned, unsigned> DowngradeMap; - - MachineFunction* mf_; - MachineRegisterInfo* mri_; - const TargetMachine* tm_; - const TargetRegisterInfo* tri_; - const TargetInstrInfo* tii_; - BitVector allocatableRegs_; - BitVector reservedRegs_; - LiveIntervals* li_; - MachineLoopInfo *loopInfo; - RegisterClassInfo RegClassInfo; - - /// handled_ - Intervals are added to the handled_ set in the order of their - /// start value. This is uses for backtracking. - std::vector<LiveInterval*> handled_; - - /// fixed_ - Intervals that correspond to machine registers. 
- /// - IntervalPtrs fixed_; - - /// active_ - Intervals that are currently being processed, and which have a - /// live range active for the current point. - IntervalPtrs active_; - - /// inactive_ - Intervals that are currently being processed, but which have - /// a hold at the current point. - IntervalPtrs inactive_; - - typedef std::priority_queue<LiveInterval*, - SmallVector<LiveInterval*, 64>, - greater_ptr<LiveInterval> > IntervalHeap; - IntervalHeap unhandled_; - - /// regUse_ - Tracks register usage. - SmallVector<unsigned, 32> regUse_; - SmallVector<unsigned, 32> regUseBackUp_; - - /// vrm_ - Tracks register assignments. - VirtRegMap* vrm_; - - std::auto_ptr<VirtRegRewriter> rewriter_; - - std::auto_ptr<Spiller> spiller_; - - // The queue of recently-used registers. - SmallVector<unsigned, 4> RecentRegs; - SmallVector<unsigned, 4>::iterator RecentNext; - - // Last write-after-write register written. - unsigned avoidWAW_; - - // Record that we just picked this register. - void recordRecentlyUsed(unsigned reg) { - assert(reg != 0 && "Recently used register is NOREG!"); - if (!RecentRegs.empty()) { - *RecentNext++ = reg; - if (RecentNext == RecentRegs.end()) - RecentNext = RecentRegs.begin(); - } - } - - public: - virtual const char* getPassName() const { - return "Linear Scan Register Allocator"; - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - AU.addRequired<AliasAnalysis>(); - AU.addPreserved<AliasAnalysis>(); - AU.addRequired<LiveIntervals>(); - AU.addPreserved<SlotIndexes>(); - if (StrongPHIElim) - AU.addRequiredID(StrongPHIEliminationID); - // Make sure PassManager knows which analyses to make available - // to coalescing and which analyses coalescing invalidates. - AU.addRequiredTransitiveID(RegisterCoalescerPassID); - AU.addRequired<CalculateSpillWeights>(); - AU.addRequiredID(LiveStacksID); - AU.addPreservedID(LiveStacksID); - AU.addRequired<MachineLoopInfo>(); - AU.addPreserved<MachineLoopInfo>(); - AU.addRequired<VirtRegMap>(); - AU.addPreserved<VirtRegMap>(); - AU.addRequired<LiveDebugVariables>(); - AU.addPreserved<LiveDebugVariables>(); - AU.addRequiredID(MachineDominatorsID); - AU.addPreservedID(MachineDominatorsID); - MachineFunctionPass::getAnalysisUsage(AU); - } - - /// runOnMachineFunction - register allocate the whole function - bool runOnMachineFunction(MachineFunction&); - - // Determine if we skip this register due to its being recently used. - bool isRecentlyUsed(unsigned reg) const { - return reg == avoidWAW_ || - std::find(RecentRegs.begin(), RecentRegs.end(), reg) != RecentRegs.end(); - } - - private: - /// linearScan - the linear scan algorithm - void linearScan(); - - /// initIntervalSets - initialize the interval sets. - /// - void initIntervalSets(); - - /// processActiveIntervals - expire old intervals and move non-overlapping - /// ones to the inactive list. - void processActiveIntervals(SlotIndex CurPoint); - - /// processInactiveIntervals - expire old intervals and move overlapping - /// ones to the active list. - void processInactiveIntervals(SlotIndex CurPoint); - - /// hasNextReloadInterval - Return the next liveinterval that's being - /// defined by a reload from the same SS as the specified one. - LiveInterval *hasNextReloadInterval(LiveInterval *cur); - - /// DowngradeRegister - Downgrade a register for allocation. - void DowngradeRegister(LiveInterval *li, unsigned Reg); - - /// UpgradeRegister - Upgrade a register for allocation. 
- void UpgradeRegister(unsigned Reg); - - /// assignRegOrStackSlotAtInterval - assign a register if one - /// is available, or spill. - void assignRegOrStackSlotAtInterval(LiveInterval* cur); - - void updateSpillWeights(std::vector<float> &Weights, - unsigned reg, float weight, - const TargetRegisterClass *RC); - - /// findIntervalsToSpill - Determine the intervals to spill for the - /// specified interval. It's passed the physical registers whose spill - /// weight is the lowest among all the registers whose live intervals - /// conflict with the interval. - void findIntervalsToSpill(LiveInterval *cur, - std::vector<std::pair<unsigned,float> > &Candidates, - unsigned NumCands, - SmallVector<LiveInterval*, 8> &SpillIntervals); - - /// attemptTrivialCoalescing - If a simple interval is defined by a copy, - /// try to allocate the definition to the same register as the source, - /// if the register is not defined during the life time of the interval. - /// This eliminates a copy, and is used to coalesce copies which were not - /// coalesced away before allocation either due to dest and src being in - /// different register classes or because the coalescer was overly - /// conservative. - unsigned attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg); - - /// - /// Register usage / availability tracking helpers. - /// - - void initRegUses() { - regUse_.resize(tri_->getNumRegs(), 0); - regUseBackUp_.resize(tri_->getNumRegs(), 0); - } - - void finalizeRegUses() { -#ifndef NDEBUG - // Verify all the registers are "freed". - bool Error = false; - for (unsigned i = 0, e = tri_->getNumRegs(); i != e; ++i) { - if (regUse_[i] != 0) { - dbgs() << tri_->getName(i) << " is still in use!\n"; - Error = true; - } - } - if (Error) - llvm_unreachable(0); -#endif - regUse_.clear(); - regUseBackUp_.clear(); - } - - void addRegUse(unsigned physReg) { - assert(TargetRegisterInfo::isPhysicalRegister(physReg) && - "should be physical register!"); - ++regUse_[physReg]; - for (const unsigned* as = tri_->getAliasSet(physReg); *as; ++as) - ++regUse_[*as]; - } - - void delRegUse(unsigned physReg) { - assert(TargetRegisterInfo::isPhysicalRegister(physReg) && - "should be physical register!"); - assert(regUse_[physReg] != 0); - --regUse_[physReg]; - for (const unsigned* as = tri_->getAliasSet(physReg); *as; ++as) { - assert(regUse_[*as] != 0); - --regUse_[*as]; - } - } - - bool isRegAvail(unsigned physReg) const { - assert(TargetRegisterInfo::isPhysicalRegister(physReg) && - "should be physical register!"); - return regUse_[physReg] == 0; - } - - void backUpRegUses() { - regUseBackUp_ = regUse_; - } - - void restoreRegUses() { - regUse_ = regUseBackUp_; - } - - /// - /// Register handling helpers. - /// - - /// getFreePhysReg - return a free physical register for this virtual - /// register interval if we have one, otherwise return 0. - unsigned getFreePhysReg(LiveInterval* cur); - unsigned getFreePhysReg(LiveInterval* cur, - const TargetRegisterClass *RC, - unsigned MaxInactiveCount, - SmallVector<unsigned, 256> &inactiveCounts, - bool SkipDGRegs); - - /// getFirstNonReservedPhysReg - return the first non-reserved physical - /// register in the register class. 
- unsigned getFirstNonReservedPhysReg(const TargetRegisterClass *RC) { - ArrayRef<unsigned> O = RegClassInfo.getOrder(RC); - assert(!O.empty() && "All registers reserved?!"); - return O.front(); - } - - void ComputeRelatedRegClasses(); - - template <typename ItTy> - void printIntervals(const char* const str, ItTy i, ItTy e) const { - DEBUG({ - if (str) - dbgs() << str << " intervals:\n"; - - for (; i != e; ++i) { - dbgs() << '\t' << *i->first << " -> "; - - unsigned reg = i->first->reg; - if (TargetRegisterInfo::isVirtualRegister(reg)) - reg = vrm_->getPhys(reg); - - dbgs() << tri_->getName(reg) << '\n'; - } - }); - } - }; - char RALinScan::ID = 0; -} - -INITIALIZE_PASS_BEGIN(RALinScan, "linearscan-regalloc", - "Linear Scan Register Allocator", false, false) -INITIALIZE_PASS_DEPENDENCY(LiveIntervals) -INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination) -INITIALIZE_PASS_DEPENDENCY(CalculateSpillWeights) -INITIALIZE_PASS_DEPENDENCY(LiveStacks) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_DEPENDENCY(VirtRegMap) -INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) -INITIALIZE_PASS_END(RALinScan, "linearscan-regalloc", - "Linear Scan Register Allocator", false, false) - -void RALinScan::ComputeRelatedRegClasses() { - // First pass, add all reg classes to the union, and determine at least one - // reg class that each register is in. - bool HasAliases = false; - for (TargetRegisterInfo::regclass_iterator RCI = tri_->regclass_begin(), - E = tri_->regclass_end(); RCI != E; ++RCI) { - RelatedRegClasses.insert(*RCI); - for (TargetRegisterClass::iterator I = (*RCI)->begin(), E = (*RCI)->end(); - I != E; ++I) { - HasAliases = HasAliases || *tri_->getAliasSet(*I) != 0; - - const TargetRegisterClass *&PRC = OneClassForEachPhysReg[*I]; - if (PRC) { - // Already processed this register. Just make sure we know that - // multiple register classes share a register. - RelatedRegClasses.unionSets(PRC, *RCI); - } else { - PRC = *RCI; - } - } - } - - // Second pass, now that we know conservatively what register classes each reg - // belongs to, add info about aliases. We don't need to do this for targets - // without register aliases. - if (HasAliases) - for (DenseMap<unsigned, const TargetRegisterClass*>::iterator - I = OneClassForEachPhysReg.begin(), E = OneClassForEachPhysReg.end(); - I != E; ++I) - for (const unsigned *AS = tri_->getAliasSet(I->first); *AS; ++AS) { - const TargetRegisterClass *AliasClass = - OneClassForEachPhysReg.lookup(*AS); - if (AliasClass) - RelatedRegClasses.unionSets(I->second, AliasClass); - } -} - -/// attemptTrivialCoalescing - If a simple interval is defined by a copy, try -/// allocate the definition the same register as the source register if the -/// register is not defined during live time of the interval. If the interval is -/// killed by a copy, try to use the destination register. This eliminates a -/// copy. This is used to coalesce copies which were not coalesced away before -/// allocation either due to dest and src being in different register classes or -/// because the coalescer was overly conservative. -unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) { - unsigned Preference = vrm_->getRegAllocPref(cur.reg); - if ((Preference && Preference == Reg) || !cur.containsOneValue()) - return Reg; - - // We cannot handle complicated live ranges. Simple linear stuff only. 
- if (cur.ranges.size() != 1) - return Reg; - - const LiveRange &range = cur.ranges.front(); - - VNInfo *vni = range.valno; - if (vni->isUnused() || !vni->def.isValid()) - return Reg; - - unsigned CandReg; - { - MachineInstr *CopyMI; - if ((CopyMI = li_->getInstructionFromIndex(vni->def)) && CopyMI->isCopy()) - // Defined by a copy, try to extend SrcReg forward - CandReg = CopyMI->getOperand(1).getReg(); - else if (TrivCoalesceEnds && - (CopyMI = li_->getInstructionFromIndex(range.end.getBaseIndex())) && - CopyMI->isCopy() && cur.reg == CopyMI->getOperand(1).getReg()) - // Only used by a copy, try to extend DstReg backwards - CandReg = CopyMI->getOperand(0).getReg(); - else - return Reg; - - // If the target of the copy is a sub-register then don't coalesce. - if(CopyMI->getOperand(0).getSubReg()) - return Reg; - } - - if (TargetRegisterInfo::isVirtualRegister(CandReg)) { - if (!vrm_->isAssignedReg(CandReg)) - return Reg; - CandReg = vrm_->getPhys(CandReg); - } - if (Reg == CandReg) - return Reg; - - const TargetRegisterClass *RC = mri_->getRegClass(cur.reg); - if (!RC->contains(CandReg)) - return Reg; - - if (li_->conflictsWithPhysReg(cur, *vrm_, CandReg)) - return Reg; - - // Try to coalesce. - DEBUG(dbgs() << "Coalescing: " << cur << " -> " << tri_->getName(CandReg) - << '\n'); - vrm_->clearVirt(cur.reg); - vrm_->assignVirt2Phys(cur.reg, CandReg); - - ++NumCoalesce; - return CandReg; -} - -bool RALinScan::runOnMachineFunction(MachineFunction &fn) { - mf_ = &fn; - mri_ = &fn.getRegInfo(); - tm_ = &fn.getTarget(); - tri_ = tm_->getRegisterInfo(); - tii_ = tm_->getInstrInfo(); - allocatableRegs_ = tri_->getAllocatableSet(fn); - reservedRegs_ = tri_->getReservedRegs(fn); - li_ = &getAnalysis<LiveIntervals>(); - loopInfo = &getAnalysis<MachineLoopInfo>(); - RegClassInfo.runOnMachineFunction(fn); - - // We don't run the coalescer here because we have no reason to - // interact with it. If the coalescer requires interaction, it - // won't do anything. If it doesn't require interaction, we assume - // it was run as a separate pass. - - // If this is the first function compiled, compute the related reg classes. - if (RelatedRegClasses.empty()) - ComputeRelatedRegClasses(); - - // Also resize register usage trackers. - initRegUses(); - - vrm_ = &getAnalysis<VirtRegMap>(); - if (!rewriter_.get()) rewriter_.reset(createVirtRegRewriter()); - - spiller_.reset(createSpiller(*this, *mf_, *vrm_)); - - initIntervalSets(); - - linearScan(); - - // Rewrite spill code and update the PhysRegsUsed set. - rewriter_->runOnMachineFunction(*mf_, *vrm_, li_); - - // Write out new DBG_VALUE instructions. - getAnalysis<LiveDebugVariables>().emitDebugValues(vrm_); - - assert(unhandled_.empty() && "Unhandled live intervals remain!"); - - finalizeRegUses(); - - fixed_.clear(); - active_.clear(); - inactive_.clear(); - handled_.clear(); - NextReloadMap.clear(); - DowngradedRegs.clear(); - DowngradeMap.clear(); - spiller_.reset(0); - - return true; -} - -/// initIntervalSets - initialize the interval sets. 
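The removed attemptTrivialCoalescing above handles the case where a single-valued interval is defined (or only killed) by a copy: if the copy's other register is legal for the interval's class and is not redefined across the interval, the interval simply takes that register and the copy becomes dead. As an illustration, a minimal standalone sketch of the same check over invented stand-in types (nothing below is LLVM's LiveInterval or VirtRegMap API; the real code additionally rejects sub-register copies and conflicts with already-assigned physregs, which the sketch folds into two boolean flags):

    // Illustrative sketch only; SimpleInterval and its fields are made-up
    // stand-ins, not LLVM types.
    #include <cstdio>

    struct SimpleInterval {
      unsigned vreg;          // virtual register this interval belongs to
      unsigned assignedPhys;  // physical register currently assigned (0 = none)
      unsigned copySrcPhys;   // phys reg feeding the defining copy (0 = not a copy)
      bool srcLiveAcross;     // is copySrcPhys redefined inside this interval?
      bool srcInRegClass;     // is copySrcPhys legal for vreg's register class?
    };

    // Try to give the interval the same physical register as the copy source
    // so the copy becomes redundant.  Returns the final physical register.
    unsigned attemptTrivialCoalescing(SimpleInterval &LI) {
      if (!LI.copySrcPhys)                // not defined by a copy
        return LI.assignedPhys;
      if (LI.copySrcPhys == LI.assignedPhys)
        return LI.assignedPhys;           // already coalesced
      if (!LI.srcInRegClass || LI.srcLiveAcross)
        return LI.assignedPhys;           // would be illegal or would clobber
      LI.assignedPhys = LI.copySrcPhys;   // reassign; the copy is now a no-op
      return LI.assignedPhys;
    }

    int main() {
      SimpleInterval LI = {/*vreg*/5, /*assigned*/2, /*copySrc*/7,
                           /*srcLiveAcross*/false, /*srcInRegClass*/true};
      std::printf("vreg5 -> phys%u\n", attemptTrivialCoalescing(LI)); // phys7
    }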
-/// -void RALinScan::initIntervalSets() -{ - assert(unhandled_.empty() && fixed_.empty() && - active_.empty() && inactive_.empty() && - "interval sets should be empty on initialization"); - - handled_.reserve(li_->getNumIntervals()); - - for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) { - if (TargetRegisterInfo::isPhysicalRegister(i->second->reg)) { - if (!i->second->empty() && allocatableRegs_.test(i->second->reg)) { - mri_->setPhysRegUsed(i->second->reg); - fixed_.push_back(std::make_pair(i->second, i->second->begin())); - } - } else { - if (i->second->empty()) { - assignRegOrStackSlotAtInterval(i->second); - } - else - unhandled_.push(i->second); - } - } -} - -void RALinScan::linearScan() { - // linear scan algorithm - DEBUG({ - dbgs() << "********** LINEAR SCAN **********\n" - << "********** Function: " - << mf_->getFunction()->getName() << '\n'; - printIntervals("fixed", fixed_.begin(), fixed_.end()); - }); - - while (!unhandled_.empty()) { - // pick the interval with the earliest start point - LiveInterval* cur = unhandled_.top(); - unhandled_.pop(); - ++NumIters; - DEBUG(dbgs() << "\n*** CURRENT ***: " << *cur << '\n'); - - assert(!cur->empty() && "Empty interval in unhandled set."); - - processActiveIntervals(cur->beginIndex()); - processInactiveIntervals(cur->beginIndex()); - - assert(TargetRegisterInfo::isVirtualRegister(cur->reg) && - "Can only allocate virtual registers!"); - - // Allocating a virtual register. try to find a free - // physical register or spill an interval (possibly this one) in order to - // assign it one. - assignRegOrStackSlotAtInterval(cur); - - DEBUG({ - printIntervals("active", active_.begin(), active_.end()); - printIntervals("inactive", inactive_.begin(), inactive_.end()); - }); - } - - // Expire any remaining active intervals - while (!active_.empty()) { - IntervalPtr &IP = active_.back(); - unsigned reg = IP.first->reg; - DEBUG(dbgs() << "\tinterval " << *IP.first << " expired\n"); - assert(TargetRegisterInfo::isVirtualRegister(reg) && - "Can only allocate virtual registers!"); - reg = vrm_->getPhys(reg); - delRegUse(reg); - active_.pop_back(); - } - - // Expire any remaining inactive intervals - DEBUG({ - for (IntervalPtrs::reverse_iterator - i = inactive_.rbegin(); i != inactive_.rend(); ++i) - dbgs() << "\tinterval " << *i->first << " expired\n"; - }); - inactive_.clear(); - - // Add live-ins to every BB except for entry. Also perform trivial coalescing. - MachineFunction::iterator EntryMBB = mf_->begin(); - SmallVector<MachineBasicBlock*, 8> LiveInMBBs; - for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) { - LiveInterval &cur = *i->second; - unsigned Reg = 0; - bool isPhys = TargetRegisterInfo::isPhysicalRegister(cur.reg); - if (isPhys) - Reg = cur.reg; - else if (vrm_->isAssignedReg(cur.reg)) - Reg = attemptTrivialCoalescing(cur, vrm_->getPhys(cur.reg)); - if (!Reg) - continue; - // Ignore splited live intervals. 
- if (!isPhys && vrm_->getPreSplitReg(cur.reg)) - continue; - - for (LiveInterval::Ranges::const_iterator I = cur.begin(), E = cur.end(); - I != E; ++I) { - const LiveRange &LR = *I; - if (li_->findLiveInMBBs(LR.start, LR.end, LiveInMBBs)) { - for (unsigned i = 0, e = LiveInMBBs.size(); i != e; ++i) - if (LiveInMBBs[i] != EntryMBB) { - assert(TargetRegisterInfo::isPhysicalRegister(Reg) && - "Adding a virtual register to livein set?"); - LiveInMBBs[i]->addLiveIn(Reg); - } - LiveInMBBs.clear(); - } - } - } - - DEBUG(dbgs() << *vrm_); - - // Look for physical registers that end up not being allocated even though - // register allocator had to spill other registers in its register class. - if (!vrm_->FindUnusedRegisters(li_)) - return; -} - -/// processActiveIntervals - expire old intervals and move non-overlapping ones -/// to the inactive list. -void RALinScan::processActiveIntervals(SlotIndex CurPoint) -{ - DEBUG(dbgs() << "\tprocessing active intervals:\n"); - - for (unsigned i = 0, e = active_.size(); i != e; ++i) { - LiveInterval *Interval = active_[i].first; - LiveInterval::iterator IntervalPos = active_[i].second; - unsigned reg = Interval->reg; - - IntervalPos = Interval->advanceTo(IntervalPos, CurPoint); - - if (IntervalPos == Interval->end()) { // Remove expired intervals. - DEBUG(dbgs() << "\t\tinterval " << *Interval << " expired\n"); - assert(TargetRegisterInfo::isVirtualRegister(reg) && - "Can only allocate virtual registers!"); - reg = vrm_->getPhys(reg); - delRegUse(reg); - - // Pop off the end of the list. - active_[i] = active_.back(); - active_.pop_back(); - --i; --e; - - } else if (IntervalPos->start > CurPoint) { - // Move inactive intervals to inactive list. - DEBUG(dbgs() << "\t\tinterval " << *Interval << " inactive\n"); - assert(TargetRegisterInfo::isVirtualRegister(reg) && - "Can only allocate virtual registers!"); - reg = vrm_->getPhys(reg); - delRegUse(reg); - // add to inactive. - inactive_.push_back(std::make_pair(Interval, IntervalPos)); - - // Pop off the end of the list. - active_[i] = active_.back(); - active_.pop_back(); - --i; --e; - } else { - // Otherwise, just update the iterator position. - active_[i].second = IntervalPos; - } - } -} - -/// processInactiveIntervals - expire old intervals and move overlapping -/// ones to the active list. -void RALinScan::processInactiveIntervals(SlotIndex CurPoint) -{ - DEBUG(dbgs() << "\tprocessing inactive intervals:\n"); - - for (unsigned i = 0, e = inactive_.size(); i != e; ++i) { - LiveInterval *Interval = inactive_[i].first; - LiveInterval::iterator IntervalPos = inactive_[i].second; - unsigned reg = Interval->reg; - - IntervalPos = Interval->advanceTo(IntervalPos, CurPoint); - - if (IntervalPos == Interval->end()) { // remove expired intervals. - DEBUG(dbgs() << "\t\tinterval " << *Interval << " expired\n"); - - // Pop off the end of the list. - inactive_[i] = inactive_.back(); - inactive_.pop_back(); - --i; --e; - } else if (IntervalPos->start <= CurPoint) { - // move re-activated intervals in active list - DEBUG(dbgs() << "\t\tinterval " << *Interval << " active\n"); - assert(TargetRegisterInfo::isVirtualRegister(reg) && - "Can only allocate virtual registers!"); - reg = vrm_->getPhys(reg); - addRegUse(reg); - // add to active - active_.push_back(std::make_pair(Interval, IntervalPos)); - - // Pop off the end of the list. - inactive_[i] = inactive_.back(); - inactive_.pop_back(); - --i; --e; - } else { - // Otherwise, just update the iterator position. 
- inactive_[i].second = IntervalPos; - } - } -} - -/// updateSpillWeights - updates the spill weights of the specifed physical -/// register and its weight. -void RALinScan::updateSpillWeights(std::vector<float> &Weights, - unsigned reg, float weight, - const TargetRegisterClass *RC) { - SmallSet<unsigned, 4> Processed; - SmallSet<unsigned, 4> SuperAdded; - SmallVector<unsigned, 4> Supers; - Weights[reg] += weight; - Processed.insert(reg); - for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as) { - Weights[*as] += weight; - Processed.insert(*as); - if (tri_->isSubRegister(*as, reg) && - SuperAdded.insert(*as) && - RC->contains(*as)) { - Supers.push_back(*as); - } - } - - // If the alias is a super-register, and the super-register is in the - // register class we are trying to allocate. Then add the weight to all - // sub-registers of the super-register even if they are not aliases. - // e.g. allocating for GR32, bh is not used, updating bl spill weight. - // bl should get the same spill weight otherwise it will be chosen - // as a spill candidate since spilling bh doesn't make ebx available. - for (unsigned i = 0, e = Supers.size(); i != e; ++i) { - for (const unsigned *sr = tri_->getSubRegisters(Supers[i]); *sr; ++sr) - if (!Processed.count(*sr)) - Weights[*sr] += weight; - } -} - -static -RALinScan::IntervalPtrs::iterator -FindIntervalInVector(RALinScan::IntervalPtrs &IP, LiveInterval *LI) { - for (RALinScan::IntervalPtrs::iterator I = IP.begin(), E = IP.end(); - I != E; ++I) - if (I->first == LI) return I; - return IP.end(); -} - -static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V, - SlotIndex Point){ - for (unsigned i = 0, e = V.size(); i != e; ++i) { - RALinScan::IntervalPtr &IP = V[i]; - LiveInterval::iterator I = std::upper_bound(IP.first->begin(), - IP.second, Point); - if (I != IP.first->begin()) --I; - IP.second = I; - } -} - -/// getConflictWeight - Return the number of conflicts between cur -/// live interval and defs and uses of Reg weighted by loop depthes. -static -float getConflictWeight(LiveInterval *cur, unsigned Reg, LiveIntervals *li_, - MachineRegisterInfo *mri_, - MachineLoopInfo *loopInfo) { - float Conflicts = 0; - for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(Reg), - E = mri_->reg_end(); I != E; ++I) { - MachineInstr *MI = &*I; - if (cur->liveAt(li_->getInstructionIndex(MI))) { - unsigned loopDepth = loopInfo->getLoopDepth(MI->getParent()); - Conflicts += std::pow(10.0f, (float)loopDepth); - } - } - return Conflicts; -} - -/// findIntervalsToSpill - Determine the intervals to spill for the -/// specified interval. It's passed the physical registers whose spill -/// weight is the lowest among all the registers whose live intervals -/// conflict with the interval. -void RALinScan::findIntervalsToSpill(LiveInterval *cur, - std::vector<std::pair<unsigned,float> > &Candidates, - unsigned NumCands, - SmallVector<LiveInterval*, 8> &SpillIntervals) { - // We have figured out the *best* register to spill. But there are other - // registers that are pretty good as well (spill weight within 3%). Spill - // the one that has fewest defs and uses that conflict with cur. - float Conflicts[3] = { 0.0f, 0.0f, 0.0f }; - SmallVector<LiveInterval*, 8> SLIs[3]; - - DEBUG({ - dbgs() << "\tConsidering " << NumCands << " candidates: "; - for (unsigned i = 0; i != NumCands; ++i) - dbgs() << tri_->getName(Candidates[i].first) << " "; - dbgs() << "\n"; - }); - - // Calculate the number of conflicts of each candidate. 
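processActiveIntervals and processInactiveIntervals above are the usual linear-scan bookkeeping: when the scan reaches the start of the next unhandled interval, finished intervals expire, active intervals with a live-range hole at that point move to the inactive list, and inactive intervals that become live again move back. A rough standalone sketch of those transitions over plain [start, end) segments (no register classes, aliases, or regUse_ counts):

    // Sketch of linear-scan active/inactive maintenance over simplified
    // intervals.  An interval here is a set of disjoint [start, end) segments.
    #include <algorithm>
    #include <cstdio>
    #include <vector>

    struct Seg { unsigned start, end; };             // half-open segment
    struct Interval { unsigned reg; std::vector<Seg> segs; };

    bool endsBefore(const Interval &I, unsigned point) {
      return I.segs.back().end <= point;             // fully expired at 'point'
    }
    bool liveAt(const Interval &I, unsigned point) {
      for (const Seg &s : I.segs)
        if (s.start <= point && point < s.end) return true;
      return false;
    }

    // Move intervals between 'active' and 'inactive' as the scan point advances.
    void advanceTo(std::vector<Interval> &active, std::vector<Interval> &inactive,
                   unsigned point) {
      auto expired = [&](const Interval &I) { return endsBefore(I, point); };
      active.erase(std::remove_if(active.begin(), active.end(), expired),
                   active.end());
      inactive.erase(std::remove_if(inactive.begin(), inactive.end(), expired),
                     inactive.end());
      // Active intervals with a hole at 'point' become inactive, and vice versa.
      for (auto it = active.begin(); it != active.end();) {
        if (!liveAt(*it, point)) { inactive.push_back(*it); it = active.erase(it); }
        else ++it;
      }
      for (auto it = inactive.begin(); it != inactive.end();) {
        if (liveAt(*it, point)) { active.push_back(*it); it = inactive.erase(it); }
        else ++it;
      }
    }

    int main() {
      std::vector<Interval> active = {{1, {{0, 10}, {20, 30}}}, {2, {{0, 5}}}};
      std::vector<Interval> inactive;
      advanceTo(active, inactive, 12);   // reg2 expires, reg1 goes inactive
      std::printf("active=%zu inactive=%zu\n", active.size(), inactive.size());
    }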
- for (IntervalPtrs::iterator i = active_.begin(); i != active_.end(); ++i) { - unsigned Reg = i->first->reg; - unsigned PhysReg = vrm_->getPhys(Reg); - if (!cur->overlapsFrom(*i->first, i->second)) - continue; - for (unsigned j = 0; j < NumCands; ++j) { - unsigned Candidate = Candidates[j].first; - if (tri_->regsOverlap(PhysReg, Candidate)) { - if (NumCands > 1) - Conflicts[j] += getConflictWeight(cur, Reg, li_, mri_, loopInfo); - SLIs[j].push_back(i->first); - } - } - } - - for (IntervalPtrs::iterator i = inactive_.begin(); i != inactive_.end(); ++i){ - unsigned Reg = i->first->reg; - unsigned PhysReg = vrm_->getPhys(Reg); - if (!cur->overlapsFrom(*i->first, i->second-1)) - continue; - for (unsigned j = 0; j < NumCands; ++j) { - unsigned Candidate = Candidates[j].first; - if (tri_->regsOverlap(PhysReg, Candidate)) { - if (NumCands > 1) - Conflicts[j] += getConflictWeight(cur, Reg, li_, mri_, loopInfo); - SLIs[j].push_back(i->first); - } - } - } - - // Which is the best candidate? - unsigned BestCandidate = 0; - float MinConflicts = Conflicts[0]; - for (unsigned i = 1; i != NumCands; ++i) { - if (Conflicts[i] < MinConflicts) { - BestCandidate = i; - MinConflicts = Conflicts[i]; - } - } - - std::copy(SLIs[BestCandidate].begin(), SLIs[BestCandidate].end(), - std::back_inserter(SpillIntervals)); -} - -namespace { - struct WeightCompare { - private: - const RALinScan &Allocator; - - public: - WeightCompare(const RALinScan &Alloc) : Allocator(Alloc) {} - - typedef std::pair<unsigned, float> RegWeightPair; - bool operator()(const RegWeightPair &LHS, const RegWeightPair &RHS) const { - return LHS.second < RHS.second && !Allocator.isRecentlyUsed(LHS.first); - } - }; -} - -static bool weightsAreClose(float w1, float w2) { - if (!NewHeuristic) - return false; - - float diff = w1 - w2; - if (diff <= 0.02f) // Within 0.02f - return true; - return (diff / w2) <= 0.05f; // Within 5%. -} - -LiveInterval *RALinScan::hasNextReloadInterval(LiveInterval *cur) { - DenseMap<unsigned, unsigned>::iterator I = NextReloadMap.find(cur->reg); - if (I == NextReloadMap.end()) - return 0; - return &li_->getInterval(I->second); -} - -void RALinScan::DowngradeRegister(LiveInterval *li, unsigned Reg) { - for (const unsigned *AS = tri_->getOverlaps(Reg); *AS; ++AS) { - bool isNew = DowngradedRegs.insert(*AS); - (void)isNew; // Silence compiler warning. - assert(isNew && "Multiple reloads holding the same register?"); - DowngradeMap.insert(std::make_pair(li->reg, *AS)); - } - ++NumDowngrade; -} - -void RALinScan::UpgradeRegister(unsigned Reg) { - if (Reg) { - DowngradedRegs.erase(Reg); - for (const unsigned *AS = tri_->getAliasSet(Reg); *AS; ++AS) - DowngradedRegs.erase(*AS); - } -} - -namespace { - struct LISorter { - bool operator()(LiveInterval* A, LiveInterval* B) { - return A->beginIndex() < B->beginIndex(); - } - }; -} - -/// assignRegOrStackSlotAtInterval - assign a register if one is available, or -/// spill. -void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { - const TargetRegisterClass *RC = mri_->getRegClass(cur->reg); - DEBUG(dbgs() << "\tallocating current interval from " - << RC->getName() << ": "); - - // This is an implicitly defined live interval, just assign any register. - if (cur->empty()) { - unsigned physReg = vrm_->getRegAllocPref(cur->reg); - if (!physReg) - physReg = getFirstNonReservedPhysReg(RC); - DEBUG(dbgs() << tri_->getName(physReg) << '\n'); - // Note the register is not really in use. 
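weightsAreClose and getConflictWeight above implement the spill tie-breaker used by findIntervalsToSpill: among candidate registers whose spill weights are nearly equal (within 0.02 absolutely or 5% relatively in the removed code), prefer the one whose conflicting defs and uses sit in the shallowest loops, with each conflict costing 10^loopDepth. A standalone sketch of that heuristic with invented candidates (the removed code additionally gates the closeness test behind the NewHeuristic flag):

    // Sketch of the "pick the cheapest of several near-tied spill candidates"
    // heuristic.  Candidate and Conflict are simplified stand-ins.
    #include <algorithm>
    #include <cmath>
    #include <cstdio>
    #include <vector>

    struct Conflict { unsigned loopDepth; };            // one conflicting use/def
    struct Candidate {
      unsigned reg;
      float spillWeight;
      std::vector<Conflict> conflicts;                  // conflicts with 'cur'
    };

    bool weightsAreClose(float w1, float w2) {
      float diff = w1 - w2;
      if (diff <= 0.02f) return true;                   // close in absolute terms
      return (diff / w2) <= 0.05f;                      // or within 5%
    }

    float conflictWeight(const Candidate &C) {
      float total = 0;
      for (const Conflict &X : C.conflicts)
        total += std::pow(10.0f, (float)X.loopDepth);   // deeper loops cost more
      return total;
    }

    // Among candidates whose weight is close to the minimum, pick the one with
    // the fewest loop-depth-weighted conflicts.
    unsigned pickSpillCandidate(const std::vector<Candidate> &Cands) {
      float minWeight = Cands[0].spillWeight;
      for (const Candidate &C : Cands)
        minWeight = std::min(minWeight, C.spillWeight);
      unsigned best = Cands[0].reg;
      float bestConflicts = -1;
      for (const Candidate &C : Cands) {
        if (!weightsAreClose(C.spillWeight, minWeight)) continue;
        float w = conflictWeight(C);
        if (bestConflicts < 0 || w < bestConflicts) { bestConflicts = w; best = C.reg; }
      }
      return best;
    }

    int main() {
      std::vector<Candidate> Cands = {
        {1, 1.00f, {{2}, {2}}},   // cheapest weight, but conflicts in depth-2 loops
        {2, 1.01f, {{0}}},        // nearly as cheap, one conflict outside loops
      };
      std::printf("spill r%u\n", pickSpillCandidate(Cands));   // prints r2
    }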
- vrm_->assignVirt2Phys(cur->reg, physReg); - return; - } - - backUpRegUses(); - - std::vector<std::pair<unsigned, float> > SpillWeightsToAdd; - SlotIndex StartPosition = cur->beginIndex(); - const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC); - - // If start of this live interval is defined by a move instruction and its - // source is assigned a physical register that is compatible with the target - // register class, then we should try to assign it the same register. - // This can happen when the move is from a larger register class to a smaller - // one, e.g. X86::mov32to32_. These move instructions are not coalescable. - if (!vrm_->getRegAllocPref(cur->reg) && cur->hasAtLeastOneValue()) { - VNInfo *vni = cur->begin()->valno; - if (!vni->isUnused() && vni->def.isValid()) { - MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def); - if (CopyMI && CopyMI->isCopy()) { - unsigned DstSubReg = CopyMI->getOperand(0).getSubReg(); - unsigned SrcReg = CopyMI->getOperand(1).getReg(); - unsigned SrcSubReg = CopyMI->getOperand(1).getSubReg(); - unsigned Reg = 0; - if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) - Reg = SrcReg; - else if (vrm_->isAssignedReg(SrcReg)) - Reg = vrm_->getPhys(SrcReg); - if (Reg) { - if (SrcSubReg) - Reg = tri_->getSubReg(Reg, SrcSubReg); - if (DstSubReg) - Reg = tri_->getMatchingSuperReg(Reg, DstSubReg, RC); - if (Reg && allocatableRegs_[Reg] && RC->contains(Reg)) - mri_->setRegAllocationHint(cur->reg, 0, Reg); - } - } - } - } - - // For every interval in inactive we overlap with, mark the - // register as not free and update spill weights. - for (IntervalPtrs::const_iterator i = inactive_.begin(), - e = inactive_.end(); i != e; ++i) { - unsigned Reg = i->first->reg; - assert(TargetRegisterInfo::isVirtualRegister(Reg) && - "Can only allocate virtual registers!"); - const TargetRegisterClass *RegRC = mri_->getRegClass(Reg); - // If this is not in a related reg class to the register we're allocating, - // don't check it. - if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader && - cur->overlapsFrom(*i->first, i->second-1)) { - Reg = vrm_->getPhys(Reg); - addRegUse(Reg); - SpillWeightsToAdd.push_back(std::make_pair(Reg, i->first->weight)); - } - } - - // Speculatively check to see if we can get a register right now. If not, - // we know we won't be able to by adding more constraints. If so, we can - // check to see if it is valid. Doing an exhaustive search of the fixed_ list - // is very bad (it contains all callee clobbered registers for any functions - // with a call), so we want to avoid doing that if possible. - unsigned physReg = getFreePhysReg(cur); - unsigned BestPhysReg = physReg; - if (physReg) { - // We got a register. However, if it's in the fixed_ list, we might - // conflict with it. Check to see if we conflict with it or any of its - // aliases. - SmallSet<unsigned, 8> RegAliases; - for (const unsigned *AS = tri_->getAliasSet(physReg); *AS; ++AS) - RegAliases.insert(*AS); - - bool ConflictsWithFixed = false; - for (unsigned i = 0, e = fixed_.size(); i != e; ++i) { - IntervalPtr &IP = fixed_[i]; - if (physReg == IP.first->reg || RegAliases.count(IP.first->reg)) { - // Okay, this reg is on the fixed list. Check to see if we actually - // conflict. 
- LiveInterval *I = IP.first; - if (I->endIndex() > StartPosition) { - LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition); - IP.second = II; - if (II != I->begin() && II->start > StartPosition) - --II; - if (cur->overlapsFrom(*I, II)) { - ConflictsWithFixed = true; - break; - } - } - } - } - - // Okay, the register picked by our speculative getFreePhysReg call turned - // out to be in use. Actually add all of the conflicting fixed registers to - // regUse_ so we can do an accurate query. - if (ConflictsWithFixed) { - // For every interval in fixed we overlap with, mark the register as not - // free and update spill weights. - for (unsigned i = 0, e = fixed_.size(); i != e; ++i) { - IntervalPtr &IP = fixed_[i]; - LiveInterval *I = IP.first; - - const TargetRegisterClass *RegRC = OneClassForEachPhysReg[I->reg]; - if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader && - I->endIndex() > StartPosition) { - LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition); - IP.second = II; - if (II != I->begin() && II->start > StartPosition) - --II; - if (cur->overlapsFrom(*I, II)) { - unsigned reg = I->reg; - addRegUse(reg); - SpillWeightsToAdd.push_back(std::make_pair(reg, I->weight)); - } - } - } - - // Using the newly updated regUse_ object, which includes conflicts in the - // future, see if there are any registers available. - physReg = getFreePhysReg(cur); - } - } - - // Restore the physical register tracker, removing information about the - // future. - restoreRegUses(); - - // If we find a free register, we are done: assign this virtual to - // the free physical register and add this interval to the active - // list. - if (physReg) { - DEBUG(dbgs() << tri_->getName(physReg) << '\n'); - assert(RC->contains(physReg) && "Invalid candidate"); - vrm_->assignVirt2Phys(cur->reg, physReg); - addRegUse(physReg); - active_.push_back(std::make_pair(cur, cur->begin())); - handled_.push_back(cur); - - // Remember physReg for avoiding a write-after-write hazard in the next - // instruction. - if (AvoidWAWHazard && - tri_->avoidWriteAfterWrite(mri_->getRegClass(cur->reg))) - avoidWAW_ = physReg; - - // "Upgrade" the physical register since it has been allocated. - UpgradeRegister(physReg); - if (LiveInterval *NextReloadLI = hasNextReloadInterval(cur)) { - // "Downgrade" physReg to try to keep physReg from being allocated until - // the next reload from the same SS is allocated. - mri_->setRegAllocationHint(NextReloadLI->reg, 0, physReg); - DowngradeRegister(cur, physReg); - } - return; - } - DEBUG(dbgs() << "no free registers\n"); - - // Compile the spill weights into an array that is better for scanning. - std::vector<float> SpillWeights(tri_->getNumRegs(), 0.0f); - for (std::vector<std::pair<unsigned, float> >::iterator - I = SpillWeightsToAdd.begin(), E = SpillWeightsToAdd.end(); I != E; ++I) - updateSpillWeights(SpillWeights, I->first, I->second, RC); - - // for each interval in active, update spill weights. - for (IntervalPtrs::const_iterator i = active_.begin(), e = active_.end(); - i != e; ++i) { - unsigned reg = i->first->reg; - assert(TargetRegisterInfo::isVirtualRegister(reg) && - "Can only allocate virtual registers!"); - reg = vrm_->getPhys(reg); - updateSpillWeights(SpillWeights, reg, i->first->weight, RC); - } - - DEBUG(dbgs() << "\tassigning stack slot at interval "<< *cur << ":\n"); - - // Find a register to spill. 
- float minWeight = HUGE_VALF; - unsigned minReg = 0; - - bool Found = false; - std::vector<std::pair<unsigned,float> > RegsWeights; - ArrayRef<unsigned> Order = RegClassInfo.getOrder(RC); - if (!minReg || SpillWeights[minReg] == HUGE_VALF) - for (unsigned i = 0; i != Order.size(); ++i) { - unsigned reg = Order[i]; - float regWeight = SpillWeights[reg]; - // Skip recently allocated registers and reserved registers. - if (minWeight > regWeight && !isRecentlyUsed(reg)) - Found = true; - RegsWeights.push_back(std::make_pair(reg, regWeight)); - } - - // If we didn't find a register that is spillable, try aliases? - if (!Found) { - for (unsigned i = 0; i != Order.size(); ++i) { - unsigned reg = Order[i]; - // No need to worry about if the alias register size < regsize of RC. - // We are going to spill all registers that alias it anyway. - for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as) - RegsWeights.push_back(std::make_pair(*as, SpillWeights[*as])); - } - } - - // Sort all potential spill candidates by weight. - std::sort(RegsWeights.begin(), RegsWeights.end(), WeightCompare(*this)); - minReg = RegsWeights[0].first; - minWeight = RegsWeights[0].second; - if (minWeight == HUGE_VALF) { - // All registers must have inf weight. Just grab one! - minReg = BestPhysReg ? BestPhysReg : getFirstNonReservedPhysReg(RC); - if (cur->weight == HUGE_VALF || - li_->getApproximateInstructionCount(*cur) == 0) { - // Spill a physical register around defs and uses. - if (li_->spillPhysRegAroundRegDefsUses(*cur, minReg, *vrm_)) { - // spillPhysRegAroundRegDefsUses may have invalidated iterator stored - // in fixed_. Reset them. - for (unsigned i = 0, e = fixed_.size(); i != e; ++i) { - IntervalPtr &IP = fixed_[i]; - LiveInterval *I = IP.first; - if (I->reg == minReg || tri_->isSubRegister(minReg, I->reg)) - IP.second = I->advanceTo(I->begin(), StartPosition); - } - - DowngradedRegs.clear(); - assignRegOrStackSlotAtInterval(cur); - } else { - assert(false && "Ran out of registers during register allocation!"); - report_fatal_error("Ran out of registers during register allocation!"); - } - return; - } - } - - // Find up to 3 registers to consider as spill candidates. - unsigned LastCandidate = RegsWeights.size() >= 3 ? 3 : 1; - while (LastCandidate > 1) { - if (weightsAreClose(RegsWeights[LastCandidate-1].second, minWeight)) - break; - --LastCandidate; - } - - DEBUG({ - dbgs() << "\t\tregister(s) with min weight(s): "; - - for (unsigned i = 0; i != LastCandidate; ++i) - dbgs() << tri_->getName(RegsWeights[i].first) - << " (" << RegsWeights[i].second << ")\n"; - }); - - // If the current has the minimum weight, we need to spill it and - // add any added intervals back to unhandled, and restart - // linearscan. - if (cur->weight != HUGE_VALF && cur->weight <= minWeight) { - DEBUG(dbgs() << "\t\t\tspilling(c): " << *cur << '\n'); - SmallVector<LiveInterval*, 8> added; - LiveRangeEdit LRE(*cur, added); - spiller_->spill(LRE); - - std::sort(added.begin(), added.end(), LISorter()); - if (added.empty()) - return; // Early exit if all spills were folded. - - // Merge added with unhandled. Note that we have already sorted - // intervals returned by addIntervalsForSpills by their starting - // point. - // This also update the NextReloadMap. That is, it adds mapping from a - // register defined by a reload from SS to the next reload from SS in the - // same basic block. 
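The block above makes the central spill decision of the removed allocator: compile per-register spill weights, sort the candidates, and if the current interval's own weight is no larger than the cheapest candidate's, spill the current interval rather than evicting others; HUGE_VALF marks intervals that must not be spilled. A compact sketch of just that decision over invented weights:

    // Sketch of the "spill current or spill the cheapest interfering register"
    // decision from linear scan.  Weights and registers are invented.
    #include <algorithm>
    #include <cmath>
    #include <cstdio>
    #include <vector>

    struct RegWeight { unsigned reg; float weight; };

    // Returns 0 if the *current* interval should be spilled, otherwise the
    // physical register whose intervals should be evicted.
    unsigned chooseSpill(float curWeight, std::vector<RegWeight> cands) {
      std::sort(cands.begin(), cands.end(),
                [](const RegWeight &a, const RegWeight &b) {
                  return a.weight < b.weight;
                });
      const RegWeight &cheapest = cands.front();
      // HUGE_VALF marks intervals that must not be spilled (e.g. short reloads).
      if (curWeight != HUGE_VALF && curWeight <= cheapest.weight)
        return 0;                       // current interval is the cheapest victim
      return cheapest.reg;              // evict everything assigned to this reg
    }

    int main() {
      std::vector<RegWeight> cands = {{3, 7.5f}, {4, 2.0f}, {5, HUGE_VALF}};
      std::printf("victim: %u\n", chooseSpill(1.5f, cands));   // 0: spill current
      std::printf("victim: %u\n", chooseSpill(9.0f, cands));   // 4: evict r4
    }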
- MachineBasicBlock *LastReloadMBB = 0; - LiveInterval *LastReload = 0; - int LastReloadSS = VirtRegMap::NO_STACK_SLOT; - for (unsigned i = 0, e = added.size(); i != e; ++i) { - LiveInterval *ReloadLi = added[i]; - if (ReloadLi->weight == HUGE_VALF && - li_->getApproximateInstructionCount(*ReloadLi) == 0) { - SlotIndex ReloadIdx = ReloadLi->beginIndex(); - MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx); - int ReloadSS = vrm_->getStackSlot(ReloadLi->reg); - if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) { - // Last reload of same SS is in the same MBB. We want to try to - // allocate both reloads the same register and make sure the reg - // isn't clobbered in between if at all possible. - assert(LastReload->beginIndex() < ReloadIdx); - NextReloadMap.insert(std::make_pair(LastReload->reg, ReloadLi->reg)); - } - LastReloadMBB = ReloadMBB; - LastReload = ReloadLi; - LastReloadSS = ReloadSS; - } - unhandled_.push(ReloadLi); - } - return; - } - - ++NumBacktracks; - - // Push the current interval back to unhandled since we are going - // to re-run at least this iteration. Since we didn't modify it it - // should go back right in the front of the list - unhandled_.push(cur); - - assert(TargetRegisterInfo::isPhysicalRegister(minReg) && - "did not choose a register to spill?"); - - // We spill all intervals aliasing the register with - // minimum weight, rollback to the interval with the earliest - // start point and let the linear scan algorithm run again - SmallVector<LiveInterval*, 8> spillIs; - - // Determine which intervals have to be spilled. - findIntervalsToSpill(cur, RegsWeights, LastCandidate, spillIs); - - // Set of spilled vregs (used later to rollback properly) - SmallSet<unsigned, 8> spilled; - - // The earliest start of a Spilled interval indicates up to where - // in handled we need to roll back - assert(!spillIs.empty() && "No spill intervals?"); - SlotIndex earliestStart = spillIs[0]->beginIndex(); - - // Spill live intervals of virtual regs mapped to the physical register we - // want to clear (and its aliases). We only spill those that overlap with the - // current interval as the rest do not affect its allocation. we also keep - // track of the earliest start of all spilled live intervals since this will - // mark our rollback point. - SmallVector<LiveInterval*, 8> added; - while (!spillIs.empty()) { - LiveInterval *sli = spillIs.back(); - spillIs.pop_back(); - DEBUG(dbgs() << "\t\t\tspilling(a): " << *sli << '\n'); - if (sli->beginIndex() < earliestStart) - earliestStart = sli->beginIndex(); - LiveRangeEdit LRE(*sli, added, 0, &spillIs); - spiller_->spill(LRE); - spilled.insert(sli->reg); - } - - // Include any added intervals in earliestStart. - for (unsigned i = 0, e = added.size(); i != e; ++i) { - SlotIndex SI = added[i]->beginIndex(); - if (SI < earliestStart) - earliestStart = SI; - } - - DEBUG(dbgs() << "\t\trolling back to: " << earliestStart << '\n'); - - // Scan handled in reverse order up to the earliest start of a - // spilled live interval and undo each one, restoring the state of - // unhandled. - while (!handled_.empty()) { - LiveInterval* i = handled_.back(); - // If this interval starts before t we are done. - if (!i->empty() && i->beginIndex() < earliestStart) - break; - DEBUG(dbgs() << "\t\t\tundo changes for: " << *i << '\n'); - handled_.pop_back(); - - // When undoing a live interval allocation we must know if it is active or - // inactive to properly update regUse_ and the VirtRegMap. 
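This stretch of the removed allocator is its backtracking step: once eviction is chosen, it records the earliest start among the spilled intervals, pops handled intervals that begin at or after that point, undoes their assignments, and requeues them so the scan can re-run from the rollback point. A stripped-down sketch of that undo loop, with the bookkeeping reduced to a single handled list (no fixed or inactive lists, hints, or regUse_ counts):

    // Sketch of linear-scan backtracking: undo allocations made at or after a
    // rollback point and requeue them.  Alloc is a simplified stand-in record.
    #include <cstdio>
    #include <vector>

    struct Alloc { unsigned vreg; unsigned start; unsigned phys; };

    void rollback(std::vector<Alloc> &handled,          // in allocation order
                  std::vector<unsigned> &unhandled,     // vregs to allocate again
                  unsigned earliestSpillStart) {
      while (!handled.empty() && handled.back().start >= earliestSpillStart) {
        Alloc A = handled.back();
        handled.pop_back();
        std::printf("undo vreg%u (was phys%u)\n", A.vreg, A.phys);
        // The real allocator also clears the VirtRegMap entry, releases the
        // register-use counts, and resets any allocation hint here.
        unhandled.push_back(A.vreg);
      }
    }

    int main() {
      std::vector<Alloc> handled = {{10, 0, 1}, {11, 4, 2}, {12, 9, 3}};
      std::vector<unsigned> unhandled;
      rollback(handled, unhandled, /*earliestSpillStart=*/5);  // undoes vreg12
      std::printf("%zu allocations kept, %zu requeued\n",
                  handled.size(), unhandled.size());
    }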
- IntervalPtrs::iterator it; - if ((it = FindIntervalInVector(active_, i)) != active_.end()) { - active_.erase(it); - assert(!TargetRegisterInfo::isPhysicalRegister(i->reg)); - if (!spilled.count(i->reg)) - unhandled_.push(i); - delRegUse(vrm_->getPhys(i->reg)); - vrm_->clearVirt(i->reg); - } else if ((it = FindIntervalInVector(inactive_, i)) != inactive_.end()) { - inactive_.erase(it); - assert(!TargetRegisterInfo::isPhysicalRegister(i->reg)); - if (!spilled.count(i->reg)) - unhandled_.push(i); - vrm_->clearVirt(i->reg); - } else { - assert(TargetRegisterInfo::isVirtualRegister(i->reg) && - "Can only allocate virtual registers!"); - vrm_->clearVirt(i->reg); - unhandled_.push(i); - } - - DenseMap<unsigned, unsigned>::iterator ii = DowngradeMap.find(i->reg); - if (ii == DowngradeMap.end()) - // It interval has a preference, it must be defined by a copy. Clear the - // preference now since the source interval allocation may have been - // undone as well. - mri_->setRegAllocationHint(i->reg, 0, 0); - else { - UpgradeRegister(ii->second); - } - } - - // Rewind the iterators in the active, inactive, and fixed lists back to the - // point we reverted to. - RevertVectorIteratorsTo(active_, earliestStart); - RevertVectorIteratorsTo(inactive_, earliestStart); - RevertVectorIteratorsTo(fixed_, earliestStart); - - // Scan the rest and undo each interval that expired after t and - // insert it in active (the next iteration of the algorithm will - // put it in inactive if required) - for (unsigned i = 0, e = handled_.size(); i != e; ++i) { - LiveInterval *HI = handled_[i]; - if (!HI->expiredAt(earliestStart) && - HI->expiredAt(cur->beginIndex())) { - DEBUG(dbgs() << "\t\t\tundo changes for: " << *HI << '\n'); - active_.push_back(std::make_pair(HI, HI->begin())); - assert(!TargetRegisterInfo::isPhysicalRegister(HI->reg)); - addRegUse(vrm_->getPhys(HI->reg)); - } - } - - // Merge added with unhandled. - // This also update the NextReloadMap. That is, it adds mapping from a - // register defined by a reload from SS to the next reload from SS in the - // same basic block. - MachineBasicBlock *LastReloadMBB = 0; - LiveInterval *LastReload = 0; - int LastReloadSS = VirtRegMap::NO_STACK_SLOT; - std::sort(added.begin(), added.end(), LISorter()); - for (unsigned i = 0, e = added.size(); i != e; ++i) { - LiveInterval *ReloadLi = added[i]; - if (ReloadLi->weight == HUGE_VALF && - li_->getApproximateInstructionCount(*ReloadLi) == 0) { - SlotIndex ReloadIdx = ReloadLi->beginIndex(); - MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx); - int ReloadSS = vrm_->getStackSlot(ReloadLi->reg); - if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) { - // Last reload of same SS is in the same MBB. We want to try to - // allocate both reloads the same register and make sure the reg - // isn't clobbered in between if at all possible. - assert(LastReload->beginIndex() < ReloadIdx); - NextReloadMap.insert(std::make_pair(LastReload->reg, ReloadLi->reg)); - } - LastReloadMBB = ReloadMBB; - LastReload = ReloadLi; - LastReloadSS = ReloadSS; - } - unhandled_.push(ReloadLi); - } -} - -unsigned RALinScan::getFreePhysReg(LiveInterval* cur, - const TargetRegisterClass *RC, - unsigned MaxInactiveCount, - SmallVector<unsigned, 256> &inactiveCounts, - bool SkipDGRegs) { - unsigned FreeReg = 0; - unsigned FreeRegInactiveCount = 0; - - std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(cur->reg); - // Resolve second part of the hint (if possible) given the current allocation. 
- unsigned physReg = Hint.second; - if (TargetRegisterInfo::isVirtualRegister(physReg) && vrm_->hasPhys(physReg)) - physReg = vrm_->getPhys(physReg); - - ArrayRef<unsigned> Order; - if (Hint.first) - Order = tri_->getRawAllocationOrder(RC, Hint.first, physReg, *mf_); - else - Order = RegClassInfo.getOrder(RC); - - assert(!Order.empty() && "No allocatable register in this register class!"); - - // Scan for the first available register. - for (unsigned i = 0; i != Order.size(); ++i) { - unsigned Reg = Order[i]; - // Ignore "downgraded" registers. - if (SkipDGRegs && DowngradedRegs.count(Reg)) - continue; - // Skip reserved registers. - if (reservedRegs_.test(Reg)) - continue; - // Skip recently allocated registers. - if (isRegAvail(Reg) && (!SkipDGRegs || !isRecentlyUsed(Reg))) { - FreeReg = Reg; - if (FreeReg < inactiveCounts.size()) - FreeRegInactiveCount = inactiveCounts[FreeReg]; - else - FreeRegInactiveCount = 0; - break; - } - } - - // If there are no free regs, or if this reg has the max inactive count, - // return this register. - if (FreeReg == 0 || FreeRegInactiveCount == MaxInactiveCount) { - // Remember what register we picked so we can skip it next time. - if (FreeReg != 0) recordRecentlyUsed(FreeReg); - return FreeReg; - } - - // Continue scanning the registers, looking for the one with the highest - // inactive count. Alkis found that this reduced register pressure very - // slightly on X86 (in rev 1.94 of this file), though this should probably be - // reevaluated now. - for (unsigned i = 0; i != Order.size(); ++i) { - unsigned Reg = Order[i]; - // Ignore "downgraded" registers. - if (SkipDGRegs && DowngradedRegs.count(Reg)) - continue; - // Skip reserved registers. - if (reservedRegs_.test(Reg)) - continue; - if (isRegAvail(Reg) && Reg < inactiveCounts.size() && - FreeRegInactiveCount < inactiveCounts[Reg] && - (!SkipDGRegs || !isRecentlyUsed(Reg))) { - FreeReg = Reg; - FreeRegInactiveCount = inactiveCounts[Reg]; - if (FreeRegInactiveCount == MaxInactiveCount) - break; // We found the one with the max inactive count. - } - } - - // Remember what register we picked so we can skip it next time. - recordRecentlyUsed(FreeReg); - - return FreeReg; -} - -/// getFreePhysReg - return a free physical register for this virtual register -/// interval if we have one, otherwise return 0. -unsigned RALinScan::getFreePhysReg(LiveInterval *cur) { - SmallVector<unsigned, 256> inactiveCounts; - unsigned MaxInactiveCount = 0; - - const TargetRegisterClass *RC = mri_->getRegClass(cur->reg); - const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC); - - for (IntervalPtrs::iterator i = inactive_.begin(), e = inactive_.end(); - i != e; ++i) { - unsigned reg = i->first->reg; - assert(TargetRegisterInfo::isVirtualRegister(reg) && - "Can only allocate virtual registers!"); - - // If this is not in a related reg class to the register we're allocating, - // don't check it. - const TargetRegisterClass *RegRC = mri_->getRegClass(reg); - if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader) { - reg = vrm_->getPhys(reg); - if (inactiveCounts.size() <= reg) - inactiveCounts.resize(reg+1); - ++inactiveCounts[reg]; - MaxInactiveCount = std::max(MaxInactiveCount, inactiveCounts[reg]); - } - } - - // If copy coalescer has assigned a "preferred" register, check if it's - // available first. 
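getFreePhysReg above scans the allocation order for an available register, skipping reserved, downgraded, and recently used ones, and then prefers the available register that overlaps the most inactive intervals, on the theory that reusing such a register keeps others free elsewhere. A single-pass approximation of that preference with invented order and counts (the removed code does this in two passes and also honours allocation hints):

    // Sketch of "prefer the free register with the highest inactive count".
    // The allocation order, reserved set and counts are invented inputs.
    #include <cstdio>
    #include <vector>

    unsigned pickFreeReg(const std::vector<unsigned> &order,
                         const std::vector<bool> &reserved,
                         const std::vector<bool> &inUse,
                         const std::vector<unsigned> &inactiveCount) {
      unsigned best = 0, bestCount = 0;
      for (unsigned reg : order) {
        if (reserved[reg] || inUse[reg])
          continue;                                  // not a candidate
        unsigned cnt = reg < inactiveCount.size() ? inactiveCount[reg] : 0;
        if (!best || cnt > bestCount) {              // keep the most "inactive" reg
          best = reg;
          bestCount = cnt;
        }
      }
      return best;                                   // 0 means no free register
    }

    int main() {
      std::vector<unsigned> order = {1, 2, 3, 4};
      std::vector<bool> reserved = {false, false, false, true, false};
      std::vector<bool> inUse    = {false, false, true, false, false};
      std::vector<unsigned> inactiveCount = {0, 1, 0, 0, 3};
      std::printf("picked r%u\n",
                  pickFreeReg(order, reserved, inUse, inactiveCount)); // r4
    }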
- unsigned Preference = vrm_->getRegAllocPref(cur->reg); - if (Preference) { - DEBUG(dbgs() << "(preferred: " << tri_->getName(Preference) << ") "); - if (isRegAvail(Preference) && - RC->contains(Preference)) - return Preference; - } - - unsigned FreeReg = getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, - true); - if (FreeReg) - return FreeReg; - return getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, false); -} - -FunctionPass* llvm::createLinearScanRegisterAllocator() { - return new RALinScan(); -} diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index 0d2cf2d6184c..a2846145bc7e 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -32,14 +32,17 @@ #define DEBUG_TYPE "regalloc" #include "RenderMachineFunction.h" -#include "Splitter.h" +#include "Spiller.h" #include "VirtRegMap.h" -#include "VirtRegRewriter.h" #include "RegisterCoalescer.h" +#include "llvm/Module.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/RegAllocPBQP.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -54,6 +57,7 @@ #include <limits> #include <memory> #include <set> +#include <sstream> #include <vector> using namespace llvm; @@ -67,10 +71,12 @@ pbqpCoalescing("pbqp-coalescing", cl::desc("Attempt coalescing during PBQP register allocation."), cl::init(false), cl::Hidden); +#ifndef NDEBUG static cl::opt<bool> -pbqpPreSplitting("pbqp-pre-splitting", - cl::desc("Pre-split before PBQP register allocation."), - cl::init(false), cl::Hidden); +pbqpDumpGraphs("pbqp-dump-graphs", + cl::desc("Dump graphs for each function/round in the compilation unit."), + cl::init(false), cl::Hidden); +#endif namespace { @@ -88,11 +94,9 @@ public: : MachineFunctionPass(ID), builder(b), customPassID(cPassID) { initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); - initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); initializeLiveStacksPass(*PassRegistry::getPassRegistry()); initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); - initializeLoopSplitterPass(*PassRegistry::getPassRegistry()); initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry()); } @@ -132,6 +136,7 @@ private: MachineRegisterInfo *mri; RenderMachineFunction *rmf; + std::auto_ptr<Spiller> spiller; LiveIntervals *lis; LiveStacks *lss; VirtRegMap *vrm; @@ -141,10 +146,6 @@ private: /// \brief Finds the initial set of vreg intervals to allocate. void findVRegIntervalsToAlloc(); - /// \brief Adds a stack interval if the given live interval has been - /// spilled. Used to support stack slot coloring. - void addStackInterval(const LiveInterval *spilled,MachineRegisterInfo* mri); - /// \brief Given a solved PBQP problem maps this solution back to a register /// assignment. 
bool mapPBQPToRegAlloc(const PBQPRAProblem &problem, @@ -170,7 +171,7 @@ PBQP::Graph::NodeItr PBQPRAProblem::getNodeForVReg(unsigned vreg) const { VReg2Node::const_iterator nodeItr = vreg2Node.find(vreg); assert(nodeItr != vreg2Node.end() && "No node for vreg."); return nodeItr->second; - + } const PBQPRAProblem::AllowedSet& @@ -195,9 +196,9 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, const RegSet &vregs) { typedef std::vector<const LiveInterval*> LIVector; - + ArrayRef<SlotIndex> regMaskSlots = lis->getRegMaskSlots(); MachineRegisterInfo *mri = &mf->getRegInfo(); - const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo(); + const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo(); std::auto_ptr<PBQPRAProblem> p(new PBQPRAProblem()); PBQP::Graph &g = p->getGraph(); @@ -214,7 +215,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, BitVector reservedRegs = tri->getReservedRegs(*mf); - // Iterate over vregs. + // Iterate over vregs. for (RegSet::const_iterator vregItr = vregs.begin(), vregEnd = vregs.end(); vregItr != vregEnd; ++vregItr) { unsigned vreg = *vregItr; @@ -224,7 +225,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, // Compute an initial allowed set for the current vreg. typedef std::vector<unsigned> VRAllowed; VRAllowed vrAllowed; - ArrayRef<unsigned> rawOrder = trc->getRawAllocationOrder(*mf); + ArrayRef<uint16_t> rawOrder = trc->getRawAllocationOrder(*mf); for (unsigned i = 0; i != rawOrder.size(); ++i) { unsigned preg = rawOrder[i]; if (!reservedRegs.test(preg)) { @@ -232,7 +233,9 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, } } - // Remove any physical registers which overlap. + RegSet overlappingPRegs; + + // Record physical registers whose ranges overlap. for (RegSet::const_iterator pregItr = pregs.begin(), pregEnd = pregs.end(); pregItr != pregEnd; ++pregItr) { @@ -243,9 +246,41 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, continue; } - if (!vregLI->overlaps(*pregLI)) { - continue; + if (vregLI->overlaps(*pregLI)) + overlappingPRegs.insert(preg); + } + + // Record any overlaps with regmask operands. + BitVector regMaskOverlaps(tri->getNumRegs()); + for (ArrayRef<SlotIndex>::iterator rmItr = regMaskSlots.begin(), + rmEnd = regMaskSlots.end(); + rmItr != rmEnd; ++rmItr) { + SlotIndex rmIdx = *rmItr; + if (vregLI->liveAt(rmIdx)) { + MachineInstr *rmMI = lis->getInstructionFromIndex(rmIdx); + const uint32_t* regMask = 0; + for (MachineInstr::mop_iterator mopItr = rmMI->operands_begin(), + mopEnd = rmMI->operands_end(); + mopItr != mopEnd; ++mopItr) { + if (mopItr->isRegMask()) { + regMask = mopItr->getRegMask(); + break; + } + } + assert(regMask != 0 && "Couldn't find register mask."); + regMaskOverlaps.setBitsNotInMask(regMask); } + } + + for (unsigned preg = 0; preg < tri->getNumRegs(); ++preg) { + if (regMaskOverlaps.test(preg)) + overlappingPRegs.insert(preg); + } + + for (RegSet::const_iterator pregItr = overlappingPRegs.begin(), + pregEnd = overlappingPRegs.end(); + pregItr != pregEnd; ++pregItr) { + unsigned preg = *pregItr; // Remove the register from the allowed set. VRAllowed::iterator eraseItr = @@ -256,7 +291,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, } // Also remove any aliases. 
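The new PBQP builder code above now also accounts for register-mask operands: for every call whose regmask slot is live across the vreg, registers whose bit is clear in the mask are treated as clobbered (that is what setBitsNotInMask collects) and are then removed from the vreg's allowed set along with their aliases. A small sketch of turning a preserved-registers mask into a clobber set and filtering an allowed list; the "set bit means preserved" layout is the assumption here, and the register numbers and mask are invented:

    // Sketch: derive the set of registers clobbered by a call from a
    // preserved-registers bit mask (set bit = preserved), then filter an
    // allowed-register list.
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    std::vector<unsigned> clobberedRegs(const uint32_t *mask, unsigned numRegs) {
      std::vector<unsigned> clobbered;
      for (unsigned reg = 0; reg < numRegs; ++reg) {
        bool preserved = mask[reg / 32] & (1u << (reg % 32));
        if (!preserved)
          clobbered.push_back(reg);       // bit clear => the call clobbers it
      }
      return clobbered;
    }

    int main() {
      // 16 registers; the "call" preserves r8..r15 only.
      uint32_t mask[1] = {0xFF00u};
      std::vector<unsigned> allowed = {1, 3, 9, 12};
      for (unsigned reg : clobberedRegs(mask, 16))
        for (auto it = allowed.begin(); it != allowed.end();)
          it = (*it == reg) ? allowed.erase(it) : it + 1;
      std::printf("%zu allowed regs survive\n", allowed.size());   // 2 (r9, r12)
    }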
- const unsigned *aliasItr = tri->getAliasSet(preg); + const uint16_t *aliasItr = tri->getAliasSet(preg); if (aliasItr != 0) { for (; *aliasItr != 0; ++aliasItr) { VRAllowed::iterator eraseItr = @@ -270,7 +305,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, } // Construct the node. - PBQP::Graph::NodeItr node = + PBQP::Graph::NodeItr node = g.addNode(PBQP::Vector(vrAllowed.size() + 1, 0)); // Record the mapping and allowed set in the problem. @@ -371,7 +406,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build( const float copyFactor = 0.5; // Cost of copy relative to load. Current // value plucked randomly out of the air. - + PBQP::PBQPNum cBenefit = copyFactor * LiveIntervals::getSpillWeight(false, true, loopInfo->getLoopDepth(mbb)); @@ -382,7 +417,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build( } const PBQPRAProblem::AllowedSet &allowed = p->getAllowedSet(src); - unsigned pregOpt = 0; + unsigned pregOpt = 0; while (pregOpt < allowed.size() && allowed[pregOpt] != dst) { ++pregOpt; } @@ -407,7 +442,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build( std::swap(allowed1, allowed2); } } - + addVirtRegCoalesce(g.getEdgeCosts(edge), *allowed1, *allowed2, cBenefit); } @@ -439,27 +474,29 @@ void PBQPBuilderWithCoalescing::addVirtRegCoalesce( if (preg1 == preg2) { costMat[i + 1][j + 1] += -benefit; - } + } } } } void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const { + au.setPreservesCFG(); + au.addRequired<AliasAnalysis>(); + au.addPreserved<AliasAnalysis>(); au.addRequired<SlotIndexes>(); au.addPreserved<SlotIndexes>(); au.addRequired<LiveIntervals>(); //au.addRequiredID(SplitCriticalEdgesID); - au.addRequiredID(RegisterCoalescerPassID); if (customPassID) au.addRequiredID(*customPassID); au.addRequired<CalculateSpillWeights>(); au.addRequired<LiveStacks>(); au.addPreserved<LiveStacks>(); + au.addRequired<MachineDominatorTree>(); + au.addPreserved<MachineDominatorTree>(); au.addRequired<MachineLoopInfo>(); au.addPreserved<MachineLoopInfo>(); - if (pbqpPreSplitting) - au.addRequired<LoopSplitter>(); au.addRequired<VirtRegMap>(); au.addRequired<RenderMachineFunction>(); MachineFunctionPass::getAnalysisUsage(au); @@ -488,29 +525,6 @@ void RegAllocPBQP::findVRegIntervalsToAlloc() { } } -void RegAllocPBQP::addStackInterval(const LiveInterval *spilled, - MachineRegisterInfo* mri) { - int stackSlot = vrm->getStackSlot(spilled->reg); - - if (stackSlot == VirtRegMap::NO_STACK_SLOT) { - return; - } - - const TargetRegisterClass *RC = mri->getRegClass(spilled->reg); - LiveInterval &stackInterval = lss->getOrCreateInterval(stackSlot, RC); - - VNInfo *vni; - if (stackInterval.getNumValNums() != 0) { - vni = stackInterval.getValNumInfo(0); - } else { - vni = stackInterval.getNextValue( - SlotIndex(), 0, lss->getVNInfoAllocator()); - } - - LiveInterval &rhsInterval = lis->getInterval(spilled->reg); - stackInterval.MergeRangesInAsValue(rhsInterval, vni); -} - bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem, const PBQP::Solution &solution) { // Set to true if we have any spills @@ -529,28 +543,22 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem, unsigned alloc = solution.getSelection(node); if (problem.isPRegOption(vreg, alloc)) { - unsigned preg = problem.getPRegForOption(vreg, alloc); + unsigned preg = problem.getPRegForOption(vreg, alloc); DEBUG(dbgs() << "VREG " << vreg << " -> " << tri->getName(preg) << "\n"); assert(preg != 0 && "Invalid preg selected."); - vrm->assignVirt2Phys(vreg, 
preg); + vrm->assignVirt2Phys(vreg, preg); } else if (problem.isSpillOption(vreg, alloc)) { vregsToAlloc.erase(vreg); - const LiveInterval* spillInterval = &lis->getInterval(vreg); - double oldWeight = spillInterval->weight; - rmf->rememberUseDefs(spillInterval); - std::vector<LiveInterval*> newSpills = - lis->addIntervalsForSpills(*spillInterval, 0, loopInfo, *vrm); - addStackInterval(spillInterval, mri); - rmf->rememberSpills(spillInterval, newSpills); - - (void) oldWeight; + SmallVector<LiveInterval*, 8> newSpills; + LiveRangeEdit LRE(lis->getInterval(vreg), newSpills, *mf, *lis, vrm); + spiller->spill(LRE); + DEBUG(dbgs() << "VREG " << vreg << " -> SPILLED (Cost: " - << oldWeight << ", New vregs: "); + << LRE.getParent().weight << ", New vregs: "); // Copy any newly inserted live intervals into the list of regs to // allocate. - for (std::vector<LiveInterval*>::const_iterator - itr = newSpills.begin(), end = newSpills.end(); + for (LiveRangeEdit::iterator itr = LRE.begin(), end = LRE.end(); itr != end; ++itr) { assert(!(*itr)->empty() && "Empty spill range."); DEBUG(dbgs() << (*itr)->reg << " "); @@ -560,9 +568,9 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem, DEBUG(dbgs() << ")\n"); // We need another round if spill intervals were added. - anotherRoundNeeded |= !newSpills.empty(); + anotherRoundNeeded |= !LRE.empty(); } else { - assert(false && "Unknown allocation option."); + llvm_unreachable("Unknown allocation option."); } } @@ -642,7 +650,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { tm = &mf->getTarget(); tri = tm->getRegisterInfo(); tii = tm->getInstrInfo(); - mri = &mf->getRegInfo(); + mri = &mf->getRegInfo(); lis = &getAnalysis<LiveIntervals>(); lss = &getAnalysis<LiveStacks>(); @@ -650,7 +658,9 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { rmf = &getAnalysis<RenderMachineFunction>(); vrm = &getAnalysis<VirtRegMap>(); + spiller.reset(createInlineSpiller(*this, MF, *vrm)); + mri->freezeReservedRegs(MF); DEBUG(dbgs() << "PBQP Register Allocating for " << mf->getFunction()->getName() << "\n"); @@ -666,6 +676,12 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { // Find the vreg intervals in need of allocation. findVRegIntervalsToAlloc(); + const Function* func = mf->getFunction(); + std::string fqn = + func->getParent()->getModuleIdentifier() + "." + + func->getName().str(); + (void)fqn; + // If there are non-empty intervals allocate them using pbqp. if (!vregsToAlloc.empty()) { @@ -677,6 +693,20 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { std::auto_ptr<PBQPRAProblem> problem = builder->build(mf, lis, loopInfo, vregsToAlloc); + +#ifndef NDEBUG + if (pbqpDumpGraphs) { + std::ostringstream rs; + rs << round; + std::string graphFileName(fqn + "." 
+ rs.str() + ".pbqpgraph"); + std::string tmp; + raw_fd_ostream os(graphFileName.c_str(), tmp); + DEBUG(dbgs() << "Dumping graph for round " << round << " to \"" + << graphFileName << "\"\n"); + problem->getGraph().dump(os); + } +#endif + PBQP::Solution solution = PBQP::HeuristicSolver<PBQP::Heuristics::Briggs>::solve( problem->getGraph()); @@ -698,9 +728,12 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *vrm << "\n"); // Run rewriter - std::auto_ptr<VirtRegRewriter> rewriter(createVirtRegRewriter()); + vrm->rewrite(lis->getSlotIndexes()); - rewriter->runOnMachineFunction(*mf, *vrm, lis); + // All machine operands and other references to virtual registers have been + // replaced. Remove the virtual registers. + vrm->clearAllVirt(); + mri->clearVirtRegs(); return true; } diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp index 786d279c2b8c..17165fa72665 100644 --- a/lib/CodeGen/RegisterClassInfo.cpp +++ b/lib/CodeGen/RegisterClassInfo.cpp @@ -18,12 +18,16 @@ #include "RegisterClassInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/Target/TargetMachine.h" - +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +static cl::opt<unsigned> +StressRA("stress-regalloc", cl::Hidden, cl::init(0), cl::value_desc("N"), + cl::desc("Limit all regclasses to N registers")); + RegisterClassInfo::RegisterClassInfo() : Tag(0), MF(0), TRI(0), CalleeSaved(0) {} @@ -39,14 +43,14 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) { } // Does this MF have different CSRs? - const unsigned *CSR = TRI->getCalleeSavedRegs(MF); + const uint16_t *CSR = TRI->getCalleeSavedRegs(MF); if (Update || CSR != CalleeSaved) { // Build a CSRNum map. Every CSR alias gets an entry pointing to the last // overlapping CSR. CSRNum.clear(); CSRNum.resize(TRI->getNumRegs(), 0); for (unsigned N = 0; unsigned Reg = CSR[N]; ++N) - for (const unsigned *AS = TRI->getOverlaps(Reg); + for (const uint16_t *AS = TRI->getOverlaps(Reg); unsigned Alias = *AS; ++AS) CSRNum[Alias] = N + 1; // 0 means no CSR, 1 means CalleeSaved[0], ... Update = true; @@ -81,7 +85,7 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const { // FIXME: Once targets reserve registers instead of removing them from the // allocation order, we can simply use begin/end here. - ArrayRef<unsigned> RawOrder = RC->getRawAllocationOrder(*MF); + ArrayRef<uint16_t> RawOrder = RC->getRawAllocationOrder(*MF); for (unsigned i = 0; i != RawOrder.size(); ++i) { unsigned PhysReg = RawOrder[i]; // Remove reserved registers from the allocation order. @@ -99,6 +103,10 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const { // CSR aliases go after the volatile registers, preserve the target's order. std::copy(CSRAlias.begin(), CSRAlias.end(), &RCI.Order[N]); + // Register allocator stress test. Clip register class to N registers. + if (StressRA && RCI.NumRegs > StressRA) + RCI.NumRegs = StressRA; + // Check if RC is a proper sub-class. if (const TargetRegisterClass *Super = TRI->getLargestLegalSuperClass(RC)) if (Super != RC && getNumAllocatableRegs(Super) > RCI.NumRegs) diff --git a/lib/CodeGen/RegisterClassInfo.h b/lib/CodeGen/RegisterClassInfo.h index 2c1407096cd7..400e1f48ce54 100644 --- a/lib/CodeGen/RegisterClassInfo.h +++ b/lib/CodeGen/RegisterClassInfo.h @@ -49,7 +49,7 @@ class RegisterClassInfo { // Callee saved registers of last MF. 
Assumed to be valid until the next // runOnFunction() call. - const unsigned *CalleeSaved; + const uint16_t *CalleeSaved; // Map register number to CalleeSaved index + 1; SmallVector<uint8_t, 4> CSRNum; diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 9b414d6212c7..75f88cafdf01 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -13,7 +13,7 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "regcoalescing" +#define DEBUG_TYPE "regalloc" #include "RegisterCoalescer.h" #include "LiveDebugVariables.h" #include "RegisterClassInfo.h" @@ -169,10 +169,6 @@ namespace { /// it as well. bool RemoveDeadDef(LiveInterval &li, MachineInstr *DefMI); - /// RemoveCopyFlag - If DstReg is no longer defined by CopyMI, clear the - /// VNInfo copy flag for DstReg and all aliases. - void RemoveCopyFlag(unsigned DstReg, const MachineInstr *CopyMI); - /// markAsJoined - Remember that CopyMI has already been joined. void markAsJoined(MachineInstr *CopyMI); @@ -197,7 +193,7 @@ namespace { }; } /// end anonymous namespace -char &llvm::RegisterCoalescerPassID = RegisterCoalescer::ID; +char &llvm::RegisterCoalescerID = RegisterCoalescer::ID; INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing", "Simple Register Coalescing", false, false) @@ -205,9 +201,6 @@ INITIALIZE_PASS_DEPENDENCY(LiveIntervals) INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination) -INITIALIZE_PASS_DEPENDENCY(PHIElimination) -INITIALIZE_PASS_DEPENDENCY(TwoAddressInstructionPass) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing", "Simple Register Coalescing", false, false) @@ -379,9 +372,6 @@ void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<MachineLoopInfo>(); AU.addPreserved<MachineLoopInfo>(); AU.addPreservedID(MachineDominatorsID); - AU.addPreservedID(StrongPHIEliminationID); - AU.addPreservedID(PHIEliminationID); - AU.addPreservedID(TwoAddressInstructionPassID); MachineFunctionPass::getAnalysisUsage(AU); } @@ -423,7 +413,7 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP, LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); LiveInterval &IntB = LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); - SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getDefIndex(); + SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(); // BValNo is a value number in B that is defined by a copy from A. 'B3' in // the example above. @@ -434,40 +424,19 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP, // Get the location that B is defined at. Two options: either this value has // an unknown definition point or it is defined at CopyIdx. If unknown, we // can't process it. - if (!BValNo->isDefByCopy()) return false; - assert(BValNo->def == CopyIdx && "Copy doesn't define the value?"); + if (BValNo->def != CopyIdx) return false; // AValNo is the value number in A that defines the copy, A3 in the example. - SlotIndex CopyUseIdx = CopyIdx.getUseIndex(); + SlotIndex CopyUseIdx = CopyIdx.getRegSlot(true); LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx); // The live range might not exist after fun with physreg coalescing. 
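A little above, RegisterClassInfo::compute gains a -stress-regalloc=N option that clips every register class to its first N allocatable registers, on top of the existing policy of dropping reserved registers and placing callee-saved aliases after the volatile registers. A sketch of that ordering over invented inputs (the real code derives the callee-saved set from TargetRegisterInfo and caches the result per class):

    // Sketch of RegisterClassInfo-style allocation-order computation:
    // reserved registers are dropped, callee-saved registers go last, and an
    // optional stress limit clips the class.
    #include <cstdio>
    #include <vector>

    std::vector<unsigned> computeOrder(const std::vector<unsigned> &rawOrder,
                                       const std::vector<bool> &reserved,
                                       const std::vector<bool> &calleeSaved,
                                       unsigned stressLimit /* 0 = off */) {
      std::vector<unsigned> order, csrTail;
      for (unsigned reg : rawOrder) {
        if (reserved[reg])
          continue;                          // never allocatable
        (calleeSaved[reg] ? csrTail : order).push_back(reg);
      }
      order.insert(order.end(), csrTail.begin(), csrTail.end());
      if (stressLimit && order.size() > stressLimit)
        order.resize(stressLimit);           // -stress-regalloc=N style clipping
      return order;
    }

    int main() {
      std::vector<unsigned> raw = {1, 2, 3, 4, 5};
      std::vector<bool> reserved    = {false, false, false, true, false, false};
      std::vector<bool> calleeSaved = {false, true, false, false, false, true};
      for (unsigned reg : computeOrder(raw, reserved, calleeSaved, /*limit=*/3))
        std::printf("r%u ", reg);            // r2 r4 r1
      std::printf("\n");
    }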
if (ALR == IntA.end()) return false; VNInfo *AValNo = ALR->valno; - // If it's re-defined by an early clobber somewhere in the live range, then - // it's not safe to eliminate the copy. FIXME: This is a temporary workaround. - // See PR3149: - // 172 %ECX<def> = MOV32rr %reg1039<kill> - // 180 INLINEASM <es:subl $5,$1 - // sbbl $3,$0>, 10, %EAX<def>, 14, %ECX<earlyclobber,def>, 9, - // %EAX<kill>, - // 36, <fi#0>, 1, %reg0, 0, 9, %ECX<kill>, 36, <fi#1>, 1, %reg0, 0 - // 188 %EAX<def> = MOV32rr %EAX<kill> - // 196 %ECX<def> = MOV32rr %ECX<kill> - // 204 %ECX<def> = MOV32rr %ECX<kill> - // 212 %EAX<def> = MOV32rr %EAX<kill> - // 220 %EAX<def> = MOV32rr %EAX - // 228 %reg1039<def> = MOV32rr %ECX<kill> - // The early clobber operand ties ECX input to the ECX def. - // - // The live interval of ECX is represented as this: - // %reg20,inf = [46,47:1)[174,230:0) 0@174-(230) 1@46-(47) - // The coalescer has no idea there was a def in the middle of [174,230]. - if (AValNo->hasRedefByEC()) - return false; // If AValNo is defined as a copy from IntB, we can potentially process this. // Get the instruction that defines this value number. - if (!CP.isCoalescable(AValNo->getCopy())) + MachineInstr *ACopyMI = LIS->getInstructionFromIndex(AValNo->def); + if (!CP.isCoalescable(ACopyMI)) return false; // Get the LiveRange in IntB that this value number starts with. @@ -492,7 +461,7 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP, // of its aliases is overlapping the live interval of the virtual register. // If so, do not coalesce. if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) { - for (const unsigned *AS = TRI->getAliasSet(IntB.reg); *AS; ++AS) + for (const uint16_t *AS = TRI->getAliasSet(IntB.reg); *AS; ++AS) if (LIS->hasInterval(*AS) && IntA.overlaps(LIS->getInterval(*AS))) { DEBUG({ dbgs() << "\t\tInterfere with alias "; @@ -511,8 +480,7 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP, // We are about to delete CopyMI, so need to remove it as the 'instruction // that defines this value #'. Update the valnum with the new defining // instruction #. - BValNo->def = FillerStart; - BValNo->setCopy(0); + BValNo->def = FillerStart; // Okay, we can merge them. We need to insert a new liverange: // [ValLR.end, BLR.begin) of either value number, then we merge the @@ -522,12 +490,12 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP, // If the IntB live range is assigned to a physical register, and if that // physreg has sub-registers, update their live intervals as well. if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) { - for (const unsigned *SR = TRI->getSubRegisters(IntB.reg); *SR; ++SR) { + for (const uint16_t *SR = TRI->getSubRegisters(IntB.reg); *SR; ++SR) { if (!LIS->hasInterval(*SR)) continue; LiveInterval &SRLI = LIS->getInterval(*SR); SRLI.addRange(LiveRange(FillerStart, FillerEnd, - SRLI.getNextValue(FillerStart, 0, + SRLI.getNextValue(FillerStart, LIS->getVNInfoAllocator()))); } } @@ -554,9 +522,11 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP, ValLREndInst->getOperand(UIdx).setIsKill(false); } - // If the copy instruction was killing the destination register before the - // merge, find the last use and trim the live range. That will also add the - // isKill marker. + // Rewrite the copy. If the copy instruction was killing the destination + // register before the merge, find the last use and trim the live range. That + // will also add the isKill marker. 
+ CopyMI->substituteRegister(IntA.reg, IntB.reg, CP.getSubIdx(), + *TRI); if (ALR->end == CopyIdx) LIS->shrinkToUses(&IntA); @@ -625,7 +595,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, if (!LIS->hasInterval(CP.getDstReg())) return false; - SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getDefIndex(); + SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(); LiveInterval &IntA = LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); @@ -635,13 +605,13 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, // BValNo is a value number in B that is defined by a copy from A. 'B3' in // the example above. VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx); - if (!BValNo || !BValNo->isDefByCopy()) + if (!BValNo || BValNo->def != CopyIdx) return false; assert(BValNo->def == CopyIdx && "Copy doesn't define the value?"); // AValNo is the value number in A that defines the copy, A3 in the example. - VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getUseIndex()); + VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true)); assert(AValNo && "COPY source not live"); // If other defs can reach uses of this def, then it's not safe to perform @@ -651,8 +621,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def); if (!DefMI) return false; - const MCInstrDesc &MCID = DefMI->getDesc(); - if (!MCID.isCommutable()) + if (!DefMI->isCommutable()) return false; // If DefMI is a two-address instruction then commuting it will change the // destination register. @@ -684,7 +653,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, // Abort if the aliases of IntB.reg have values that are not simply the // clobbers from the superreg. if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) - for (const unsigned *AS = TRI->getAliasSet(IntB.reg); *AS; ++AS) + for (const uint16_t *AS = TRI->getAliasSet(IntB.reg); *AS; ++AS) if (LIS->hasInterval(*AS) && HasOtherReachingDefs(IntA, LIS->getInterval(*AS), AValNo, 0)) return false; @@ -718,7 +687,8 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, return false; if (NewMI != DefMI) { LIS->ReplaceMachineInstrInMaps(DefMI, NewMI); - MBB->insert(DefMI, NewMI); + MachineBasicBlock::iterator Pos = DefMI; + MBB->insert(Pos, NewMI); MBB->erase(DefMI); } unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false); @@ -747,7 +717,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, UseMO.setReg(NewReg); continue; } - SlotIndex UseIdx = LIS->getInstructionIndex(UseMI).getUseIndex(); + SlotIndex UseIdx = LIS->getInstructionIndex(UseMI).getRegSlot(true); LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); if (ULR == IntA.end() || ULR->valno != AValNo) continue; @@ -765,7 +735,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, // This copy will become a noop. If it's defining a new val#, merge it into // BValNo. - SlotIndex DefIdx = UseIdx.getDefIndex(); + SlotIndex DefIdx = UseIdx.getRegSlot(); VNInfo *DVNI = IntB.getVNInfoAt(DefIdx); if (!DVNI) continue; @@ -779,7 +749,6 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, // is updated. 
VNInfo *ValNo = BValNo; ValNo->def = AValNo->def; - ValNo->setCopy(0); for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end(); AI != AE; ++AI) { if (AI->valno != AValNo) continue; @@ -799,7 +768,7 @@ bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt, bool preserveSrcInt, unsigned DstReg, MachineInstr *CopyMI) { - SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getUseIndex(); + SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true); LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx); assert(SrcLR != SrcInt.end() && "Live range not found!"); VNInfo *ValNo = SrcLR->valno; @@ -809,14 +778,14 @@ bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt, if (!DefMI) return false; assert(DefMI && "Defining instruction disappeared"); - const MCInstrDesc &MCID = DefMI->getDesc(); - if (!MCID.isAsCheapAsAMove()) + if (!DefMI->isAsCheapAsAMove()) return false; if (!TII->isTriviallyReMaterializable(DefMI, AA)) return false; bool SawStore = false; if (!DefMI->isSafeToMove(TII, AA, SawStore)) return false; + const MCInstrDesc &MCID = DefMI->getDesc(); if (MCID.getNumDefs() != 1) return false; if (!DefMI->isImplicitDef()) { @@ -831,27 +800,52 @@ bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt, return false; } - RemoveCopyFlag(DstReg, CopyMI); - MachineBasicBlock *MBB = CopyMI->getParent(); MachineBasicBlock::iterator MII = llvm::next(MachineBasicBlock::iterator(CopyMI)); TII->reMaterialize(*MBB, MII, DstReg, 0, DefMI, *TRI); MachineInstr *NewMI = prior(MII); + // NewMI may have dead implicit defs (E.g. EFLAGS for MOV<bits>r0 on X86). + // We need to remember these so we can add intervals once we insert + // NewMI into SlotIndexes. + SmallVector<unsigned, 4> NewMIImplDefs; + for (unsigned i = NewMI->getDesc().getNumOperands(), + e = NewMI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = NewMI->getOperand(i); + if (MO.isReg()) { + assert(MO.isDef() && MO.isImplicit() && MO.isDead() && + TargetRegisterInfo::isPhysicalRegister(MO.getReg())); + NewMIImplDefs.push_back(MO.getReg()); + } + } + // CopyMI may have implicit operands, transfer them over to the newly // rematerialized instruction. And update implicit def interval valnos. for (unsigned i = CopyMI->getDesc().getNumOperands(), e = CopyMI->getNumOperands(); i != e; ++i) { MachineOperand &MO = CopyMI->getOperand(i); - if (MO.isReg() && MO.isImplicit()) - NewMI->addOperand(MO); - if (MO.isDef()) - RemoveCopyFlag(MO.getReg(), CopyMI); + if (MO.isReg()) { + assert(MO.isImplicit() && "No explicit operands after implict operands."); + // Discard VReg implicit defs. 
+ if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { + NewMI->addOperand(MO); + } + } } - NewMI->copyImplicitOps(CopyMI); LIS->ReplaceMachineInstrInMaps(CopyMI, NewMI); + + SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI); + for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) { + unsigned reg = NewMIImplDefs[i]; + LiveInterval &li = LIS->getInterval(reg); + VNInfo *DeadDefVN = li.getNextValue(NewMIIdx.getRegSlot(), + LIS->getVNInfoAllocator()); + LiveRange lr(NewMIIdx.getRegSlot(), NewMIIdx.getDeadSlot(), DeadDefVN); + li.addRange(lr); + } + CopyMI->eraseFromParent(); ReMatCopies.insert(CopyMI); ReMatDefs.insert(DefMI); @@ -887,7 +881,7 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI, DstInt = SrcInt; SrcInt = 0; - VNInfo *DeadVNI = DstInt->getVNInfoAt(Idx.getDefIndex()); + VNInfo *DeadVNI = DstInt->getVNInfoAt(Idx.getRegSlot()); assert(DeadVNI && "No value defined in DstInt"); DstInt->removeValNo(DeadVNI); @@ -941,13 +935,10 @@ RegisterCoalescer::UpdateRegDefsUses(const CoalescerPair &CP) { SmallVector<unsigned,8> Ops; bool Reads, Writes; tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops); - bool Kills = false, Deads = false; // Replace SrcReg with DstReg in all UseMI operands. for (unsigned i = 0, e = Ops.size(); i != e; ++i) { MachineOperand &MO = UseMI->getOperand(Ops[i]); - Kills |= MO.isKill(); - Deads |= MO.isDead(); // Make sure we don't create read-modify-write defs accidentally. We // assume here that a SrcReg def cannot be joined into a live DstReg. If @@ -967,19 +958,6 @@ RegisterCoalescer::UpdateRegDefsUses(const CoalescerPair &CP) { if (JoinedCopies.count(UseMI)) continue; - if (SubIdx) { - // If UseMI was a simple SrcReg def, make sure we didn't turn it into a - // read-modify-write of DstReg. - if (Deads) - UseMI->addRegisterDead(DstReg, TRI); - else if (!Reads && Writes) - UseMI->addRegisterDefined(DstReg, TRI); - - // Kill flags apply to the whole physical register. - if (DstIsPhys && Kills) - UseMI->addRegisterKilled(DstReg, TRI); - } - DEBUG({ dbgs() << "\t\tupdated: "; if (!UseMI->isDebugValue()) @@ -996,7 +974,7 @@ static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *LIS, const TargetRegisterInfo *TRI) { if (li.empty()) { if (TargetRegisterInfo::isPhysicalRegister(li.reg)) - for (const unsigned* SR = TRI->getSubRegisters(li.reg); *SR; ++SR) { + for (const uint16_t* SR = TRI->getSubRegisters(li.reg); *SR; ++SR) { if (!LIS->hasInterval(*SR)) continue; LiveInterval &sli = LIS->getInterval(*SR); @@ -1013,7 +991,7 @@ static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *LIS, /// the val# it defines. If the live interval becomes empty, remove it as well. 
bool RegisterCoalescer::RemoveDeadDef(LiveInterval &li, MachineInstr *DefMI) { - SlotIndex DefIdx = LIS->getInstructionIndex(DefMI).getDefIndex(); + SlotIndex DefIdx = LIS->getInstructionIndex(DefMI).getRegSlot(); LiveInterval::iterator MLR = li.FindLiveRangeContaining(DefIdx); if (DefIdx != MLR->valno->def) return false; @@ -1021,27 +999,6 @@ bool RegisterCoalescer::RemoveDeadDef(LiveInterval &li, return removeIntervalIfEmpty(li, LIS, TRI); } -void RegisterCoalescer::RemoveCopyFlag(unsigned DstReg, - const MachineInstr *CopyMI) { - SlotIndex DefIdx = LIS->getInstructionIndex(CopyMI).getDefIndex(); - if (LIS->hasInterval(DstReg)) { - LiveInterval &LI = LIS->getInterval(DstReg); - if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx)) - if (LR->valno->def == DefIdx) - LR->valno->setCopy(0); - } - if (!TargetRegisterInfo::isPhysicalRegister(DstReg)) - return; - for (const unsigned* AS = TRI->getAliasSet(DstReg); *AS; ++AS) { - if (!LIS->hasInterval(*AS)) - continue; - LiveInterval &LI = LIS->getInterval(*AS); - if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx)) - if (LR->valno->def == DefIdx) - LR->valno->setCopy(0); - } -} - /// shouldJoinPhys - Return true if a copy involving a physreg should be joined. /// We need to be careful about coalescing a source physical register with a /// virtual register. Once the coalescing is done, it cannot be broken and these @@ -1279,7 +1236,7 @@ bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) { } } - // SrcReg is guarateed to be the register whose live interval that is + // SrcReg is guaranteed to be the register whose live interval that is // being merged. LIS->removeInterval(CP.getSrcReg()); @@ -1368,9 +1325,9 @@ static bool RegistersDefinedFromSameValue(LiveIntervals &li, // FIXME: This is very conservative. For example, we don't handle // physical registers. - MachineInstr *MI = VNI->getCopy(); + MachineInstr *MI = li.getInstructionFromIndex(VNI->def); - if (!MI->isFullCopy() || CP.isPartial() || CP.isPhys()) + if (!MI || !MI->isFullCopy() || CP.isPartial() || CP.isPhys()) return false; unsigned Dst = MI->getOperand(0).getReg(); @@ -1388,11 +1345,9 @@ static bool RegistersDefinedFromSameValue(LiveIntervals &li, assert(Dst == A); VNInfo *Other = LR->valno; - if (!Other->isDefByCopy()) - return false; - const MachineInstr *OtherMI = Other->getCopy(); + const MachineInstr *OtherMI = li.getInstructionFromIndex(Other->def); - if (!OtherMI->isFullCopy()) + if (!OtherMI || !OtherMI->isFullCopy()) return false; unsigned OtherDst = OtherMI->getOperand(0).getReg(); @@ -1431,7 +1386,44 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) { // than the full interfeence check below. We allow overlapping live ranges // only when one is a copy of the other. if (CP.isPhys()) { - for (const unsigned *AS = TRI->getAliasSet(CP.getDstReg()); *AS; ++AS){ + // Optimization for reserved registers like ESP. + // We can only merge with a reserved physreg if RHS has a single value that + // is a copy of CP.DstReg(). The live range of the reserved register will + // look like a set of dead defs - we don't properly track the live range of + // reserved registers. + if (RegClassInfo.isReserved(CP.getDstReg())) { + assert(CP.isFlipped() && RHS.containsOneValue() && + "Invalid join with reserved register"); + // Deny any overlapping intervals. This depends on all the reserved + // register live ranges to look like dead defs. 
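// Sketch, not part of the patch: the reserved-register join test described in
// the comment above, reduced to plain C++. Segment, Interval and
// canJoinWithReservedReg are invented stand-ins for the LiveInterval
// machinery; only the shape of the check is the point. Because a reserved
// register's live range is tracked only as dead defs, the coalescer must
// refuse the join as soon as the virtual register overlaps anything aliased
// to the reserved physreg.
#include <vector>

struct Segment { unsigned Start, End; };              // half-open [Start, End)

struct Interval {
  std::vector<Segment> Segs;                          // sorted and disjoint
  bool overlaps(const Interval &Other) const {
    for (const Segment &A : Segs)
      for (const Segment &B : Other.Segs)
        if (A.Start < B.End && B.Start < A.End)
          return true;
    return false;
  }
};

// True if the virtual register may be merged into the reserved physreg:
// none of the physreg's overlapping units may interfere with VirtLI.
bool canJoinWithReservedReg(const Interval &VirtLI,
                            const std::vector<Interval> &AliasIntervals) {
  for (const Interval &AliasLI : AliasIntervals)
    if (VirtLI.overlaps(AliasLI))
      return false;                                   // interference: abort
  return true;                                        // safe: skip range merging
}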
+ for (const uint16_t *AS = TRI->getOverlaps(CP.getDstReg()); *AS; ++AS) { + if (!LIS->hasInterval(*AS)) { + // Make sure at least DstReg itself exists before attempting a join. + if (*AS == CP.getDstReg()) + LIS->getOrCreateInterval(CP.getDstReg()); + continue; + } + if (RHS.overlaps(LIS->getInterval(*AS))) { + DEBUG(dbgs() << "\t\tInterference: " << PrintReg(*AS, TRI) << '\n'); + return false; + } + } + // Skip any value computations, we are not adding new values to the + // reserved register. Also skip merging the live ranges, the reserved + // register live range doesn't need to be accurate as long as all the + // defs are there. + return true; + } + + // Check if a register mask clobbers DstReg. + BitVector UsableRegs; + if (LIS->checkRegMaskInterference(RHS, UsableRegs) && + !UsableRegs.test(CP.getDstReg())) { + DEBUG(dbgs() << "\t\tRegister mask interference.\n"); + return false; + } + + for (const uint16_t *AS = TRI->getAliasSet(CP.getDstReg()); *AS; ++AS){ if (!LIS->hasInterval(*AS)) continue; const LiveInterval &LHS = LIS->getInterval(*AS); @@ -1485,12 +1477,12 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) { for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); i != e; ++i) { VNInfo *VNI = *i; - if (VNI->isUnused() || !VNI->isDefByCopy()) // Src not defined by a copy? + if (VNI->isUnused() || VNI->isPHIDef()) + continue; + MachineInstr *MI = LIS->getInstructionFromIndex(VNI->def); + assert(MI && "Missing def"); + if (!MI->isCopyLike()) // Src not defined by a copy? continue; - - // Never join with a register that has EarlyClobber redefs. - if (VNI->hasRedefByEC()) - return false; // Figure out the value # from the RHS. LiveRange *lr = RHS.getLiveRangeContaining(VNI->def.getPrevSlot()); @@ -1499,7 +1491,6 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) { // DstReg is known to be a register in the LHS interval. If the src is // from the RHS interval, we can use its value #. - MachineInstr *MI = VNI->getCopy(); if (!CP.isCoalescable(MI) && !RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, lr, DupCopies)) continue; @@ -1512,12 +1503,12 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) { for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); i != e; ++i) { VNInfo *VNI = *i; - if (VNI->isUnused() || !VNI->isDefByCopy()) // Src not defined by a copy? + if (VNI->isUnused() || VNI->isPHIDef()) + continue; + MachineInstr *MI = LIS->getInstructionFromIndex(VNI->def); + assert(MI && "Missing def"); + if (!MI->isCopyLike()) // Src not defined by a copy? continue; - - // Never join with a register that has EarlyClobber redefs. - if (VNI->hasRedefByEC()) - return false; // Figure out the value # from the LHS. LiveRange *lr = LHS.getLiveRangeContaining(VNI->def.getPrevSlot()); @@ -1526,7 +1517,6 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) { // DstReg is known to be a register in the RHS interval. If the src is // from the LHS interval, we can use its value #. - MachineInstr *MI = VNI->getCopy(); if (!CP.isCoalescable(MI) && !RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, lr, DupCopies)) continue; @@ -1600,10 +1590,6 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) { if (LHSValNoAssignments[I->valno->id] != RHSValNoAssignments[J->valno->id]) return false; - // If it's re-defined by an early clobber somewhere in the live range, - // then conservatively abort coalescing. 
- if (NewVNInfo[LHSValNoAssignments[I->valno->id]]->hasRedefByEC()) - return false; } if (I->end < J->end) @@ -1905,8 +1891,8 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { unsigned Reg = MO.getReg(); if (!Reg) continue; + DeadDefs.push_back(Reg); if (TargetRegisterInfo::isVirtualRegister(Reg)) { - DeadDefs.push_back(Reg); // Remat may also enable register class inflation. if (RegClassInfo.isProperSubClass(MRI->getRegClass(Reg))) InflateRegs.push_back(Reg); @@ -1936,7 +1922,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { // Check for now unnecessary kill flags. if (LIS->isNotInMIMap(MI)) continue; - SlotIndex DefIdx = LIS->getInstructionIndex(MI).getDefIndex(); + SlotIndex DefIdx = LIS->getInstructionIndex(MI).getRegSlot(); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isKill()) continue; @@ -1950,7 +1936,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { // remain alive. if (!TargetRegisterInfo::isPhysicalRegister(reg)) continue; - for (const unsigned *SR = TRI->getSubRegisters(reg); + for (const uint16_t *SR = TRI->getSubRegisters(reg); unsigned S = *SR; ++SR) if (LIS->hasInterval(S) && LIS->getInterval(S).liveAt(DefIdx)) MI->addRegisterDefined(S, TRI); diff --git a/lib/CodeGen/RegisterCoalescer.h b/lib/CodeGen/RegisterCoalescer.h index 472c48377fef..310b933cab9b 100644 --- a/lib/CodeGen/RegisterCoalescer.h +++ b/lib/CodeGen/RegisterCoalescer.h @@ -1,4 +1,4 @@ -//===-- RegisterCoalescer.h - Register Coalescing Interface ------*- C++ -*-===// +//===-- RegisterCoalescer.h - Register Coalescing Interface -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file contains the abstract interface for register coalescers, +// This file contains the abstract interface for register coalescers, // allowing them to interact with and query register allocators. // //===----------------------------------------------------------------------===// @@ -47,7 +47,7 @@ namespace llvm { /// CrossClass - True when both regs are virtual, and newRC is constrained. bool CrossClass; - /// Flipped - True when DstReg and SrcReg are reversed from the oriignal + /// Flipped - True when DstReg and SrcReg are reversed from the original /// copy instruction. bool Flipped; diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index ca02aa1b8143..03bd82e225dc 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -37,7 +37,7 @@ using namespace llvm; void RegScavenger::setUsed(unsigned Reg) { RegsAvailable.reset(Reg); - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) RegsAvailable.reset(SubReg); } @@ -45,7 +45,7 @@ void RegScavenger::setUsed(unsigned Reg) { bool RegScavenger::isAliasUsed(unsigned Reg) const { if (isUsed(Reg)) return true; - for (const unsigned *R = TRI->getAliasSet(Reg); *R; ++R) + for (const uint16_t *R = TRI->getAliasSet(Reg); *R; ++R) if (isUsed(*R)) return true; return false; @@ -59,9 +59,6 @@ void RegScavenger::initRegState() { // All registers started out unused. RegsAvailable.set(); - // Reserved registers are always used. 
- RegsAvailable ^= ReservedRegs; - if (!MBB) return; @@ -86,17 +83,24 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) { assert((NumPhysRegs == 0 || NumPhysRegs == TRI->getNumRegs()) && "Target changed?"); + // It is not possible to use the register scavenger after late optimization + // passes that don't preserve accurate liveness information. + assert(MRI->tracksLiveness() && + "Cannot use register scavenger with inaccurate liveness"); + // Self-initialize. if (!MBB) { NumPhysRegs = TRI->getNumRegs(); RegsAvailable.resize(NumPhysRegs); + KillRegs.resize(NumPhysRegs); + DefRegs.resize(NumPhysRegs); // Create reserved registers bitvector. ReservedRegs = TRI->getReservedRegs(MF); // Create callee-saved registers bitvector. CalleeSavedRegs.resize(NumPhysRegs); - const unsigned *CSRegs = TRI->getCalleeSavedRegs(); + const uint16_t *CSRegs = TRI->getCalleeSavedRegs(&MF); if (CSRegs != NULL) for (unsigned i = 0; CSRegs[i]; ++i) CalleeSavedRegs.set(CSRegs[i]); @@ -110,13 +114,7 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) { void RegScavenger::addRegWithSubRegs(BitVector &BV, unsigned Reg) { BV.set(Reg); - for (const unsigned *R = TRI->getSubRegisters(Reg); *R; R++) - BV.set(*R); -} - -void RegScavenger::addRegWithAliases(BitVector &BV, unsigned Reg) { - BV.set(Reg); - for (const unsigned *R = TRI->getAliasSet(Reg); *R; R++) + for (const uint16_t *R = TRI->getSubRegisters(Reg); *R; R++) BV.set(*R); } @@ -148,12 +146,12 @@ void RegScavenger::forward() { // predicated, conservatively assume "kill" markers do not actually kill the // register. Similarly ignores "dead" markers. bool isPred = TII->isPredicated(MI); - BitVector EarlyClobberRegs(NumPhysRegs); - BitVector KillRegs(NumPhysRegs); - BitVector DefRegs(NumPhysRegs); - BitVector DeadRegs(NumPhysRegs); + KillRegs.reset(); + DefRegs.reset(); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); + if (MO.isRegMask()) + (isPred ? DefRegs : KillRegs).setBitsNotInMask(MO.getRegMask()); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); @@ -164,21 +162,19 @@ void RegScavenger::forward() { // Ignore undef uses. if (MO.isUndef()) continue; - // Two-address operands implicitly kill. - if (!isPred && (MO.isKill() || MI->isRegTiedToDefOperand(i))) + if (!isPred && MO.isKill()) addRegWithSubRegs(KillRegs, Reg); } else { assert(MO.isDef()); if (!isPred && MO.isDead()) - addRegWithSubRegs(DeadRegs, Reg); + addRegWithSubRegs(KillRegs, Reg); else addRegWithSubRegs(DefRegs, Reg); - if (MO.isEarlyClobber()) - addRegWithAliases(EarlyClobberRegs, Reg); } } // Verify uses and defs. +#ifndef NDEBUG for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) @@ -199,17 +195,18 @@ void RegScavenger::forward() { // Ideally we would like a way to model this, but leaving the // insert_subreg around causes both correctness and performance issues. 
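// Sketch, not part of the patch: one forward() step of the scavenger with the
// KillRegs/DefRegs bookkeeping introduced above, written against invented
// Operand/stepForward names. A register-mask operand kills every register it
// does not preserve, and on a predicated instruction kills are conservatively
// treated as defs, mirroring the hunk.
#include <vector>

struct Operand {
  bool IsRegMask, IsDef, IsKill, IsDead;
  unsigned Reg;                       // valid when !IsRegMask
  std::vector<bool> Preserved;        // valid when IsRegMask, one bit per reg
};

void stepForward(std::vector<bool> &Available,      // true = register is free
                 const std::vector<Operand> &Ops, bool IsPredicated) {
  const unsigned N = Available.size();
  std::vector<bool> Kill(N, false), Def(N, false);
  for (const Operand &MO : Ops) {
    if (MO.IsRegMask) {                              // clobbers what it misses
      for (unsigned R = 0; R != N; ++R)
        if (!MO.Preserved[R])
          (IsPredicated ? Def : Kill)[R] = true;
    } else if (!MO.IsDef) {
      if (!IsPredicated && MO.IsKill)
        Kill[MO.Reg] = true;                         // last use frees the reg
    } else {
      ((!IsPredicated && MO.IsDead) ? Kill : Def)[MO.Reg] = true;
    }
  }
  for (unsigned R = 0; R != N; ++R) {                // commit the changes
    if (Kill[R]) Available[R] = true;
    if (Def[R])  Available[R] = false;
  }
}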
bool SubUsed = false; - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) if (isUsed(SubReg)) { SubUsed = true; break; } - assert(SubUsed && "Using an undefined register!"); + if (!SubUsed) { + MBB->getParent()->verify(NULL, "In Register Scavenger"); + llvm_unreachable("Using an undefined register!"); + } (void)SubUsed; } - assert((!EarlyClobberRegs.test(Reg) || MI->isRegTiedToDefOperand(i)) && - "Using an early clobbered register!"); } else { assert(MO.isDef()); #if 0 @@ -221,18 +218,20 @@ void RegScavenger::forward() { #endif } } +#endif // NDEBUG // Commit the changes. setUnused(KillRegs); - setUnused(DeadRegs); setUsed(DefRegs); } void RegScavenger::getRegsUsed(BitVector &used, bool includeReserved) { + used = RegsAvailable; + used.flip(); if (includeReserved) - used = ~RegsAvailable; + used |= ReservedRegs; else - used = ~RegsAvailable & ~ReservedRegs; + used.reset(ReservedRegs); } unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const { @@ -286,6 +285,8 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI, // Remove any candidates touched by instruction. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); + if (MO.isRegMask()) + Candidates.clearBitsNotInMask(MO.getRegMask()); if (!MO.isReg() || MO.isUndef() || !MO.getReg()) continue; if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) { @@ -296,7 +297,7 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI, continue; } Candidates.reset(MO.getReg()); - for (const unsigned *R = TRI->getAliasSet(MO.getReg()); *R; R++) + for (const uint16_t *R = TRI->getAliasSet(MO.getReg()); *R; R++) Candidates.reset(*R); } // If we're not in a virtual reg's live range, this is a valid @@ -347,9 +348,9 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, // RegsAvailable, as RegsAvailable does not take aliases into account. // That's what getRegsAvailable() is for. BitVector Available = getRegsAvailable(RC); - - if ((Candidates & Available).any()) - Candidates &= Available; + Available &= Candidates; + if (Available.any()) + Candidates = Available; // Find the register whose use is furthest away. MachineBasicBlock::iterator UseMI; diff --git a/lib/CodeGen/RenderMachineFunction.cpp b/lib/CodeGen/RenderMachineFunction.cpp index 8b02ec44273a..6020908d9112 100644 --- a/lib/CodeGen/RenderMachineFunction.cpp +++ b/lib/CodeGen/RenderMachineFunction.cpp @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/RenderMachineFunction.cpp - MF->HTML -----s-----------===// +//===-- llvm/CodeGen/RenderMachineFunction.cpp - MF->HTML -----------------===// // // The LLVM Compiler Infrastructure // @@ -560,12 +560,13 @@ namespace llvm { // For uses/defs recorded use/def indexes override current liveness and // instruction operands (Only for the interval which records the indexes). - if (i.isUse() || i.isDef()) { + // FIXME: This is all wrong, uses and defs share the same slots. 
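// Sketch, not part of the patch: the systematic renames in this and the
// surrounding files (getDefIndex to getRegSlot, getUseIndex to
// getRegSlot(true), getStoreIndex to getDeadSlot, isLoad to isBlock) follow
// from every instruction index carrying four sub-slots. The model below uses
// invented names and is not the real SlotIndexes implementation; it only
// shows the encoding those accessors assume.
enum Slot { Block = 0, EarlyClobber = 1, Register = 2, Dead = 3 };

struct SlotIdx {
  unsigned InstrNum;   // which instruction in the function
  Slot Kind;           // which of the four per-instruction slots

  // Normal uses and defs both live at the Register slot; passing true asks
  // for the EarlyClobber slot just before it, which is what the patch
  // substitutes for the old getUseIndex().
  SlotIdx getRegSlot(bool EC = false) const {
    return SlotIdx{InstrNum, EC ? EarlyClobber : Register};
  }
  SlotIdx getDeadSlot() const { return SlotIdx{InstrNum, Dead}; }
  bool isBlock() const        { return Kind == Block; }
  bool isEarlyClobber() const { return Kind == EarlyClobber; }
  bool isRegister() const     { return Kind == Register; }
  bool isDead() const         { return Kind == Dead; }
};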
+ if (i.isEarlyClobber() || i.isRegister()) { UseDefs::const_iterator udItr = useDefs.find(li); if (udItr != useDefs.end()) { const SlotSet &slotSet = udItr->second; if (slotSet.count(i)) { - if (i.isUse()) { + if (i.isEarlyClobber()) { return Used; } // else @@ -586,9 +587,9 @@ namespace llvm { return AliveStack; } } else { - if (i.isDef() && mi->definesRegister(li->reg, tri)) { + if (i.isRegister() && mi->definesRegister(li->reg, tri)) { return Defined; - } else if (i.isUse() && mi->readsRegister(li->reg)) { + } else if (i.isEarlyClobber() && mi->readsRegister(li->reg)) { return Used; } else { if (vrm == 0 || @@ -804,7 +805,7 @@ namespace llvm { os << indent + s(2) << "<tr height=6ex>\n"; // Render the code column. - if (i.isLoad()) { + if (i.isBlock()) { MachineBasicBlock *mbb = sis->getMBBFromIndex(i); mi = sis->getInstructionFromIndex(i); @@ -823,7 +824,7 @@ namespace llvm { } os << indent + s(4) << "</td>\n"; } else { - i = i.getStoreIndex(); // <- Will be incremented to the next index. + i = i.getDeadSlot(); // <- Will be incremented to the next index. continue; } } @@ -952,10 +953,10 @@ namespace llvm { rItr != rEnd; ++rItr) { const MachineInstr *mi = &*rItr; if (mi->readsRegister(li->reg)) { - useDefs[li].insert(lis->getInstructionIndex(mi).getUseIndex()); + useDefs[li].insert(lis->getInstructionIndex(mi).getRegSlot(true)); } if (mi->definesRegister(li->reg)) { - useDefs[li].insert(lis->getInstructionIndex(mi).getDefIndex()); + useDefs[li].insert(lis->getInstructionIndex(mi).getRegSlot()); } } } diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 1e9b5c89f172..8fd64265fda6 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -31,6 +31,8 @@ static cl::opt<bool> StressSchedOpt( cl::desc("Stress test instruction scheduling")); #endif +void SchedulingPriorityQueue::anchor() { } + ScheduleDAG::ScheduleDAG(MachineFunction &mf) : TM(mf.getTarget()), TII(TM.getInstrInfo()), @@ -44,42 +46,17 @@ ScheduleDAG::ScheduleDAG(MachineFunction &mf) ScheduleDAG::~ScheduleDAG() {} -/// getInstrDesc helper to handle SDNodes. -const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const { - if (!Node || !Node->isMachineOpcode()) return NULL; - return &TII->get(Node->getMachineOpcode()); -} - -/// dump - dump the schedule. -void ScheduleDAG::dumpSchedule() const { - for (unsigned i = 0, e = Sequence.size(); i != e; i++) { - if (SUnit *SU = Sequence[i]) - SU->dump(this); - else - dbgs() << "**** NOOP ****\n"; - } -} - - -/// Run - perform scheduling. -/// -void ScheduleDAG::Run(MachineBasicBlock *bb, - MachineBasicBlock::iterator insertPos) { - BB = bb; - InsertPos = insertPos; - +/// Clear the DAG state (e.g. between scheduling regions). +void ScheduleDAG::clearDAG() { SUnits.clear(); - Sequence.clear(); EntrySU = SUnit(); ExitSU = SUnit(); +} - Schedule(); - - DEBUG({ - dbgs() << "*** Final schedule ***\n"; - dumpSchedule(); - dbgs() << '\n'; - }); +/// getInstrDesc helper to handle SDNodes. 
+const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const { + if (!Node || !Node->isMachineOpcode()) return NULL; + return &TII->get(Node->getMachineOpcode()); } /// addPred - This adds the specified edge as a pred of the current node if @@ -313,13 +290,12 @@ void SUnit::dumpAll(const ScheduleDAG *G) const { case SDep::Output: dbgs() << "out "; break; case SDep::Order: dbgs() << "ch "; break; } - dbgs() << "#"; - dbgs() << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")"; + dbgs() << "SU(" << I->getSUnit()->NodeNum << ")"; if (I->isArtificial()) dbgs() << " *"; dbgs() << ": Latency=" << I->getLatency(); if (I->isAssignedRegDep()) - dbgs() << " Reg=" << G->TRI->getName(I->getReg()); + dbgs() << " Reg=" << PrintReg(I->getReg(), G->TRI); dbgs() << "\n"; } } @@ -334,8 +310,7 @@ void SUnit::dumpAll(const ScheduleDAG *G) const { case SDep::Output: dbgs() << "out "; break; case SDep::Order: dbgs() << "ch "; break; } - dbgs() << "#"; - dbgs() << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")"; + dbgs() << "SU(" << I->getSUnit()->NodeNum << ")"; if (I->isArtificial()) dbgs() << " *"; dbgs() << ": Latency=" << I->getLatency(); @@ -346,13 +321,12 @@ void SUnit::dumpAll(const ScheduleDAG *G) const { } #ifndef NDEBUG -/// VerifySchedule - Verify that all SUnits were scheduled and that -/// their state is consistent. +/// VerifyScheduledDAG - Verify that all SUnits were scheduled and that +/// their state is consistent. Return the number of scheduled nodes. /// -void ScheduleDAG::VerifySchedule(bool isBottomUp) { +unsigned ScheduleDAG::VerifyScheduledDAG(bool isBottomUp) { bool AnyNotSched = false; unsigned DeadNodes = 0; - unsigned Noops = 0; for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { if (!SUnits[i].isScheduled) { if (SUnits[i].NumPreds == 0 && SUnits[i].NumSuccs == 0) { @@ -393,12 +367,8 @@ void ScheduleDAG::VerifySchedule(bool isBottomUp) { } } } - for (unsigned i = 0, e = Sequence.size(); i != e; ++i) - if (!Sequence[i]) - ++Noops; assert(!AnyNotSched); - assert(Sequence.size() + DeadNodes - Noops == SUnits.size() && - "The number of nodes scheduled doesn't match the expected number!"); + return SUnits.size() - DeadNodes; } #endif diff --git a/lib/CodeGen/ScheduleDAGEmit.cpp b/lib/CodeGen/ScheduleDAGEmit.cpp deleted file mode 100644 index f8b1bc76eb8b..000000000000 --- a/lib/CodeGen/ScheduleDAGEmit.cpp +++ /dev/null @@ -1,68 +0,0 @@ -//===---- ScheduleDAGEmit.cpp - Emit routines for the ScheduleDAG class ---===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This implements the Emit routines for the ScheduleDAG class, which creates -// MachineInstrs according to the computed schedule. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "pre-RA-sched" -#include "llvm/CodeGen/ScheduleDAG.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/MathExtras.h" -using namespace llvm; - -void ScheduleDAG::EmitNoop() { - TII->insertNoop(*BB, InsertPos); -} - -void ScheduleDAG::EmitPhysRegCopy(SUnit *SU, - DenseMap<SUnit*, unsigned> &VRBaseMap) { - for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - if (I->isCtrl()) continue; // ignore chain preds - if (I->getSUnit()->CopyDstRC) { - // Copy to physical register. - DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->getSUnit()); - assert(VRI != VRBaseMap.end() && "Node emitted out of order - late"); - // Find the destination physical register. - unsigned Reg = 0; - for (SUnit::const_succ_iterator II = SU->Succs.begin(), - EE = SU->Succs.end(); II != EE; ++II) { - if (II->isCtrl()) continue; // ignore chain preds - if (II->getReg()) { - Reg = II->getReg(); - break; - } - } - BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg) - .addReg(VRI->second); - } else { - // Copy from physical register. - assert(I->getReg() && "Unknown physical register!"); - unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC); - bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second; - (void)isNew; // Silence compiler warning. 
- assert(isNew && "Node emitted out of order - early"); - BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase) - .addReg(I->getReg()); - } - break; - } -} diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 34b8ab0b47f2..6be1ab7f5b08 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -13,14 +13,15 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "sched-instrs" -#include "ScheduleDAGInstrs.h" #include "llvm/Operator.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" @@ -33,25 +34,17 @@ using namespace llvm; ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, const MachineLoopInfo &mli, - const MachineDominatorTree &mdt) + const MachineDominatorTree &mdt, + bool IsPostRAFlag, + LiveIntervals *lis) : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()), - InstrItins(mf.getTarget().getInstrItineraryData()), - Defs(TRI->getNumRegs()), Uses(TRI->getNumRegs()), - LoopRegs(MLI, MDT), FirstDbgValue(0) { + InstrItins(mf.getTarget().getInstrItineraryData()), LIS(lis), + IsPostRA(IsPostRAFlag), UnitLatencies(false), LoopRegs(MLI, MDT), + FirstDbgValue(0) { + assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals"); DbgValues.clear(); -} - -/// Run - perform scheduling. -/// -void ScheduleDAGInstrs::Run(MachineBasicBlock *bb, - MachineBasicBlock::iterator begin, - MachineBasicBlock::iterator end, - unsigned endcount) { - BB = bb; - Begin = begin; - InsertPosIndex = endcount; - - ScheduleDAG::Run(bb, end); + assert(!(IsPostRA && MRI.getNumVirtRegs()) && + "Virtual registers must be removed prior to PostRA scheduling"); } /// getUnderlyingObjectFromInt - This is the function that does the work of @@ -133,19 +126,58 @@ static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI, return 0; } -void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) { +void ScheduleDAGInstrs::startBlock(MachineBasicBlock *BB) { LoopRegs.Deps.clear(); if (MachineLoop *ML = MLI.getLoopFor(BB)) - if (BB == ML->getLoopLatch()) { - MachineBasicBlock *Header = ML->getHeader(); - for (MachineBasicBlock::livein_iterator I = Header->livein_begin(), - E = Header->livein_end(); I != E; ++I) - LoopLiveInRegs.insert(*I); + if (BB == ML->getLoopLatch()) LoopRegs.VisitLoop(ML); - } } -/// AddSchedBarrierDeps - Add dependencies from instructions in the current +void ScheduleDAGInstrs::finishBlock() { + // Nothing to do. +} + +/// Initialize the map with the number of registers. +void Reg2SUnitsMap::setRegLimit(unsigned Limit) { + PhysRegSet.setUniverse(Limit); + SUnits.resize(Limit); +} + +/// Clear the map without deallocating storage. +void Reg2SUnitsMap::clear() { + for (const_iterator I = reg_begin(), E = reg_end(); I != E; ++I) { + SUnits[*I].clear(); + } + PhysRegSet.clear(); +} + +/// Initialize the DAG and common scheduler state for the current scheduling +/// region. This does not actually create the DAG, only clears it. The +/// scheduling driver may call BuildSchedGraph multiple times per scheduling +/// region. 
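// Sketch, not part of the patch: how a driver is expected to use the
// region-based interface being introduced here, per the comments in this
// hunk. RegionScheduler and its methods are stand-ins; only the call order
// (startBlock, then one enterRegion/exitRegion pair per scheduling region,
// then finishBlock) is the point.
#include <cstdio>

struct RegionScheduler {
  void startBlock(int BB)        { std::printf("startBlock %d\n", BB); }
  void enterRegion(int B, int E) { std::printf("  region [%d,%d)\n", B, E); }
  void buildAndSchedule()        { std::printf("  build graph, schedule\n"); }
  void exitRegion()              { /* state is kept for the driver */ }
  void finishBlock()             { /* nothing to do */ }
};

int main() {
  RegionScheduler S;
  S.startBlock(0);                                             // one block that
  S.enterRegion(0, 4); S.buildAndSchedule(); S.exitRegion();   // is split into
  S.enterRegion(5, 9); S.buildAndSchedule(); S.exitRegion();   // two regions
  S.finishBlock();
  return 0;
}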
+void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb, + MachineBasicBlock::iterator begin, + MachineBasicBlock::iterator end, + unsigned endcount) { + BB = bb; + RegionBegin = begin; + RegionEnd = end; + EndIndex = endcount; + MISUnitMap.clear(); + + // Check to see if the scheduler cares about latencies. + UnitLatencies = forceUnitLatencies(); + + ScheduleDAG::clearDAG(); +} + +/// Close the current scheduling region. Don't clear any state in case the +/// driver wants to refer to the previous scheduling region. +void ScheduleDAGInstrs::exitRegion() { + // Nothing to do. +} + +/// addSchedBarrierDeps - Add dependencies from instructions in the current /// list of instructions being scheduled to scheduling barrier by adding /// the exit SU to the register defs and use list. This is because we want to /// make sure instructions which define registers that are either used by @@ -153,11 +185,11 @@ void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) { /// especially important when the definition latency of the return value(s) /// are too high to be hidden by the branch or when the liveout registers /// used by instructions in the fallthrough block. -void ScheduleDAGInstrs::AddSchedBarrierDeps() { - MachineInstr *ExitMI = InsertPos != BB->end() ? &*InsertPos : 0; +void ScheduleDAGInstrs::addSchedBarrierDeps() { + MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : 0; ExitSU.setInstr(ExitMI); bool AllDepKnown = ExitMI && - (ExitMI->getDesc().isCall() || ExitMI->getDesc().isBarrier()); + (ExitMI->isCall() || ExitMI->isBarrier()); if (ExitMI && AllDepKnown) { // If it's a call or a barrier, add dependencies on the defs and uses of // instruction. @@ -167,29 +199,313 @@ void ScheduleDAGInstrs::AddSchedBarrierDeps() { unsigned Reg = MO.getReg(); if (Reg == 0) continue; - assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!"); - Uses[Reg].push_back(&ExitSU); + if (TRI->isPhysicalRegister(Reg)) + Uses[Reg].push_back(&ExitSU); + else { + assert(!IsPostRA && "Virtual register encountered after regalloc."); + addVRegUseDeps(&ExitSU, i); + } } } else { // For others, e.g. fallthrough, conditional branch, assume the exit // uses all the registers that are livein to the successor blocks. - SmallSet<unsigned, 8> Seen; + assert(Uses.empty() && "Uses in set before adding deps?"); for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), E = (*SI)->livein_end(); I != E; ++I) { unsigned Reg = *I; - if (Seen.insert(Reg)) + if (!Uses.contains(Reg)) Uses[Reg].push_back(&ExitSU); } } } -void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { - // We'll be allocating one SUnit for each instruction, plus one for - // the region exit node. +/// MO is an operand of SU's instruction that defines a physical register. Add +/// data dependencies from SU to any uses of the physical register. +void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, + const MachineOperand &MO) { + assert(MO.isDef() && "expect physreg def"); + + // Ask the target if address-backscheduling is desirable, and if so how much. 
+ const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); + unsigned SpecialAddressLatency = ST.getSpecialAddressLatency(); + unsigned DataLatency = SU->Latency; + + for (const uint16_t *Alias = TRI->getOverlaps(MO.getReg()); *Alias; ++Alias) { + if (!Uses.contains(*Alias)) + continue; + std::vector<SUnit*> &UseList = Uses[*Alias]; + for (unsigned i = 0, e = UseList.size(); i != e; ++i) { + SUnit *UseSU = UseList[i]; + if (UseSU == SU) + continue; + unsigned LDataLatency = DataLatency; + // Optionally add in a special extra latency for nodes that + // feed addresses. + // TODO: Perhaps we should get rid of + // SpecialAddressLatency and just move this into + // adjustSchedDependency for the targets that care about it. + if (SpecialAddressLatency != 0 && !UnitLatencies && + UseSU != &ExitSU) { + MachineInstr *UseMI = UseSU->getInstr(); + const MCInstrDesc &UseMCID = UseMI->getDesc(); + int RegUseIndex = UseMI->findRegisterUseOperandIdx(*Alias); + assert(RegUseIndex >= 0 && "UseMI doesn't use register!"); + if (RegUseIndex >= 0 && + (UseMI->mayLoad() || UseMI->mayStore()) && + (unsigned)RegUseIndex < UseMCID.getNumOperands() && + UseMCID.OpInfo[RegUseIndex].isLookupPtrRegClass()) + LDataLatency += SpecialAddressLatency; + } + // Adjust the dependence latency using operand def/use + // information (if any), and then allow the target to + // perform its own adjustments. + const SDep& dep = SDep(SU, SDep::Data, LDataLatency, *Alias); + if (!UnitLatencies) { + computeOperandLatency(SU, UseSU, const_cast<SDep &>(dep)); + ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep)); + } + UseSU->addPred(dep); + } + } +} + +/// addPhysRegDeps - Add register dependencies (data, anti, and output) from +/// this SUnit to following instructions in the same scheduling region that +/// depend the physical register referenced at OperIdx. +void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { + const MachineInstr *MI = SU->getInstr(); + const MachineOperand &MO = MI->getOperand(OperIdx); + + // Optionally add output and anti dependencies. For anti + // dependencies we use a latency of 0 because for a multi-issue + // target we want to allow the defining instruction to issue + // in the same cycle as the using instruction. + // TODO: Using a latency of 1 here for output dependencies assumes + // there's no cost for reusing registers. + SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output; + for (const uint16_t *Alias = TRI->getOverlaps(MO.getReg()); *Alias; ++Alias) { + if (!Defs.contains(*Alias)) + continue; + std::vector<SUnit *> &DefList = Defs[*Alias]; + for (unsigned i = 0, e = DefList.size(); i != e; ++i) { + SUnit *DefSU = DefList[i]; + if (DefSU == &ExitSU) + continue; + if (DefSU != SU && + (Kind != SDep::Output || !MO.isDead() || + !DefSU->getInstr()->registerDefIsDead(*Alias))) { + if (Kind == SDep::Anti) + DefSU->addPred(SDep(SU, Kind, 0, /*Reg=*/*Alias)); + else { + unsigned AOLat = TII->getOutputLatency(InstrItins, MI, OperIdx, + DefSU->getInstr()); + DefSU->addPred(SDep(SU, Kind, AOLat, /*Reg=*/*Alias)); + } + } + } + } + + if (!MO.isDef()) { + // Either insert a new Reg2SUnits entry with an empty SUnits list, or + // retrieve the existing SUnits list for this register's uses. + // Push this SUnit on the use list. + Uses[MO.getReg()].push_back(SU); + } + else { + addPhysRegDataDeps(SU, MO); + + // Either insert a new Reg2SUnits entry with an empty SUnits list, or + // retrieve the existing SUnits list for this register's defs. 
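// Sketch, not part of the patch: the per-physreg bookkeeping that
// addPhysRegDeps/addPhysRegDataDeps perform, with invented Node, addEdge and
// PhysRegDeps names and without latencies or alias sets. The region is
// scanned bottom-up, so the recorded nodes are always later in program order
// than the one currently being visited.
#include <vector>

struct Node { int Id; };

// Succ must come after Pred in the final schedule; Kind is data/anti/output.
static void addEdge(Node *Pred, Node *Succ, const char *Kind) {
  (void)Pred; (void)Succ; (void)Kind;               // a real DAG records this
}

struct PhysRegDeps {
  std::vector<std::vector<Node*> > Uses, Defs;      // one list per register
  explicit PhysRegDeps(unsigned NumRegs) : Uses(NumRegs), Defs(NumRegs) {}

  void visit(Node *SU, unsigned Reg, bool IsDef) {
    if (!IsDef) {
      for (Node *D : Defs[Reg]) addEdge(SU, D, "anti");  // later redef waits
      Uses[Reg].push_back(SU);
      return;
    }
    for (Node *U : Uses[Reg]) addEdge(SU, U, "data");    // feeds later uses
    for (Node *D : Defs[Reg]) addEdge(SU, D, "output");  // before later defs
    Uses[Reg].clear();            // later uses now have their producer
    Defs[Reg].assign(1, SU);      // simplified: the real code keeps call defs
  }
};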
+ std::vector<SUnit *> &DefList = Defs[MO.getReg()]; + + // If a def is going to wrap back around to the top of the loop, + // backschedule it. + if (!UnitLatencies && DefList.empty()) { + LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(MO.getReg()); + if (I != LoopRegs.Deps.end()) { + const MachineOperand *UseMO = I->second.first; + unsigned Count = I->second.second; + const MachineInstr *UseMI = UseMO->getParent(); + unsigned UseMOIdx = UseMO - &UseMI->getOperand(0); + const MCInstrDesc &UseMCID = UseMI->getDesc(); + const TargetSubtargetInfo &ST = + TM.getSubtarget<TargetSubtargetInfo>(); + unsigned SpecialAddressLatency = ST.getSpecialAddressLatency(); + // TODO: If we knew the total depth of the region here, we could + // handle the case where the whole loop is inside the region but + // is large enough that the isScheduleHigh trick isn't needed. + if (UseMOIdx < UseMCID.getNumOperands()) { + // Currently, we only support scheduling regions consisting of + // single basic blocks. Check to see if the instruction is in + // the same region by checking to see if it has the same parent. + if (UseMI->getParent() != MI->getParent()) { + unsigned Latency = SU->Latency; + if (UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass()) + Latency += SpecialAddressLatency; + // This is a wild guess as to the portion of the latency which + // will be overlapped by work done outside the current + // scheduling region. + Latency -= std::min(Latency, Count); + // Add the artificial edge. + ExitSU.addPred(SDep(SU, SDep::Order, Latency, + /*Reg=*/0, /*isNormalMemory=*/false, + /*isMustAlias=*/false, + /*isArtificial=*/true)); + } else if (SpecialAddressLatency > 0 && + UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass()) { + // The entire loop body is within the current scheduling region + // and the latency of this operation is assumed to be greater + // than the latency of the loop. + // TODO: Recursively mark data-edge predecessors as + // isScheduleHigh too. + SU->isScheduleHigh = true; + } + } + LoopRegs.Deps.erase(I); + } + } + + // clear this register's use list + if (Uses.contains(MO.getReg())) + Uses[MO.getReg()].clear(); + + if (!MO.isDead()) + DefList.clear(); + + // Calls will not be reordered because of chain dependencies (see + // below). Since call operands are dead, calls may continue to be added + // to the DefList making dependence checking quadratic in the size of + // the block. Instead, we leave only one call at the back of the + // DefList. + if (SU->isCall) { + while (!DefList.empty() && DefList.back()->isCall) + DefList.pop_back(); + } + // Defs are pushed in the order they are visited and never reordered. + DefList.push_back(SU); + } +} + +/// addVRegDefDeps - Add register output and data dependencies from this SUnit +/// to instructions that occur later in the same scheduling region if they read +/// from or write to the virtual register defined at OperIdx. +/// +/// TODO: Hoist loop induction variable increments. This has to be +/// reevaluated. Generally, IV scheduling should be done before coalescing. +void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { + const MachineInstr *MI = SU->getInstr(); + unsigned Reg = MI->getOperand(OperIdx).getReg(); + + // SSA defs do not have output/anti dependencies. + // The current operand is a def, so we have at least one. + if (llvm::next(MRI.def_begin(Reg)) == MRI.def_end()) + return; + + // Add output dependence to the next nearest def of this vreg. 
+ // + // Unless this definition is dead, the output dependence should be + // transitively redundant with antidependencies from this definition's + // uses. We're conservative for now until we have a way to guarantee the uses + // are not eliminated sometime during scheduling. The output dependence edge + // is also useful if output latency exceeds def-use latency. + VReg2SUnitMap::iterator DefI = findVRegDef(Reg); + if (DefI == VRegDefs.end()) + VRegDefs.insert(VReg2SUnit(Reg, SU)); + else { + SUnit *DefSU = DefI->SU; + if (DefSU != SU && DefSU != &ExitSU) { + unsigned OutLatency = TII->getOutputLatency(InstrItins, MI, OperIdx, + DefSU->getInstr()); + DefSU->addPred(SDep(SU, SDep::Output, OutLatency, Reg)); + } + DefI->SU = SU; + } +} + +/// addVRegUseDeps - Add a register data dependency if the instruction that +/// defines the virtual register used at OperIdx is mapped to an SUnit. Add a +/// register antidependency from this SUnit to instructions that occur later in +/// the same scheduling region if they write the virtual register. +/// +/// TODO: Handle ExitSU "uses" properly. +void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { + MachineInstr *MI = SU->getInstr(); + unsigned Reg = MI->getOperand(OperIdx).getReg(); + + // Lookup this operand's reaching definition. + assert(LIS && "vreg dependencies requires LiveIntervals"); + SlotIndex UseIdx = LIS->getInstructionIndex(MI).getRegSlot(); + LiveInterval *LI = &LIS->getInterval(Reg); + VNInfo *VNI = LI->getVNInfoBefore(UseIdx); + // VNI will be valid because MachineOperand::readsReg() is checked by caller. + MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def); + // Phis and other noninstructions (after coalescing) have a NULL Def. + if (Def) { + SUnit *DefSU = getSUnit(Def); + if (DefSU) { + // The reaching Def lives within this scheduling region. + // Create a data dependence. + // + // TODO: Handle "special" address latencies cleanly. + const SDep &dep = SDep(DefSU, SDep::Data, DefSU->Latency, Reg); + if (!UnitLatencies) { + // Adjust the dependence latency using operand def/use information, then + // allow the target to perform its own adjustments. + computeOperandLatency(DefSU, SU, const_cast<SDep &>(dep)); + const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); + ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep)); + } + SU->addPred(dep); + } + } + + // Add antidependence to the following def of the vreg it uses. + VReg2SUnitMap::iterator DefI = findVRegDef(Reg); + if (DefI != VRegDefs.end() && DefI->SU != SU) + DefI->SU->addPred(SDep(SU, SDep::Anti, 0, Reg)); +} + +/// Create an SUnit for each real instruction, numbered in top-down toplological +/// order. The instruction order A < B, implies that no edge exists from B to A. +/// +/// Map each real instruction to its SUnit. +/// +/// After initSUnits, the SUnits vector cannot be resized and the scheduler may +/// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs +/// instead of pointers. +/// +/// MachineScheduler relies on initSUnits numbering the nodes by their order in +/// the original instruction list. +void ScheduleDAGInstrs::initSUnits() { + // We'll be allocating one SUnit for each real instruction in the region, + // which is contained within a basic block. 
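// Sketch, not part of the patch: the virtual-register side of dependence
// construction reduced to its core map, with invented names. VRegDefs above
// plays the role of CurrentDef here: since the walk is bottom-up it always
// holds the nearest following definition of a vreg, so a new def draws an
// output edge to it and a use draws an anti edge to it, while the data edge
// comes from the reaching definition found through LiveIntervals.
#include <unordered_map>

struct Node { int Id; };

static void addEdge(Node *Pred, Node *Succ, const char *Kind) {
  (void)Pred; (void)Succ; (void)Kind;               // Succ must follow Pred
}

struct VRegDeps {
  std::unordered_map<unsigned, Node*> CurrentDef;   // vreg -> next def below

  void visitDef(Node *SU, unsigned VReg) {
    std::unordered_map<unsigned, Node*>::iterator I = CurrentDef.find(VReg);
    if (I != CurrentDef.end() && I->second != SU)
      addEdge(SU, I->second, "output");             // later redef waits for SU
    CurrentDef[VReg] = SU;
  }

  void visitUse(Node *SU, unsigned VReg, Node *ReachingDef) {
    if (ReachingDef)                                // def found in this region
      addEdge(ReachingDef, SU, "data");
    std::unordered_map<unsigned, Node*>::iterator I = CurrentDef.find(VReg);
    if (I != CurrentDef.end() && I->second != SU)
      addEdge(SU, I->second, "anti");               // later def waits for use
  }
};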
SUnits.reserve(BB->size()); + for (MachineBasicBlock::iterator I = RegionBegin; I != RegionEnd; ++I) { + MachineInstr *MI = I; + if (MI->isDebugValue()) + continue; + + SUnit *SU = newSUnit(MI); + MISUnitMap[MI] = SU; + + SU->isCall = MI->isCall(); + SU->isCommutable = MI->isCommutable(); + + // Assign the Latency field of SU using target-provided information. + if (UnitLatencies) + SU->Latency = 1; + else + computeLatency(SU); + } +} + +void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) { + // Create an SUnit for each real instruction. + initSUnits(); + // We build scheduling units by walking a block's instruction list from bottom // to top. @@ -203,29 +519,29 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { std::map<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs; std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses; - // Check to see if the scheduler cares about latencies. - bool UnitLatencies = ForceUnitLatencies(); - - // Ask the target if address-backscheduling is desirable, and if so how much. - const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); - unsigned SpecialAddressLatency = ST.getSpecialAddressLatency(); - // Remove any stale debug info; sometimes BuildSchedGraph is called again // without emitting the info from the previous call. DbgValues.clear(); FirstDbgValue = NULL; + assert(Defs.empty() && Uses.empty() && + "Only BuildGraph should update Defs/Uses"); + Defs.setRegLimit(TRI->getNumRegs()); + Uses.setRegLimit(TRI->getNumRegs()); + + assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs"); + // FIXME: Allow SparseSet to reserve space for the creation of virtual + // registers during scheduling. Don't artificially inflate the Universe + // because we want to assert that vregs are not created during DAG building. + VRegDefs.setUniverse(MRI.getNumVirtRegs()); + // Model data dependencies between instructions being scheduled and the // ExitSU. - AddSchedBarrierDeps(); - - for (int i = 0, e = TRI->getNumRegs(); i != e; ++i) { - assert(Defs[i].empty() && "Only BuildGraph should push/pop Defs"); - } + addSchedBarrierDeps(); // Walk the list of instructions, from bottom moving up. MachineInstr *PrevMI = NULL; - for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin; + for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin; MII != MIE; --MII) { MachineInstr *MI = prior(MII); if (MI && PrevMI) { @@ -238,19 +554,11 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { continue; } - const MCInstrDesc &MCID = MI->getDesc(); - assert(!MCID.isTerminator() && !MI->isLabel() && + assert(!MI->isTerminator() && !MI->isLabel() && "Cannot schedule terminators or labels!"); - // Create the SUnit for this MI. - SUnit *SU = NewSUnit(MI); - SU->isCall = MCID.isCall(); - SU->isCommutable = MCID.isCommutable(); - // Assign the Latency field of SU using target-provided information. - if (UnitLatencies) - SU->Latency = 1; - else - ComputeLatency(SU); + SUnit *SU = MISUnitMap[MI]; + assert(SU && "No SUnit mapped to this MI"); // Add register-based dependencies (data, anti, and output). for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) { @@ -259,152 +567,14 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { unsigned Reg = MO.getReg(); if (Reg == 0) continue; - assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!"); - - std::vector<SUnit *> &UseList = Uses[Reg]; - // Defs are push in the order they are visited and never reordered. 
- std::vector<SUnit *> &DefList = Defs[Reg]; - // Optionally add output and anti dependencies. For anti - // dependencies we use a latency of 0 because for a multi-issue - // target we want to allow the defining instruction to issue - // in the same cycle as the using instruction. - // TODO: Using a latency of 1 here for output dependencies assumes - // there's no cost for reusing registers. - SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output; - unsigned AOLatency = (Kind == SDep::Anti) ? 0 : 1; - for (unsigned i = 0, e = DefList.size(); i != e; ++i) { - SUnit *DefSU = DefList[i]; - if (DefSU == &ExitSU) - continue; - if (DefSU != SU && - (Kind != SDep::Output || !MO.isDead() || - !DefSU->getInstr()->registerDefIsDead(Reg))) - DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/Reg)); - } - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { - std::vector<SUnit *> &MemDefList = Defs[*Alias]; - for (unsigned i = 0, e = MemDefList.size(); i != e; ++i) { - SUnit *DefSU = MemDefList[i]; - if (DefSU == &ExitSU) - continue; - if (DefSU != SU && - (Kind != SDep::Output || !MO.isDead() || - !DefSU->getInstr()->registerDefIsDead(*Alias))) - DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/ *Alias)); - } - } - - if (MO.isDef()) { - // Add any data dependencies. - unsigned DataLatency = SU->Latency; - for (unsigned i = 0, e = UseList.size(); i != e; ++i) { - SUnit *UseSU = UseList[i]; - if (UseSU == SU) - continue; - unsigned LDataLatency = DataLatency; - // Optionally add in a special extra latency for nodes that - // feed addresses. - // TODO: Do this for register aliases too. - // TODO: Perhaps we should get rid of - // SpecialAddressLatency and just move this into - // adjustSchedDependency for the targets that care about it. - if (SpecialAddressLatency != 0 && !UnitLatencies && - UseSU != &ExitSU) { - MachineInstr *UseMI = UseSU->getInstr(); - const MCInstrDesc &UseMCID = UseMI->getDesc(); - int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg); - assert(RegUseIndex >= 0 && "UseMI doesn's use register!"); - if (RegUseIndex >= 0 && - (UseMCID.mayLoad() || UseMCID.mayStore()) && - (unsigned)RegUseIndex < UseMCID.getNumOperands() && - UseMCID.OpInfo[RegUseIndex].isLookupPtrRegClass()) - LDataLatency += SpecialAddressLatency; - } - // Adjust the dependence latency using operand def/use - // information (if any), and then allow the target to - // perform its own adjustments. - const SDep& dep = SDep(SU, SDep::Data, LDataLatency, Reg); - if (!UnitLatencies) { - ComputeOperandLatency(SU, UseSU, const_cast<SDep &>(dep)); - ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep)); - } - UseSU->addPred(dep); - } - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { - std::vector<SUnit *> &UseList = Uses[*Alias]; - for (unsigned i = 0, e = UseList.size(); i != e; ++i) { - SUnit *UseSU = UseList[i]; - if (UseSU == SU) - continue; - const SDep& dep = SDep(SU, SDep::Data, DataLatency, *Alias); - if (!UnitLatencies) { - ComputeOperandLatency(SU, UseSU, const_cast<SDep &>(dep)); - ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep)); - } - UseSU->addPred(dep); - } - } - - // If a def is going to wrap back around to the top of the loop, - // backschedule it. 
- if (!UnitLatencies && DefList.empty()) { - LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(Reg); - if (I != LoopRegs.Deps.end()) { - const MachineOperand *UseMO = I->second.first; - unsigned Count = I->second.second; - const MachineInstr *UseMI = UseMO->getParent(); - unsigned UseMOIdx = UseMO - &UseMI->getOperand(0); - const MCInstrDesc &UseMCID = UseMI->getDesc(); - // TODO: If we knew the total depth of the region here, we could - // handle the case where the whole loop is inside the region but - // is large enough that the isScheduleHigh trick isn't needed. - if (UseMOIdx < UseMCID.getNumOperands()) { - // Currently, we only support scheduling regions consisting of - // single basic blocks. Check to see if the instruction is in - // the same region by checking to see if it has the same parent. - if (UseMI->getParent() != MI->getParent()) { - unsigned Latency = SU->Latency; - if (UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass()) - Latency += SpecialAddressLatency; - // This is a wild guess as to the portion of the latency which - // will be overlapped by work done outside the current - // scheduling region. - Latency -= std::min(Latency, Count); - // Add the artificial edge. - ExitSU.addPred(SDep(SU, SDep::Order, Latency, - /*Reg=*/0, /*isNormalMemory=*/false, - /*isMustAlias=*/false, - /*isArtificial=*/true)); - } else if (SpecialAddressLatency > 0 && - UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass()) { - // The entire loop body is within the current scheduling region - // and the latency of this operation is assumed to be greater - // than the latency of the loop. - // TODO: Recursively mark data-edge predecessors as - // isScheduleHigh too. - SU->isScheduleHigh = true; - } - } - LoopRegs.Deps.erase(I); - } - } - - UseList.clear(); - if (!MO.isDead()) - DefList.clear(); - - // Calls will not be reordered because of chain dependencies (see - // below). Since call operands are dead, calls may continue to be added - // to the DefList making dependence checking quadratic in the size of - // the block. Instead, we leave only one call at the back of the - // DefList. - if (SU->isCall) { - while (!DefList.empty() && DefList.back()->isCall) - DefList.pop_back(); - } - DefList.push_back(SU); - } else { - UseList.push_back(SU); + if (TRI->isPhysicalRegister(Reg)) + addPhysRegDeps(SU, j); + else { + assert(!IsPostRA && "Virtual register encountered!"); + if (MO.isDef()) + addVRegDefDeps(SU, j); + else if (MO.readsReg()) // ignore undef operands + addVRegUseDeps(SU, j); } } @@ -419,9 +589,9 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { // produce more precise dependence information. #define STORE_LOAD_LATENCY 1 unsigned TrueMemOrderLatency = 0; - if (MCID.isCall() || MI->hasUnmodeledSideEffects() || + if (MI->isCall() || MI->hasUnmodeledSideEffects() || (MI->hasVolatileMemoryRef() && - (!MCID.mayLoad() || !MI->isInvariantLoad(AA)))) { + (!MI->mayLoad() || !MI->isInvariantLoad(AA)))) { // Be conservative with these and add dependencies on all memory // references, even those that are known to not alias. 
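The replacement code above routes each register operand to one of three helpers: physical registers to addPhysRegDeps, virtual-register defs to addVRegDefDeps, and virtual-register reads to addVRegUseDeps, with undef reads skipped. A standalone sketch of that dispatch shape; the register encoding and helper bodies here are illustrative placeholders, not LLVM's:

#include <cstdint>
#include <cstdio>

// Illustrative encoding only: small numbers are "physical", numbers with the
// top bit set are "virtual".
static bool isPhysical(uint32_t Reg) { return (Reg & 0x80000000u) == 0; }

struct Operand { uint32_t Reg; bool IsDef; bool ReadsReg; };

static void addPhysRegDeps(const Operand &) { std::puts("phys-reg deps"); }
static void addVRegDefDeps(const Operand &) { std::puts("vreg def deps"); }
static void addVRegUseDeps(const Operand &) { std::puts("vreg use deps"); }

static void addDepsForOperand(const Operand &MO) {
  if (MO.Reg == 0)
    return;                          // not a register operand
  if (isPhysical(MO.Reg))
    addPhysRegDeps(MO);
  else if (MO.IsDef)
    addVRegDefDeps(MO);
  else if (MO.ReadsReg)              // ignore undef reads
    addVRegUseDeps(MO);
}

int main() {
  addDepsForOperand({5, false, true});            // physical use
  addDepsForOperand({0x80000001u, true, false});  // virtual def
}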
for (std::map<const Value *, SUnit *>::iterator I = @@ -460,7 +630,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { PendingLoads.clear(); AliasMemDefs.clear(); AliasMemUses.clear(); - } else if (MCID.mayStore()) { + } else if (MI->mayStore()) { bool MayAlias = true; TrueMemOrderLatency = STORE_LOAD_LATENCY; if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) { @@ -516,7 +686,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { /*Reg=*/0, /*isNormalMemory=*/false, /*isMustAlias=*/false, /*isArtificial=*/true)); - } else if (MCID.mayLoad()) { + } else if (MI->mayLoad()) { bool MayAlias = true; TrueMemOrderLatency = 0; if (MI->isInvariantLoad(AA)) { @@ -558,32 +728,27 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { if (PrevMI) FirstDbgValue = PrevMI; - for (int i = 0, e = TRI->getNumRegs(); i != e; ++i) { - Defs[i].clear(); - Uses[i].clear(); - } + Defs.clear(); + Uses.clear(); + VRegDefs.clear(); PendingLoads.clear(); } -void ScheduleDAGInstrs::FinishBlock() { - // Nothing to do. -} - -void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) { +void ScheduleDAGInstrs::computeLatency(SUnit *SU) { // Compute the latency for the node. if (!InstrItins || InstrItins->isEmpty()) { SU->Latency = 1; // Simplistic target-independent heuristic: assume that loads take // extra time. - if (SU->getInstr()->getDesc().mayLoad()) + if (SU->getInstr()->mayLoad()) SU->Latency += 2; } else { SU->Latency = TII->getInstrLatency(InstrItins, SU->getInstr()); } } -void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use, +void ScheduleDAGInstrs::computeOperandLatency(SUnit *Def, SUnit *Use, SDep& dep) const { if (!InstrItins || InstrItins->isEmpty()) return; @@ -608,7 +773,9 @@ void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use, // %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ... // What we want is to compute latency between def of %D6/%D7 and use of // %Q3 instead. - DefIdx = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI); + unsigned Op2 = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI); + if (DefMI->getOperand(Op2).isReg()) + DefIdx = Op2; } MachineInstr *UseMI = Use->getInstr(); // For all uses of the register, calculate the maxmimum latency @@ -656,43 +823,8 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const { return oss.str(); } -// EmitSchedule - Emit the machine code in scheduled order. -MachineBasicBlock *ScheduleDAGInstrs::EmitSchedule() { - // For MachineInstr-based scheduling, we're rescheduling the instructions in - // the block, so start by removing them from the block. - while (Begin != InsertPos) { - MachineBasicBlock::iterator I = Begin; - ++Begin; - BB->remove(I); - } - - // If first instruction was a DBG_VALUE then put it back. - if (FirstDbgValue) - BB->insert(InsertPos, FirstDbgValue); - - // Then re-insert them according to the given schedule. - for (unsigned i = 0, e = Sequence.size(); i != e; i++) { - if (SUnit *SU = Sequence[i]) - BB->insert(InsertPos, SU->getInstr()); - else - // Null SUnit* is a noop. - EmitNoop(); - } - - // Update the Begin iterator, as the first instruction in the block - // may have been scheduled later. - if (!Sequence.empty()) - Begin = Sequence[0]->getInstr(); - - // Reinsert any remaining debug_values. 
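The renamed computeLatency above keeps the old fallback behaviour: without itinerary data every node gets latency 1, with loads bumped by 2 as a crude memory-latency guess; with itineraries the target's getInstrLatency is used instead. A minimal sketch of that fallback heuristic:

#include <cassert>

struct Node { bool MayLoad; unsigned Latency; };

// Fallback when no itinerary data is available: unit latency, plus a small
// constant for loads so their consumers are not scheduled right behind them.
static void computeFallbackLatency(Node &N) {
  N.Latency = 1;
  if (N.MayLoad)
    N.Latency += 2;
}

int main() {
  Node Add{false, 0}, Load{true, 0};
  computeFallbackLatency(Add);
  computeFallbackLatency(Load);
  assert(Add.Latency == 1 && Load.Latency == 3);
}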
- for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator - DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) { - std::pair<MachineInstr *, MachineInstr *> P = *prior(DI); - MachineInstr *DbgValue = P.first; - MachineInstr *OrigPrivMI = P.second; - BB->insertAfter(OrigPrivMI, DbgValue); - } - DbgValues.clear(); - FirstDbgValue = NULL; - return BB; +/// Return the basic block label. It is not necessarilly unique because a block +/// contains multiple scheduling regions. But it is fine for visualization. +std::string ScheduleDAGInstrs::getDAGName() const { + return "dag." + BB->getFullName(); } diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h deleted file mode 100644 index 666bdf548c71..000000000000 --- a/lib/CodeGen/ScheduleDAGInstrs.h +++ /dev/null @@ -1,212 +0,0 @@ -//==- ScheduleDAGInstrs.h - MachineInstr Scheduling --------------*- C++ -*-==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the ScheduleDAGInstrs class, which implements -// scheduling for a MachineInstr-based dependency graph. -// -//===----------------------------------------------------------------------===// - -#ifndef SCHEDULEDAGINSTRS_H -#define SCHEDULEDAGINSTRS_H - -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/ScheduleDAG.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/ADT/SmallSet.h" -#include <map> - -namespace llvm { - class MachineLoopInfo; - class MachineDominatorTree; - - /// LoopDependencies - This class analyzes loop-oriented register - /// dependencies, which are used to guide scheduling decisions. - /// For example, loop induction variable increments should be - /// scheduled as soon as possible after the variable's last use. - /// - class LLVM_LIBRARY_VISIBILITY LoopDependencies { - const MachineLoopInfo &MLI; - const MachineDominatorTree &MDT; - - public: - typedef std::map<unsigned, std::pair<const MachineOperand *, unsigned> > - LoopDeps; - LoopDeps Deps; - - LoopDependencies(const MachineLoopInfo &mli, - const MachineDominatorTree &mdt) : - MLI(mli), MDT(mdt) {} - - /// VisitLoop - Clear out any previous state and analyze the given loop. 
- /// - void VisitLoop(const MachineLoop *Loop) { - assert(Deps.empty() && "stale loop dependencies"); - - MachineBasicBlock *Header = Loop->getHeader(); - SmallSet<unsigned, 8> LoopLiveIns; - for (MachineBasicBlock::livein_iterator LI = Header->livein_begin(), - LE = Header->livein_end(); LI != LE; ++LI) - LoopLiveIns.insert(*LI); - - const MachineDomTreeNode *Node = MDT.getNode(Header); - const MachineBasicBlock *MBB = Node->getBlock(); - assert(Loop->contains(MBB) && - "Loop does not contain header!"); - VisitRegion(Node, MBB, Loop, LoopLiveIns); - } - - private: - void VisitRegion(const MachineDomTreeNode *Node, - const MachineBasicBlock *MBB, - const MachineLoop *Loop, - const SmallSet<unsigned, 8> &LoopLiveIns) { - unsigned Count = 0; - for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); - I != E; ++I) { - const MachineInstr *MI = I; - if (MI->isDebugValue()) - continue; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isUse()) - continue; - unsigned MOReg = MO.getReg(); - if (LoopLiveIns.count(MOReg)) - Deps.insert(std::make_pair(MOReg, std::make_pair(&MO, Count))); - } - ++Count; // Not every iteration due to dbg_value above. - } - - const std::vector<MachineDomTreeNode*> &Children = Node->getChildren(); - for (std::vector<MachineDomTreeNode*>::const_iterator I = - Children.begin(), E = Children.end(); I != E; ++I) { - const MachineDomTreeNode *ChildNode = *I; - MachineBasicBlock *ChildBlock = ChildNode->getBlock(); - if (Loop->contains(ChildBlock)) - VisitRegion(ChildNode, ChildBlock, Loop, LoopLiveIns); - } - } - }; - - /// ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of - /// MachineInstrs. - class LLVM_LIBRARY_VISIBILITY ScheduleDAGInstrs : public ScheduleDAG { - const MachineLoopInfo &MLI; - const MachineDominatorTree &MDT; - const MachineFrameInfo *MFI; - const InstrItineraryData *InstrItins; - - /// Defs, Uses - Remember where defs and uses of each physical register - /// are as we iterate upward through the instructions. This is allocated - /// here instead of inside BuildSchedGraph to avoid the need for it to be - /// initialized and destructed for each block. - std::vector<std::vector<SUnit *> > Defs; - std::vector<std::vector<SUnit *> > Uses; - - /// PendingLoads - Remember where unknown loads are after the most recent - /// unknown store, as we iterate. As with Defs and Uses, this is here - /// to minimize construction/destruction. - std::vector<SUnit *> PendingLoads; - - /// LoopRegs - Track which registers are used for loop-carried dependencies. - /// - LoopDependencies LoopRegs; - - /// LoopLiveInRegs - Track which regs are live into a loop, to help guide - /// back-edge-aware scheduling. - /// - SmallSet<unsigned, 8> LoopLiveInRegs; - - protected: - - /// DbgValues - Remember instruction that preceeds DBG_VALUE. - typedef std::vector<std::pair<MachineInstr *, MachineInstr *> > - DbgValueVector; - DbgValueVector DbgValues; - MachineInstr *FirstDbgValue; - - public: - MachineBasicBlock::iterator Begin; // The beginning of the range to - // be scheduled. The range extends - // to InsertPos. - unsigned InsertPosIndex; // The index in BB of InsertPos. - - explicit ScheduleDAGInstrs(MachineFunction &mf, - const MachineLoopInfo &mli, - const MachineDominatorTree &mdt); - - virtual ~ScheduleDAGInstrs() {} - - /// NewSUnit - Creates a new SUnit and return a ptr to it. 
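The header being deleted above carried the LoopDependencies helper: for each register live into the loop header it records the first use found in the loop body (discovered by walking the dominator tree under the loop) together with how many instructions precede that use, and the scheduler consulted that map for back-edge-aware scheduling. A simplified standalone sketch of that bookkeeping, with stand-in types:

#include <cassert>
#include <map>
#include <set>
#include <vector>

struct Instr { std::vector<unsigned> UsedRegs; };

// Register -> (first using instruction, number of instructions before it).
using LoopDeps = std::map<unsigned, std::pair<const Instr *, unsigned>>;

static LoopDeps collectLoopDeps(const std::set<unsigned> &LiveIns,
                                const std::vector<Instr> &Body) {
  LoopDeps Deps;
  unsigned Count = 0;
  for (const Instr &MI : Body) {
    for (unsigned Reg : MI.UsedRegs)
      if (LiveIns.count(Reg))
        Deps.insert({Reg, {&MI, Count}});  // map::insert keeps the first use only
    ++Count;
  }
  return Deps;
}

int main() {
  std::set<unsigned> LiveIns = {7};
  std::vector<Instr> Body = {{{3, 7}}, {{7}}};
  LoopDeps D = collectLoopDeps(LiveIns, Body);
  assert(D.at(7).second == 0);             // first use of reg 7 is instruction 0
}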
- /// - SUnit *NewSUnit(MachineInstr *MI) { -#ifndef NDEBUG - const SUnit *Addr = SUnits.empty() ? 0 : &SUnits[0]; -#endif - SUnits.push_back(SUnit(MI, (unsigned)SUnits.size())); - assert((Addr == 0 || Addr == &SUnits[0]) && - "SUnits std::vector reallocated on the fly!"); - SUnits.back().OrigNode = &SUnits.back(); - return &SUnits.back(); - } - - /// Run - perform scheduling. - /// - void Run(MachineBasicBlock *bb, - MachineBasicBlock::iterator begin, - MachineBasicBlock::iterator end, - unsigned endindex); - - /// BuildSchedGraph - Build SUnits from the MachineBasicBlock that we are - /// input. - virtual void BuildSchedGraph(AliasAnalysis *AA); - - /// AddSchedBarrierDeps - Add dependencies from instructions in the current - /// list of instructions being scheduled to scheduling barrier. We want to - /// make sure instructions which define registers that are either used by - /// the terminator or are live-out are properly scheduled. This is - /// especially important when the definition latency of the return value(s) - /// are too high to be hidden by the branch or when the liveout registers - /// used by instructions in the fallthrough block. - void AddSchedBarrierDeps(); - - /// ComputeLatency - Compute node latency. - /// - virtual void ComputeLatency(SUnit *SU); - - /// ComputeOperandLatency - Override dependence edge latency using - /// operand use/def information - /// - virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use, - SDep& dep) const; - - virtual MachineBasicBlock *EmitSchedule(); - - /// StartBlock - Prepare to perform scheduling in the given block. - /// - virtual void StartBlock(MachineBasicBlock *BB); - - /// Schedule - Order nodes according to selected style, filling - /// in the Sequence member. - /// - virtual void Schedule() = 0; - - /// FinishBlock - Clean up after scheduling in the given block. - /// - virtual void FinishBlock(); - - virtual void dumpNode(const SUnit *SU) const; - - virtual std::string getGraphNodeLabel(const SUnit *SU) const; - }; -} - -#endif diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp index 4b55a2284f85..38feee95a58e 100644 --- a/lib/CodeGen/ScheduleDAGPrinter.cpp +++ b/lib/CodeGen/ScheduleDAGPrinter.cpp @@ -25,7 +25,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Config/config.h" #include <fstream> using namespace llvm; @@ -42,12 +41,12 @@ namespace llvm { static bool renderGraphFromBottomUp() { return true; } - + static bool hasNodeAddressLabel(const SUnit *Node, const ScheduleDAG *Graph) { return true; } - + /// If you want to override the dot attributes printed for a particular /// edge, override this method. static std::string getEdgeAttributes(const SUnit *Node, @@ -59,7 +58,7 @@ namespace llvm { return "color=blue,style=dashed"; return ""; } - + std::string getNodeLabel(const SUnit *Node, const ScheduleDAG *Graph); static std::string getNodeAttributes(const SUnit *N, @@ -82,18 +81,17 @@ std::string DOTGraphTraits<ScheduleDAG*>::getNodeLabel(const SUnit *SU, /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG /// rendered using 'dot'. /// -void ScheduleDAG::viewGraph() { -// This code is only for debugging! +void ScheduleDAG::viewGraph(const Twine &Name, const Twine &Title) { + // This code is only for debugging! #ifndef NDEBUG - if (BB->getBasicBlock()) - ViewGraph(this, "dag." 
+ MF.getFunction()->getNameStr(), false, - "Scheduling-Units Graph for " + MF.getFunction()->getNameStr() + - ":" + BB->getBasicBlock()->getNameStr()); - else - ViewGraph(this, "dag." + MF.getFunction()->getNameStr(), false, - "Scheduling-Units Graph for " + MF.getFunction()->getNameStr()); + ViewGraph(this, Name, false, Title); #else errs() << "ScheduleDAG::viewGraph is only available in debug builds on " << "systems with Graphviz or gv!\n"; #endif // NDEBUG } + +/// Out-of-line implementation with no arguments is handy for gdb. +void ScheduleDAG::viewGraph() { + viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName()); +} diff --git a/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp index b80c01ed58b9..3d22035974da 100644 --- a/lib/CodeGen/ScoreboardHazardRecognizer.cpp +++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp @@ -140,8 +140,6 @@ ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { unsigned freeUnits = IS->getUnits(); switch (IS->getReservationKind()) { - default: - assert(0 && "Invalid FU reservation"); case InstrStage::Required: // Required FUs conflict with both reserved and required ones freeUnits &= ~ReservedScoreboard[StageCycle]; @@ -194,8 +192,6 @@ void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) { unsigned freeUnits = IS->getUnits(); switch (IS->getReservationKind()) { - default: - assert(0 && "Invalid FU reservation"); case InstrStage::Required: // Required FUs conflict with both reserved and required ones freeUnits &= ~ReservedScoreboard[cycle + i]; diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt index 2282f0e6eb83..a6bdc3be32e0 100644 --- a/lib/CodeGen/SelectionDAG/CMakeLists.txt +++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt @@ -10,24 +10,16 @@ add_llvm_library(LLVMSelectionDAG LegalizeTypesGeneric.cpp LegalizeVectorOps.cpp LegalizeVectorTypes.cpp + ResourcePriorityQueue.cpp ScheduleDAGFast.cpp - ScheduleDAGList.cpp ScheduleDAGRRList.cpp ScheduleDAGSDNodes.cpp SelectionDAG.cpp SelectionDAGBuilder.cpp + SelectionDAGDumper.cpp SelectionDAGISel.cpp SelectionDAGPrinter.cpp + ScheduleDAGVLIW.cpp TargetLowering.cpp TargetSelectionDAGInfo.cpp ) - -add_llvm_library_dependencies(LLVMSelectionDAG - LLVMAnalysis - LLVMCodeGen - LLVMCore - LLVMMC - LLVMSupport - LLVMTarget - LLVMTransformUtils - ) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 7b878688df63..d1b998f8d840 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -22,7 +22,6 @@ #include "llvm/LLVMContext.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLowering.h" @@ -64,7 +63,24 @@ namespace { bool LegalTypes; // Worklist of all of the nodes that need to be simplified. - std::vector<SDNode*> WorkList; + // + // This has the semantics that when adding to the worklist, + // the item added must be next to be processed. It should + // also only appear once. The naive approach to this takes + // linear time. + // + // To reduce the insert/remove time to logarithmic, we use + // a set and a vector to maintain our worklist. + // + // The set contains the items on the worklist, but does not + // maintain the order they should be visited. 
+ // + // The vector maintains the order nodes should be visited, but may + // contain duplicate or removed nodes. When choosing a node to + // visit, we pop off the order stack until we find an item that is + // also in the contents set. All operations are O(log N). + SmallPtrSet<SDNode*, 64> WorkListContents; + SmallVector<SDNode*, 64> WorkListOrder; // AA - Used for DAG load/store alias analysis. AliasAnalysis &AA; @@ -84,18 +100,17 @@ namespace { SDValue visit(SDNode *N); public: - /// AddToWorkList - Add to the work list making sure it's instance is at the - /// the back (next to be processed.) + /// AddToWorkList - Add to the work list making sure its instance is at the + /// back (next to be processed.) void AddToWorkList(SDNode *N) { - removeFromWorkList(N); - WorkList.push_back(N); + WorkListContents.insert(N); + WorkListOrder.push_back(N); } /// removeFromWorkList - remove all instances of N from the worklist. /// void removeFromWorkList(SDNode *N) { - WorkList.erase(std::remove(WorkList.begin(), WorkList.end(), N), - WorkList.end()); + WorkListContents.erase(N); } SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, @@ -159,7 +174,9 @@ namespace { SDValue visitADD(SDNode *N); SDValue visitSUB(SDNode *N); SDValue visitADDC(SDNode *N); + SDValue visitSUBC(SDNode *N); SDValue visitADDE(SDNode *N); + SDValue visitSUBE(SDNode *N); SDValue visitMUL(SDNode *N); SDValue visitSDIV(SDNode *N); SDValue visitUDIV(SDNode *N); @@ -181,7 +198,9 @@ namespace { SDValue visitSRA(SDNode *N); SDValue visitSRL(SDNode *N); SDValue visitCTLZ(SDNode *N); + SDValue visitCTLZ_ZERO_UNDEF(SDNode *N); SDValue visitCTTZ(SDNode *N); + SDValue visitCTTZ_ZERO_UNDEF(SDNode *N); SDValue visitCTPOP(SDNode *N); SDValue visitSELECT(SDNode *N); SDValue visitSELECT_CC(SDNode *N); @@ -279,7 +298,7 @@ namespace { public: DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) - : DAG(D), TLI(D.getTargetLoweringInfo()), Level(Unrestricted), + : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {} /// Run - runs the dag combiner on all nodes in the work list @@ -362,6 +381,8 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { /// specified expression for the same cost as the expression itself, or 2 if we /// can compute the negated form more cheaply than the expression itself. static char isNegatibleForFree(SDValue Op, bool LegalOperations, + const TargetLowering &TLI, + const TargetOptions *Options, unsigned Depth = 0) { // No compile time optimizations on this type. if (Op.getValueType() == MVT::ppcf128) @@ -384,34 +405,44 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations, return LegalOperations ? 0 : 1; case ISD::FADD: // FIXME: determine better conditions for this xform. - if (!UnsafeFPMath) return 0; + if (!Options->UnsafeFPMath) return 0; + + // After operation legalization, it might not be legal to create new FSUBs. 
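The comment block and members added above describe the DAGCombiner worklist rework: membership lives in a set, visitation order in a vector that may hold stale entries, and a node is only processed when popping it from the vector also succeeds in erasing it from the set. A standalone sketch of that pattern using standard containers (the real code uses SmallPtrSet and SmallVector):

#include <cstdio>
#include <unordered_set>
#include <vector>

struct Node { int Id; };

struct WorkList {
  std::unordered_set<Node *> Contents;  // what is currently on the worklist
  std::vector<Node *> Order;            // visitation order; may hold stale entries

  void add(Node *N) {                   // no linear-time erase on re-add
    Contents.insert(N);
    Order.push_back(N);
  }
  void remove(Node *N) { Contents.erase(N); }

  Node *pop() {                         // skip entries removed since they were pushed
    while (!Order.empty()) {
      Node *N = Order.back();
      Order.pop_back();
      if (Contents.erase(N))
        return N;
    }
    return nullptr;
  }
};

int main() {
  Node A{1}, B{2};
  WorkList WL;
  WL.add(&A);
  WL.add(&B);
  WL.remove(&B);                        // B becomes a stale entry in Order
  while (Node *N = WL.pop())
    std::printf("visit %d\n", N->Id);   // visits only A
}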
+ if (LegalOperations && + !TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) + return 0; // fold (fsub (fadd A, B)) -> (fsub (fneg A), B) - if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1)) + if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, + Options, Depth + 1)) return V; // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) - return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1); + return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, + Depth + 1); case ISD::FSUB: // We can't turn -(A-B) into B-A when we honor signed zeros. - if (!UnsafeFPMath) return 0; + if (!Options->UnsafeFPMath) return 0; // fold (fneg (fsub A, B)) -> (fsub B, A) return 1; case ISD::FMUL: case ISD::FDIV: - if (HonorSignDependentRoundingFPMath()) return 0; + if (Options->HonorSignDependentRoundingFPMath()) return 0; // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) - if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1)) + if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, + Options, Depth + 1)) return V; - return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1); + return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, + Depth + 1); case ISD::FP_EXTEND: case ISD::FP_ROUND: case ISD::FSIN: - return isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1); + return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options, + Depth + 1); } } @@ -435,10 +466,12 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, } case ISD::FADD: // FIXME: determine better conditions for this xform. - assert(UnsafeFPMath); + assert(DAG.getTarget().Options.UnsafeFPMath); // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) - if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1)) + if (isNegatibleForFree(Op.getOperand(0), LegalOperations, + DAG.getTargetLoweringInfo(), + &DAG.getTarget().Options, Depth+1)) return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, Depth+1), @@ -450,7 +483,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, Op.getOperand(0)); case ISD::FSUB: // We can't turn -(A-B) into B-A when we honor signed zeros. - assert(UnsafeFPMath); + assert(DAG.getTarget().Options.UnsafeFPMath); // fold (fneg (fsub 0, B)) -> B if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0))) @@ -463,10 +496,12 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, case ISD::FMUL: case ISD::FDIV: - assert(!HonorSignDependentRoundingFPMath()); + assert(!DAG.getTarget().Options.HonorSignDependentRoundingFPMath()); // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) - if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1)) + if (isNegatibleForFree(Op.getOperand(0), LegalOperations, + DAG.getTargetLoweringInfo(), + &DAG.getTarget().Options, Depth+1)) return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, Depth+1), @@ -944,14 +979,13 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { void DAGCombiner::Run(CombineLevel AtLevel) { // set the instance variables, so that the various visit routines may use it. 
Level = AtLevel; - LegalOperations = Level >= NoIllegalOperations; - LegalTypes = Level >= NoIllegalTypes; + LegalOperations = Level >= AfterLegalizeVectorOps; + LegalTypes = Level >= AfterLegalizeTypes; // Add all the dag nodes to the worklist. - WorkList.reserve(DAG.allnodes_size()); for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), E = DAG.allnodes_end(); I != E; ++I) - WorkList.push_back(I); + AddToWorkList(I); // Create a dummy node (which is not added to allnodes), that adds a reference // to the root node, preventing it from being deleted, and tracking any @@ -962,11 +996,17 @@ void DAGCombiner::Run(CombineLevel AtLevel) { // done. Set it to null to avoid confusion. DAG.setRoot(SDValue()); - // while the worklist isn't empty, inspect the node on the end of it and + // while the worklist isn't empty, find a node and // try and combine it. - while (!WorkList.empty()) { - SDNode *N = WorkList.back(); - WorkList.pop_back(); + while (!WorkListContents.empty()) { + SDNode *N; + // The WorkListOrder holds the SDNodes in order, but it may contain duplicates. + // In order to avoid a linear scan, we use a set (O(log N)) to hold what the + // worklist *should* contain, and check the node we want to visit is should + // actually be visited. + do { + N = WorkListOrder.pop_back_val(); + } while (!WorkListContents.erase(N)); // If N has no uses, it is dead. Make sure to revisit all N's operands once // N is deleted from the DAG, since they too may now be dead or may have a @@ -1050,7 +1090,9 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::ADD: return visitADD(N); case ISD::SUB: return visitSUB(N); case ISD::ADDC: return visitADDC(N); + case ISD::SUBC: return visitSUBC(N); case ISD::ADDE: return visitADDE(N); + case ISD::SUBE: return visitSUBE(N); case ISD::MUL: return visitMUL(N); case ISD::SDIV: return visitSDIV(N); case ISD::UDIV: return visitUDIV(N); @@ -1071,7 +1113,9 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::SRA: return visitSRA(N); case ISD::SRL: return visitSRL(N); case ISD::CTLZ: return visitCTLZ(N); + case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N); case ISD::CTTZ: return visitCTTZ(N); + case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N); case ISD::CTPOP: return visitCTPOP(N); case ISD::SELECT: return visitSELECT(N); case ISD::SELECT_CC: return visitSELECT_CC(N); @@ -1408,16 +1452,14 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (VT.isInteger() && !VT.isVector()) { APInt LHSZero, LHSOne; APInt RHSZero, RHSOne; - APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()); - DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne); + DAG.ComputeMaskedBits(N0, LHSZero, LHSOne); if (LHSZero.getBoolValue()) { - DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne); + DAG.ComputeMaskedBits(N1, RHSZero, RHSOne); // If all possibly-set bits on the LHS are clear on the RHS, return an OR. // If all possibly-set bits on the RHS are clear on the LHS, return an OR. - if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) || - (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask)) + if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero) return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1); } } @@ -1486,8 +1528,8 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { EVT VT = N0.getValueType(); // If the flag result is dead, turn this into an ADD. 
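The visitADD/visitADDC hunks above drop the explicit all-ones Mask argument from ComputeMaskedBits while keeping the same fold: when every bit that might be set in one operand is known zero in the other, no carry can occur, so the add can be rewritten as an or. A worked sketch of that test on plain 32-bit masks, where KnownZero plays the role of the APInt results:

#include <cassert>
#include <cstdint>

// ~KnownZero is the set of bits that could possibly be set in a value.
// If the possibly-set bits of the two addends do not overlap, x + y == x | y.
static bool addIsOr(uint32_t LHSKnownZero, uint32_t RHSKnownZero) {
  uint32_t LHSMaybeOne = ~LHSKnownZero;
  uint32_t RHSMaybeOne = ~RHSKnownZero;
  return (RHSKnownZero & LHSMaybeOne) == LHSMaybeOne ||
         (LHSKnownZero & RHSMaybeOne) == RHSMaybeOne;
}

int main() {
  // x can only set the low byte, y can only set the next byte: disjoint.
  assert(addIsOr(~0x000000FFu, ~0x0000FF00u));
  // Bit 1 is possibly set in both operands: carries are possible.
  assert(!addIsOr(~0x3u, ~0x6u));
}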
- if (N->hasNUsesOfValue(0, 1)) - return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0), + if (!N->hasAnyUseOfValue(1)) + return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, N1), DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), MVT::Glue)); @@ -1503,16 +1545,14 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits. APInt LHSZero, LHSOne; APInt RHSZero, RHSOne; - APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()); - DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne); + DAG.ComputeMaskedBits(N0, LHSZero, LHSOne); if (LHSZero.getBoolValue()) { - DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne); + DAG.ComputeMaskedBits(N1, RHSZero, RHSOne); // If all possibly-set bits on the LHS are clear on the RHS, return an OR. // If all possibly-set bits on the RHS are clear on the LHS, return an OR. - if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) || - (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask)) + if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero) return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1), DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), MVT::Glue)); @@ -1535,7 +1575,7 @@ SDValue DAGCombiner::visitADDE(SDNode *N) { // fold (adde x, y, false) -> (addc x, y) if (CarryIn.getOpcode() == ISD::CARRY_FALSE) - return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0); + return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N0, N1); return SDValue(); } @@ -1645,6 +1685,51 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitSUBC(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + EVT VT = N0.getValueType(); + + // If the flag result is dead, turn this into an SUB. + if (!N->hasAnyUseOfValue(1)) + return CombineTo(N, DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1), + DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), + MVT::Glue)); + + // fold (subc x, x) -> 0 + no borrow + if (N0 == N1) + return CombineTo(N, DAG.getConstant(0, VT), + DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), + MVT::Glue)); + + // fold (subc x, 0) -> x + no borrow + if (N1C && N1C->isNullValue()) + return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), + MVT::Glue)); + + // Canonicalize (sub -1, x) -> ~x, i.e. 
(xor x, -1) + no borrow + if (N0C && N0C->isAllOnesValue()) + return CombineTo(N, DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0), + DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), + MVT::Glue)); + + return SDValue(); +} + +SDValue DAGCombiner::visitSUBE(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue CarryIn = N->getOperand(2); + + // fold (sube x, y, false) -> (subc x, y) + if (CarryIn.getOpcode() == ISD::CARRY_FALSE) + return DAG.getNode(ISD::SUBC, N->getDebugLoc(), N->getVTList(), N0, N1); + + return SDValue(); +} + SDValue DAGCombiner::visitMUL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -1756,7 +1841,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { if (N0C && N1C && !N1C->isNullValue()) return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C); // fold (sdiv X, 1) -> X - if (N1C && N1C->getSExtValue() == 1LL) + if (N1C && N1C->getAPIntValue() == 1LL) return N0; // fold (sdiv X, -1) -> 0-X if (N1C && N1C->isAllOnesValue()) @@ -1770,17 +1855,15 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { N0, N1); } // fold (sdiv X, pow2) -> simple ops after legalize - if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap() && - (isPowerOf2_64(N1C->getSExtValue()) || - isPowerOf2_64(-N1C->getSExtValue()))) { + if (N1C && !N1C->isNullValue() && + (N1C->getAPIntValue().isPowerOf2() || + (-N1C->getAPIntValue()).isPowerOf2())) { // If dividing by powers of two is cheap, then don't perform the following // fold. if (TLI.isPow2DivCheap()) return SDValue(); - int64_t pow2 = N1C->getSExtValue(); - int64_t abs2 = pow2 > 0 ? pow2 : -pow2; - unsigned lg2 = Log2_64(abs2); + unsigned lg2 = N1C->getAPIntValue().countTrailingZeros(); // Splat the sign bit into the register SDValue SGN = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0, @@ -1800,7 +1883,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { // If we're dividing by a positive value, we're done. Otherwise, we must // negate the result. - if (pow2 > 0) + if (N1C->getAPIntValue().isNonNegative()) return SRA; AddToWorkList(SRA.getNode()); @@ -1810,8 +1893,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { // if integer divide is expensive and we satisfy the requirements, emit an // alternate sequence. - if (N1C && (N1C->getSExtValue() < -1 || N1C->getSExtValue() > 1) && - !TLI.isIntDivCheap()) { + if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) { SDValue Op = BuildSDIV(N); if (Op.getNode()) return Op; } @@ -2250,6 +2332,67 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { ORNode, N0.getOperand(1)); } + // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B)) + // Only perform this optimization after type legalization and before + // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by + // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and + // we don't want to undo this promotion. + // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper + // on scalars. + if ((N0.getOpcode() == ISD::BITCAST || N0.getOpcode() == ISD::SCALAR_TO_VECTOR) + && Level == AfterLegalizeVectorOps) { + SDValue In0 = N0.getOperand(0); + SDValue In1 = N1.getOperand(0); + EVT In0Ty = In0.getValueType(); + EVT In1Ty = In1.getValueType(); + // If both incoming values are integers, and the original types are the same. 
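The visitSDIV hunk above moves the power-of-two test and log2 computation onto APInt (isPowerOf2 / countTrailingZeros) but keeps the classic expansion: splat the sign bit, shift it down so it becomes 2^k - 1 for negative inputs, add that bias to the dividend, arithmetic-shift right by k, and negate the result for a negative divisor. A worked standalone version for 32-bit integers (two's complement and arithmetic right shift assumed, as on mainstream targets; Lg2 must be at least 1, since division by 1 is folded separately):

#include <cassert>
#include <cstdint>

// Signed division by +/-2^Lg2 without a divide, rounding toward zero like '/'.
static int32_t sdivByPow2(int32_t X, unsigned Lg2, bool NegativeDivisor) {
  uint32_t Sign = (uint32_t)(X >> 31);               // 0 or 0xFFFFFFFF
  uint32_t Bias = Sign >> (32 - Lg2);                // 0 or 2^Lg2 - 1
  int32_t Q = (int32_t)((uint32_t)X + Bias) >> Lg2;  // add bias, then >> (arithmetic)
  return NegativeDivisor ? -Q : Q;
}

int main() {
  assert(sdivByPow2(-7, 2, false) == -7 / 4);  // -1, not -2: bias gives truncation
  assert(sdivByPow2(7, 2, false) == 7 / 4);    // 1
  assert(sdivByPow2(7, 2, true) == 7 / -4);    // -1
}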
+ if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) { + SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), In0Ty, In0, In1); + SDValue BC = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, Op); + AddToWorkList(Op.getNode()); + return BC; + } + } + + // Xor/and/or are indifferent to the swizzle operation (shuffle of one value). + // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B)) + // If both shuffles use the same mask, and both shuffle within a single + // vector, then it is worthwhile to move the swizzle after the operation. + // The type-legalizer generates this pattern when loading illegal + // vector types from memory. In many cases this allows additional shuffle + // optimizations. + if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && + N0.getOperand(1).getOpcode() == ISD::UNDEF && + N1.getOperand(1).getOpcode() == ISD::UNDEF) { + ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0); + ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1); + + assert(N0.getOperand(0).getValueType() == N1.getOperand(1).getValueType() && + "Inputs to shuffles are not the same type"); + + unsigned NumElts = VT.getVectorNumElements(); + + // Check that both shuffles use the same mask. The masks are known to be of + // the same length because the result vector type is the same. + bool SameMask = true; + for (unsigned i = 0; i != NumElts; ++i) { + int Idx0 = SVN0->getMaskElt(i); + int Idx1 = SVN1->getMaskElt(i); + if (Idx0 != Idx1) { + SameMask = false; + break; + } + } + + if (SameMask) { + SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT, + N0.getOperand(0), N1.getOperand(0)); + AddToWorkList(Op.getNode()); + return DAG.getVectorShuffle(VT, N->getDebugLoc(), Op, + DAG.getUNDEF(VT), &SVN0->getMask()[0]); + } + } + return SDValue(); } @@ -2312,6 +2455,88 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return SDValue(N, 0); // Return N so it doesn't get rechecked! } } + // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) -> + // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must + // already be zero by virtue of the width of the base type of the load. + // + // the 'X' node here can either be nothing or an extract_vector_elt to catch + // more cases. + if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + N0.getOperand(0).getOpcode() == ISD::LOAD) || + N0.getOpcode() == ISD::LOAD) { + LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ? + N0 : N0.getOperand(0) ); + + // Get the constant (if applicable) the zero'th operand is being ANDed with. + // This can be a pure constant or a vector splat, in which case we treat the + // vector as a scalar and use the splat value. + APInt Constant = APInt::getNullValue(1); + if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { + Constant = C->getAPIntValue(); + } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) { + APInt SplatValue, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef, + SplatBitSize, HasAnyUndefs); + if (IsSplat) { + // Undef bits can contribute to a possible optimisation if set, so + // set them. + SplatValue |= SplatUndef; + + // The splat value may be something like "0x00FFFFFF", which means 0 for + // the first vector value and FF for the rest, repeating. We need a mask + // that will apply equally to all members of the vector, so AND all the + // lanes of the constant together. 
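The long visitAND addition above decides whether an AND applied to a (possibly extending, possibly vector) load is redundant: a splat mask is collapsed to a single lane by AND-ing the lanes together, the mask is resized to the load's memory width, and the AND is dropped only if the resized mask is all ones (converting EXTLOAD to ZEXTLOAD when that is legal and profitable). A simplified scalar sketch of the core check for a zero-extending narrow load:

#include <cassert>
#include <cstdint>

// A zero-extending load of MemBits bits can only produce nonzero bits in
// positions [0, MemBits). ANDing the result with Mask is a no-op exactly when
// the low MemBits of the mask are all ones, i.e. the truncated mask is ~0.
static bool andOfZextLoadIsRedundant(uint64_t Mask, unsigned MemBits) {
  uint64_t LowBits = (MemBits >= 64) ? ~0ull : ((1ull << MemBits) - 1);
  return (Mask & LowBits) == LowBits;
}

int main() {
  assert(andOfZextLoadIsRedundant(0xFF, 8));    // i8 zext load & 0xff: drop the AND
  assert(andOfZextLoadIsRedundant(0x1FF, 8));   // extra high mask bits don't matter
  assert(!andOfZextLoadIsRedundant(0x7F, 8));   // clears bit 7 of the loaded value
}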
+ EVT VT = Vector->getValueType(0); + unsigned BitWidth = VT.getVectorElementType().getSizeInBits(); + Constant = APInt::getAllOnesValue(BitWidth); + for (unsigned i = 0, n = VT.getVectorNumElements(); i < n; ++i) + Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth); + } + } + + // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is + // actually legal and isn't going to get expanded, else this is a false + // optimisation. + bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD, + Load->getMemoryVT()); + + // Resize the constant to the same size as the original memory access before + // extension. If it is still the AllOnesValue then this AND is completely + // unneeded. + Constant = + Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits()); + + bool B; + switch (Load->getExtensionType()) { + default: B = false; break; + case ISD::EXTLOAD: B = CanZextLoadProfitably; break; + case ISD::ZEXTLOAD: + case ISD::NON_EXTLOAD: B = true; break; + } + + if (B && Constant.isAllOnesValue()) { + // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to + // preserve semantics once we get rid of the AND. + SDValue NewLoad(Load, 0); + if (Load->getExtensionType() == ISD::EXTLOAD) { + NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD, + Load->getValueType(0), Load->getDebugLoc(), + Load->getChain(), Load->getBasePtr(), + Load->getOffset(), Load->getMemoryVT(), + Load->getMemOperand()); + // Replace uses of the EXTLOAD with the new ZEXTLOAD. + CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1)); + } + + // Fold the AND away, taking care not to fold to the old load node if we + // replaced it. + CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0); + + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); @@ -3323,7 +3548,9 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or // (and (srl x, (sub c1, c2), MASK) - if (N1C && N0.getOpcode() == ISD::SRL && + // Only fold this if the inner shift has no other uses -- if it does, folding + // this will increase the total number of instructions. + if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() && N0.getOperand(1).getOpcode() == ISD::Constant) { uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); if (c1 < VT.getSizeInBits()) { @@ -3603,8 +3830,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (N1C && N0.getOpcode() == ISD::CTLZ && N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) { APInt KnownZero, KnownOne; - APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()); - DAG.ComputeMaskedBits(N0.getOperand(0), Mask, KnownZero, KnownOne); + DAG.ComputeMaskedBits(N0.getOperand(0), KnownZero, KnownOne); // If any of the input bits are KnownOne, then the input couldn't be all // zeros, thus the result of the srl will always be zero. @@ -3612,7 +3838,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // If all of the bits input the to ctlz node are known to be zero, then // the result of the ctlz is "32" and the result of the shift is one. - APInt UnknownBits = ~KnownZero & Mask; + APInt UnknownBits = ~KnownZero; if (UnknownBits == 0) return DAG.getConstant(1, VT); // Otherwise, check to see if there is exactly one bit input to the ctlz. 
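The visitSRL change above touches the existing (srl (ctlz x), log2(bitwidth)) fold, which recognizes the idiom that computes x == 0 as an integer: for a 32-bit value, ctlz is 32 only when the value is zero, so shifting the count right by 5 yields 1 for zero and 0 otherwise. The hunk merely drops the explicit mask from ComputeMaskedBits; a standalone worked example of the idiom itself (using the GCC/Clang __builtin_clz intrinsic):

#include <cassert>
#include <cstdint>

// Count leading zeros with the zero case defined as 32, unlike raw
// __builtin_clz, whose result is undefined for an all-zero input.
static unsigned ctlz32(uint32_t X) { return X ? (unsigned)__builtin_clz(X) : 32; }

// (ctlz x) >> 5 is 1 iff x == 0: only a count of 32 has bit 5 set.
static uint32_t isZeroViaCtlz(uint32_t X) { return ctlz32(X) >> 5; }

int main() {
  assert(isZeroViaCtlz(0) == 1);
  assert(isZeroViaCtlz(1) == 0);
  assert(isZeroViaCtlz(0x80000000u) == 0);
}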
@@ -3713,6 +3939,16 @@ SDValue DAGCombiner::visitCTLZ(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (ctlz_zero_undef c1) -> c2 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, N->getDebugLoc(), VT, N0); + return SDValue(); +} + SDValue DAGCombiner::visitCTTZ(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -3723,6 +3959,16 @@ SDValue DAGCombiner::visitCTTZ(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (cttz_zero_undef c1) -> c2 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, N->getDebugLoc(), VT, N0); + return SDValue(); +} + SDValue DAGCombiner::visitCTPOP(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -4108,12 +4354,17 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // Only do this before legalize for now. if (VT.isVector() && !LegalOperations) { EVT N0VT = N0.getOperand(0).getValueType(); - // We know that the # elements of the results is the same as the - // # elements of the compare (and the # elements of the compare result - // for that matter). Check to see that they are the same size. If so, - // we know that the element size of the sext'd result matches the - // element size of the compare operands. - if (VT.getSizeInBits() == N0VT.getSizeInBits()) + // On some architectures (such as SSE/NEON/etc) the SETCC result type is + // of the same size as the compared operands. Only optimize sext(setcc()) + // if this is the case. + EVT SVT = TLI.getSetCCResultType(N0VT); + + // We know that the # elements of the results is the same as the + // # elements of the compare (and the # elements of the compare result + // for that matter). Check to see that they are the same size. If so, + // we know that the element size of the sext'd result matches the + // element size of the compare operands. + if (VT.getSizeInBits() == SVT.getSizeInBits()) return DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0), N0.getOperand(1), cast<CondCodeSDNode>(N0.getOperand(2))->get()); @@ -4127,11 +4378,13 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { EVT MatchingVectorType = EVT::getVectorVT(*DAG.getContext(), MatchingElementType, N0VT.getVectorNumElements()); - SDValue VsetCC = - DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), - N0.getOperand(1), - cast<CondCodeSDNode>(N0.getOperand(2))->get()); - return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); + + if (SVT == MatchingVectorType) { + SDValue VsetCC = DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, + N0.getOperand(0), N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); + } } } @@ -4162,6 +4415,44 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { return SDValue(); } +// isTruncateOf - If N is a truncate of some other value, return true, record +// the value being truncated in Op and which of Op's bits are zero in KnownZero. +// This function computes KnownZero to avoid a duplicated call to +// ComputeMaskedBits in the caller. 
+static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, + APInt &KnownZero) { + APInt KnownOne; + if (N->getOpcode() == ISD::TRUNCATE) { + Op = N->getOperand(0); + DAG.ComputeMaskedBits(Op, KnownZero, KnownOne); + return true; + } + + if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 || + cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE) + return false; + + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + assert(Op0.getValueType() == Op1.getValueType()); + + ConstantSDNode *COp0 = dyn_cast<ConstantSDNode>(Op0); + ConstantSDNode *COp1 = dyn_cast<ConstantSDNode>(Op1); + if (COp0 && COp0->isNullValue()) + Op = Op1; + else if (COp1 && COp1->isNullValue()) + Op = Op0; + else + return false; + + DAG.ComputeMaskedBits(Op, KnownZero, KnownOne); + + if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue()) + return false; + + return true; +} + SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -4175,6 +4466,30 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0.getOperand(0)); + // fold (zext (truncate x)) -> (zext x) or + // (zext (truncate x)) -> (truncate x) + // This is valid when the truncated bits of x are already zero. + // FIXME: We should extend this to work for vectors too. + SDValue Op; + APInt KnownZero; + if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) { + APInt TruncatedBits = + (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ? + APInt(Op.getValueSizeInBits(), 0) : + APInt::getBitsSet(Op.getValueSizeInBits(), + N0.getValueSizeInBits(), + std::min(Op.getValueSizeInBits(), + VT.getSizeInBits())); + if (TruncatedBits == (KnownZero & TruncatedBits)) { + if (VT.bitsGT(Op.getValueType())) + return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, Op); + if (VT.bitsLT(Op.getValueType())) + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op); + + return Op; + } + } + // fold (zext (truncate (load x))) -> (zext (smaller load x)) // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n))) if (N0.getOpcode() == ISD::TRUNCATE) { @@ -4567,6 +4882,16 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { switch (V.getOpcode()) { default: break; + case ISD::Constant: { + const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode()); + assert(CV != 0 && "Const value should be ConstSDNode."); + const APInt &CVal = CV->getAPIntValue(); + APInt NewVal = CVal & Mask; + if (NewVal != CVal) { + return DAG.getConstant(NewVal, V.getValueType()); + } + break; + } case ISD::OR: case ISD::XOR: // If the LHS or RHS don't contribute bits to the or, drop them. 
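The new fold in visitZERO_EXTEND above recognizes zext(truncate x): when the bits the truncate discards are already known to be zero, the zext/trunc pair collapses to x itself, or to a single extend or truncate when the widths differ. A simplified worked check for the case where the zext returns to x's own width:

#include <cassert>
#include <cstdint>

// zext_i64(trunc_iN(x)) == x holds exactly when bits [N, 64) of x are zero,
// i.e. when the truncated bit positions are covered by the known-zero mask.
static bool zextOfTruncIsNoop(uint64_t KnownZero, unsigned TruncWidth) {
  uint64_t TruncatedBits = (TruncWidth >= 64) ? 0 : (~0ull << TruncWidth);
  return (TruncatedBits & KnownZero) == TruncatedBits;
}

int main() {
  // x known to fit in 16 bits: zext(trunc_i16 x) back to 64 bits is just x.
  assert(zextOfTruncIsNoop(/*KnownZero=*/~0xFFFFull, /*TruncWidth=*/16));
  // High bits not known zero: the truncate really discards information.
  assert(!zextOfTruncIsNoop(/*KnownZero=*/0x00FF, /*TruncWidth=*/16));
}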
@@ -4705,7 +5030,8 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { if (ExtType == ISD::NON_EXTLOAD) Load = DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), - LN0->isVolatile(), LN0->isNonTemporal(), NewAlign); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->isInvariant(), NewAlign); else Load = DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(),NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), @@ -4844,6 +5170,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + bool isLE = TLI.isLittleEndian(); // noop truncate if (N0.getValueType() == N->getValueType(0)) @@ -4871,6 +5198,44 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { return N0.getOperand(0); } + // Fold extract-and-trunc into a narrow extract. For example: + // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1) + // i32 y = TRUNCATE(i64 x) + // -- becomes -- + // v16i8 b = BITCAST (v2i64 val) + // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8) + // + // Note: We only run this optimization after type legalization (which often + // creates this pattern) and before operation legalization after which + // we need to be more careful about the vector instructions that we generate. + if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + LegalTypes && !LegalOperations && N0->hasOneUse()) { + + EVT VecTy = N0.getOperand(0).getValueType(); + EVT ExTy = N0.getValueType(); + EVT TrTy = N->getValueType(0); + + unsigned NumElem = VecTy.getVectorNumElements(); + unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits(); + + EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem); + assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size"); + + SDValue EltNo = N0->getOperand(1); + if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) { + int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + + int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1)); + + SDValue V = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), + NVT, N0.getOperand(0)); + + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, + N->getDebugLoc(), TrTy, V, + DAG.getConstant(Index, MVT::i32)); + } + } + // See if we can simplify the input to this truncate through knowledge that // only the low bits are being used. // For example "trunc (or (shl x, 8), y)" // -> trunc y @@ -4934,7 +5299,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(), LD1->getBasePtr(), LD1->getPointerInfo(), - false, false, Align); + false, false, false, Align); } return SDValue(); @@ -5004,7 +5369,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getPointerInfo(), LN0->isVolatile(), LN0->isNonTemporal(), - OrigAlign); + LN0->isInvariant(), OrigAlign); AddToWorkList(N); CombineTo(N0.getNode(), DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), @@ -5017,7 +5382,8 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) // This often reduces constant pool loads. 
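The visitTRUNCATE addition above rewrites trunc(extract_vector_elt(v, Elt)) as an extract of a narrower lane from the bitcast vector, and the only subtle part is the lane index, which depends on endianness: the low (kept) part of a wide element is the first narrow lane on little-endian targets and the last one on big-endian targets. A small worked version of that index computation:

#include <cassert>

// With SizeRatio = wideBits / narrowBits, the narrow lane holding the low
// (truncated) part of wide element Elt is:
static unsigned narrowLaneIndex(unsigned Elt, unsigned SizeRatio,
                                bool IsLittleEndian) {
  return IsLittleEndian ? Elt * SizeRatio                    // low part comes first
                        : Elt * SizeRatio + (SizeRatio - 1); // low part comes last
}

int main() {
  // i8 y = trunc(extract_elt(v2i64 val, 1)): view v2i64 as v16i8, ratio 8.
  assert(narrowLaneIndex(1, 8, /*IsLittleEndian=*/true) == 8);
  assert(narrowLaneIndex(1, 8, /*IsLittleEndian=*/false) == 15);
}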
- if ((N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FABS) && + if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(VT)) || + (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(VT))) && N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector()) { SDValue NewConv = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), VT, N0.getOperand(0)); @@ -5247,20 +5613,24 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { if (N0CFP && !N1CFP) return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N0); // fold (fadd A, 0) -> A - if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero()) + if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && + N1CFP->getValueAPF().isZero()) return N0; // fold (fadd A, (fneg B)) -> (fsub A, B) - if (isNegatibleForFree(N1, LegalOperations) == 2) + if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && + isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2) return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, GetNegatedExpression(N1, DAG, LegalOperations)); // fold (fadd (fneg A), B) -> (fsub B, A) - if (isNegatibleForFree(N0, LegalOperations) == 2) + if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && + isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2) return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1, GetNegatedExpression(N0, DAG, LegalOperations)); // If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2)) - if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FADD && - N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1))) + if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && + N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() && + isa<ConstantFPSDNode>(N0.getOperand(1))) return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(0), DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(1), N1)); @@ -5285,20 +5655,39 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { if (N0CFP && N1CFP && VT != MVT::ppcf128) return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, N1); // fold (fsub A, 0) -> A - if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero()) + if (DAG.getTarget().Options.UnsafeFPMath && + N1CFP && N1CFP->getValueAPF().isZero()) return N0; // fold (fsub 0, B) -> -B - if (UnsafeFPMath && N0CFP && N0CFP->getValueAPF().isZero()) { - if (isNegatibleForFree(N1, LegalOperations)) + if (DAG.getTarget().Options.UnsafeFPMath && + N0CFP && N0CFP->getValueAPF().isZero()) { + if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) return GetNegatedExpression(N1, DAG, LegalOperations); if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N1); } // fold (fsub A, (fneg B)) -> (fadd A, B) - if (isNegatibleForFree(N1, LegalOperations)) + if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, GetNegatedExpression(N1, DAG, LegalOperations)); + // If 'unsafe math' is enabled, fold + // (fsub x, (fadd x, y)) -> (fneg y) & + // (fsub x, (fadd y, x)) -> (fneg y) + if (DAG.getTarget().Options.UnsafeFPMath) { + if (N1.getOpcode() == ISD::FADD) { + SDValue N10 = N1->getOperand(0); + SDValue N11 = N1->getOperand(1); + + if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, + &DAG.getTarget().Options)) + return GetNegatedExpression(N11, DAG, LegalOperations); + else if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, + &DAG.getTarget().Options)) + return 
GetNegatedExpression(N10, DAG, LegalOperations); + } + } + return SDValue(); } @@ -5308,6 +5697,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // fold vector ops if (VT.isVector()) { @@ -5322,10 +5712,12 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { if (N0CFP && !N1CFP) return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N1, N0); // fold (fmul A, 0) -> 0 - if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero()) + if (DAG.getTarget().Options.UnsafeFPMath && + N1CFP && N1CFP->getValueAPF().isZero()) return N1; // fold (fmul A, 0) -> 0, vector edition. - if (UnsafeFPMath && ISD::isBuildVectorAllZeros(N1.getNode())) + if (DAG.getTarget().Options.UnsafeFPMath && + ISD::isBuildVectorAllZeros(N1.getNode())) return N1; // fold (fmul X, 2.0) -> (fadd X, X) if (N1CFP && N1CFP->isExactlyValue(+2.0)) @@ -5336,8 +5728,10 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0); // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y) - if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) { - if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) { + if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, + &DAG.getTarget().Options)) { + if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, + &DAG.getTarget().Options)) { // Both can be negated for free, check to see if at least one is cheaper // negated. if (LHSNeg == 2 || RHSNeg == 2) @@ -5348,7 +5742,8 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { } // If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2)) - if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FMUL && + if (DAG.getTarget().Options.UnsafeFPMath && + N1CFP && N0.getOpcode() == ISD::FMUL && N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1))) return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0.getOperand(0), DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, @@ -5363,6 +5758,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // fold vector ops if (VT.isVector()) { @@ -5374,10 +5770,30 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { if (N0CFP && N1CFP && VT != MVT::ppcf128) return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, N0, N1); + // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable. + if (N1CFP && VT != MVT::ppcf128 && DAG.getTarget().Options.UnsafeFPMath) { + // Compute the reciprocal 1.0 / c2. + APFloat N1APF = N1CFP->getValueAPF(); + APFloat Recip(N1APF.getSemantics(), 1); // 1.0 + APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven); + // Only do the transform if the reciprocal is a legal fp immediate that + // isn't too nasty (eg NaN, denormal, ...). + if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty + (!LegalOperations || + // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM + // backend)... we should handle this gracefully after Legalize. 
+ // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) || + TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) || + TLI.isFPImmLegal(Recip, VT))) + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, + DAG.getConstantFP(Recip, VT)); + } // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y) - if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) { - if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) { + if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, + &DAG.getTarget().Options)) { + if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, + &DAG.getTarget().Options)) { // Both can be negated for free, check to see if at least one is cheaper // negated. if (LHSNeg == 2 || RHSNeg == 2) @@ -5463,7 +5879,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { // fold (sint_to_fp c1) -> c1fp if (N0C && OpVT != MVT::ppcf128 && // ...but only if the target supports immediate floating-point values - (Level == llvm::Unrestricted || + (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0); @@ -5488,7 +5904,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { // fold (uint_to_fp c1) -> c1fp if (N0C && OpVT != MVT::ppcf128 && // ...but only if the target supports immediate floating-point values - (Level == llvm::Unrestricted || + (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0); @@ -5630,12 +6046,13 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - if (isNegatibleForFree(N0, LegalOperations)) + if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(), + &DAG.getTarget().Options)) return GetNegatedExpression(N0, DAG, LegalOperations); // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading // constant pool values. - if (N0.getOpcode() == ISD::BITCAST && + if (!TLI.isFNegFree(VT) && N0.getOpcode() == ISD::BITCAST && !VT.isVector() && N0.getNode()->hasOneUse() && N0.getOperand(0).getValueType().isInteger()) { @@ -5671,7 +6088,8 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading // constant pool values. - if (N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() && + if (!TLI.isFAbsFree(VT) && + N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() && N0.getOperand(0).getValueType().isInteger() && !N0.getOperand(0).getValueType().isVector()) { SDValue Int = N0.getOperand(0); @@ -5860,6 +6278,47 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) { return SDValue(); } +/// canFoldInAddressingMode - Return true if 'Use' is a load or a store that +/// uses N as its base pointer and that N may be folded in the load / store +/// addressing mode. 
+static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, + SelectionDAG &DAG, + const TargetLowering &TLI) { + EVT VT; + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) { + if (LD->isIndexed() || LD->getBasePtr().getNode() != N) + return false; + VT = Use->getValueType(0); + } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) { + if (ST->isIndexed() || ST->getBasePtr().getNode() != N) + return false; + VT = ST->getValue().getValueType(); + } else + return false; + + TargetLowering::AddrMode AM; + if (N->getOpcode() == ISD::ADD) { + ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (Offset) + // [reg +/- imm] + AM.BaseOffs = Offset->getSExtValue(); + else + // [reg +/- reg] + AM.Scale = 1; + } else if (N->getOpcode() == ISD::SUB) { + ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (Offset) + // [reg +/- imm] + AM.BaseOffs = -Offset->getSExtValue(); + else + // [reg +/- reg] + AM.Scale = 1; + } else + return false; + + return TLI.isLegalAddressingMode(AM, VT.getTypeForEVT(*DAG.getContext())); +} + /// CombineToPreIndexedLoadStore - Try turning a load / store into a /// pre-indexed load / store when the base pointer is an add or subtract /// and it has other uses besides the load / store. After the @@ -5867,7 +6326,7 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) { /// the add / subtract in and all of its other uses are redirected to the /// new load / store. bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { - if (!LegalOperations) + if (Level < AfterLegalizeDAG) return false; bool isLoad = true; @@ -5946,10 +6405,9 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { if (N->hasPredecessorHelper(Use, Visited, Worklist)) return false; - if (!((Use->getOpcode() == ISD::LOAD && - cast<LoadSDNode>(Use)->getBasePtr() == Ptr) || - (Use->getOpcode() == ISD::STORE && - cast<StoreSDNode>(Use)->getBasePtr() == Ptr))) + // If Ptr may be folded in the addressing mode of another use, then it's + // not profitable to do this transformation. + if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI)) RealUse = true; } @@ -5999,7 +6457,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { /// load / store effectively and all of its uses are redirected to the /// new load / store. bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { - if (!LegalOperations) + if (Level < AfterLegalizeDAG) return false; bool isLoad = true; @@ -6046,7 +6504,8 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { continue; // Try turning it into a post-indexed load / store except when - // 1) All uses are load / store ops that use it as base ptr. + // 1) All uses are load / store ops that use it as base ptr (and + // it may be folded in the addressing mode). // 2) Op must be independent of N, i.e. Op is neither a predecessor // nor a successor of N. Otherwise, if Op is folded that would // create a cycle. @@ -6069,10 +6528,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { for (SDNode::use_iterator III = Use->use_begin(), EEE = Use->use_end(); III != EEE; ++III) { SDNode *UseUse = *III; - if (!((UseUse->getOpcode() == ISD::LOAD && - cast<LoadSDNode>(UseUse)->getBasePtr().getNode() == Use) || - (UseUse->getOpcode() == ISD::STORE && - cast<StoreSDNode>(UseUse)->getBasePtr().getNode() == Use))) + if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI)) RealUse = true; } @@ -6139,7 +6595,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { if (!LD->isVolatile()) { if (N->getValueType(1) == MVT::Other) { // Unindexed loads.
- if (N->hasNUsesOfValue(0, 0)) { + if (!N->hasAnyUseOfValue(0)) { // It's not safe to use the two value CombineTo variant here. e.g. // v1, chain2 = load chain1, loc // v2, chain3 = load chain2, loc @@ -6164,7 +6620,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { } else { // Indexed loads. assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?"); - if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) { + if (!N->hasAnyUseOfValue(0) && !N->hasAnyUseOfValue(1)) { SDValue Undef = DAG.getUNDEF(N->getValueType(0)); DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG); @@ -6222,7 +6678,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(), BetterChain, Ptr, LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); + LD->isInvariant(), LD->getAlignment()); } else { ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(), LD->getValueType(0), @@ -6486,7 +6942,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { LD->getChain(), NewPtr, LD->getPointerInfo().getWithOffset(PtrOff), LD->isVolatile(), LD->isNonTemporal(), - NewAlign); + LD->isInvariant(), NewAlign); SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD, DAG.getConstant(NewImm, NewVT)); SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(), @@ -6546,7 +7002,7 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { SDValue NewLD = DAG.getLoad(IntVT, Value.getDebugLoc(), LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), - false, false, LDAlign); + false, false, false, LDAlign); SDValue NewST = DAG.getStore(NewLD.getValue(1), N->getDebugLoc(), NewLD, ST->getBasePtr(), @@ -6823,13 +7279,14 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // (vextract (scalar_to_vector val, 0) -> val SDValue InVec = N->getOperand(0); + EVT VT = InVec.getValueType(); + EVT NVT = N->getValueType(0); if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) { // Check if the result type doesn't match the inserted element type. A // SCALAR_TO_VECTOR may truncate the inserted element and the // EXTRACT_VECTOR_ELT may widen the extracted vector. SDValue InOp = InVec.getOperand(0); - EVT NVT = N->getValueType(0); if (InOp.getValueType() != NVT) { assert(InOp.getValueType().isInteger() && NVT.isInteger()); return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT); @@ -6837,6 +7294,38 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { return InOp; } + SDValue EltNo = N->getOperand(1); + bool ConstEltNo = isa<ConstantSDNode>(EltNo); + + // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT. + // We only perform this optimization before the op legalization phase because + // we may introduce new vector instructions which are not backed by TD patterns. + // For example on AVX, extracting elements from a wide vector without using + // extract_subvector. + if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE + && ConstEltNo && !LegalOperations) { + int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + int NumElem = VT.getVectorNumElements(); + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec); + // Find the new index to extract from. + int OrigElt = SVOp->getMaskElt(Elt); + + // Extracting an undef index is undef. + if (OrigElt == -1) + return DAG.getUNDEF(NVT); + + // Select the right vector half to extract from. 
+ if (OrigElt < NumElem) { + InVec = InVec->getOperand(0); + } else { + InVec = InVec->getOperand(1); + OrigElt -= NumElem; + } + + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), NVT, + InVec, DAG.getConstant(OrigElt, MVT::i32)); + } + // Perform only after legalization to ensure build_vector / vector_shuffle // optimizations have already been done. if (!LegalOperations) return SDValue(); @@ -6844,17 +7333,24 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size) // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size) // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr) - SDValue EltNo = N->getOperand(1); - if (isa<ConstantSDNode>(EltNo)) { + if (ConstEltNo) { int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); bool NewLoad = false; bool BCNumEltsChanged = false; - EVT VT = InVec.getValueType(); EVT ExtVT = VT.getVectorElementType(); EVT LVT = ExtVT; + // If the result of load has to be truncated, then it's not necessarily + // profitable. + if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT)) + return SDValue(); + if (InVec.getOpcode() == ISD::BITCAST) { + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + EVT BCVT = InVec.getOperand(0).getValueType(); if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType())) return SDValue(); @@ -6872,12 +7368,20 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && InVec.getOperand(0).getValueType() == ExtVT && ISD::isNormalLoad(InVec.getOperand(0).getNode())) { + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + LN0 = cast<LoadSDNode>(InVec.getOperand(0)); } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) { // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1) // => // (load $addr+1*size) + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + // If the bit convert changed the number of elements, it is unsafe // to examine the mask. if (BCNumEltsChanged) @@ -6888,14 +7392,21 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt); InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1); - if (InVec.getOpcode() == ISD::BITCAST) + if (InVec.getOpcode() == ISD::BITCAST) { + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + InVec = InVec.getOperand(0); + } if (ISD::isNormalLoad(InVec.getNode())) { LN0 = cast<LoadSDNode>(InVec); Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems; } } + // Make sure we found a non-volatile load and the extractelement is + // the only use. if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile()) return SDValue(); @@ -6929,9 +7440,45 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { DAG.getConstant(PtrOff, PtrType)); } - return DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr, - LN0->getPointerInfo().getWithOffset(PtrOff), - LN0->isVolatile(), LN0->isNonTemporal(), Align); + // The replacement we need to do here is a little tricky: we need to + // replace an extractelement of a load with a load. + // Use ReplaceAllUsesOfValuesWith to do the replacement. + // Note that this replacement assumes that the extractvalue is the only + // use of the load; that's okay because we don't want to perform this + // transformation in other cases anyway. 
+ SDValue Load; + SDValue Chain; + if (NVT.bitsGT(LVT)) { + // If the result type of vextract is wider than the load, then issue an + // extending load instead. + ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, LVT) + ? ISD::ZEXTLOAD : ISD::EXTLOAD; + Load = DAG.getExtLoad(ExtType, N->getDebugLoc(), NVT, LN0->getChain(), + NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), + LVT, LN0->isVolatile(), LN0->isNonTemporal(),Align); + Chain = Load.getValue(1); + } else { + Load = DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr, + LN0->getPointerInfo().getWithOffset(PtrOff), + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->isInvariant(), Align); + Chain = Load.getValue(1); + if (NVT.bitsLT(LVT)) + Load = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), NVT, Load); + else + Load = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), NVT, Load); + } + WorkListRemover DeadNodes(*this); + SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) }; + SDValue To[] = { Load, Chain }; + DAG.ReplaceAllUsesOfValuesWith(From, To, 2, &DeadNodes); + // Since we're explicitly calling ReplaceAllUses, add the new node to the + // worklist explicitly as well. + AddToWorkList(Load.getNode()); + AddUsersToWorkList(Load.getNode()); // Add users too + // Make sure to revisit this node to clean it up; it will usually be dead. + AddToWorkList(N); + return SDValue(N, 0); } return SDValue(); @@ -6939,11 +7486,122 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { unsigned NumInScalars = N->getNumOperands(); + DebugLoc dl = N->getDebugLoc(); EVT VT = N->getValueType(0); + // Check to see if this is a BUILD_VECTOR of a bunch of values + // which come from any_extend or zero_extend nodes. If so, we can create + // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR + // optimizations. We do not handle sign-extend because we can't fill the sign + // using shuffles. + EVT SourceType = MVT::Other; + bool AllAnyExt = true; + bool AllUndef = true; + for (unsigned i = 0; i != NumInScalars; ++i) { + SDValue In = N->getOperand(i); + // Ignore undef inputs. + if (In.getOpcode() == ISD::UNDEF) continue; + AllUndef = false; + + bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND; + bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND; + + // Abort if the element is not an extension. + if (!ZeroExt && !AnyExt) { + SourceType = MVT::Other; + break; + } + + // The input is a ZeroExt or AnyExt. Check the original type. + EVT InTy = In.getOperand(0).getValueType(); + + // Check that all of the widened source types are the same. + if (SourceType == MVT::Other) + // First time. + SourceType = InTy; + else if (InTy != SourceType) { + // Multiple incoming types. Abort. + SourceType = MVT::Other; + break; + } + + // Check if all of the extends are ANY_EXTENDs. + AllAnyExt &= AnyExt; + } + + if (AllUndef) + return DAG.getUNDEF(VT); + + // In order to have valid types, all of the inputs must be extended from the + // same source type and all of the inputs must be any or zero extend. + // Scalar sizes must be a power of two. + EVT OutScalarTy = N->getValueType(0).getScalarType(); + bool ValidTypes = SourceType != MVT::Other && + isPowerOf2_32(OutScalarTy.getSizeInBits()) && + isPowerOf2_32(SourceType.getSizeInBits()); + + // We perform this optimization post type-legalization because + // the type-legalizer often scalarizes integer-promoted vectors. + // Performing this optimization before may create bit-casts which + // will be type-legalized to complex code sequences.
+ // We perform this optimization only before the operation legalizer because we + // may introduce illegal operations. + // Create a new simpler BUILD_VECTOR sequence which other optimizations can + // turn into a single shuffle instruction. + if ((Level == AfterLegalizeVectorOps || Level == AfterLegalizeTypes) && + ValidTypes) { + bool isLE = TLI.isLittleEndian(); + unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits(); + assert(ElemRatio > 1 && "Invalid element size ratio"); + SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType): + DAG.getConstant(0, SourceType); + + unsigned NewBVElems = ElemRatio * N->getValueType(0).getVectorNumElements(); + SmallVector<SDValue, 8> Ops(NewBVElems, Filler); + + // Populate the new build_vector + for (unsigned i=0; i < N->getNumOperands(); ++i) { + SDValue Cast = N->getOperand(i); + assert((Cast.getOpcode() == ISD::ANY_EXTEND || + Cast.getOpcode() == ISD::ZERO_EXTEND || + Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode"); + SDValue In; + if (Cast.getOpcode() == ISD::UNDEF) + In = DAG.getUNDEF(SourceType); + else + In = Cast->getOperand(0); + unsigned Index = isLE ? (i * ElemRatio) : + (i * ElemRatio + (ElemRatio - 1)); + + assert(Index < Ops.size() && "Invalid index"); + Ops[Index] = In; + } + + // The type of the new BUILD_VECTOR node. + EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems); + assert(VecVT.getSizeInBits() == N->getValueType(0).getSizeInBits() && + "Invalid vector size"); + // Check if the new vector type is legal. + if (!isTypeLegal(VecVT)) return SDValue(); + + // Make the new BUILD_VECTOR. + SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + VecVT, &Ops[0], Ops.size()); + + // The new BUILD_VECTOR node has the potential to be further optimized. + AddToWorkList(BV.getNode()); + // Bitcast to the desired type. + return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), BV); + } // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from // at most two distinct vectors, turn this into a shuffle node. + + // May only combine to shuffle after legalize if shuffle is legal. + if (LegalOperations && + !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT)) + return SDValue(); + SDValue VecIn1, VecIn2; for (unsigned i = 0; i != NumInScalars; ++i) { // Ignore undef inputs. @@ -6957,15 +7615,8 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { break; } - // If the input vector type disagrees with the result of the build_vector, - // we can't make a shuffle. + // We allow up to two distinct input vectors. SDValue ExtractedFromVec = N->getOperand(i).getOperand(0); - if (ExtractedFromVec.getValueType() != VT) { - VecIn1 = VecIn2 = SDValue(0, 0); - break; - } - - // Otherwise, remember this. We allow up to two distinct input vectors. if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2) continue; @@ -6980,7 +7631,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { } } - // If everything is good, we can make a shuffle operation. + // If everything is good, we can make a shuffle operation. if (VecIn1.getNode()) { SmallVector<int, 8> Mask; for (unsigned i = 0; i != NumInScalars; ++i) { @@ -7006,14 +7657,39 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { Mask.push_back(Idx+NumInScalars); } - // Add count and size info. + // We can't generate a shuffle node with mismatched input and output types. + // Attempt to transform a single input vector to the correct type. 
+ if ((VT != VecIn1.getValueType())) { + // We don't support shuffling between TWO values of different types. + if (VecIn2.getNode() != 0) + return SDValue(); + + // We only support widening of vectors which are half the size of the + // output registers. For example XMM->YMM widening on X86 with AVX. + if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits()) + return SDValue(); + + // Widen the input vector by adding undef values. + VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, + VecIn1, DAG.getUNDEF(VecIn1.getValueType())); + } + + // If VecIn2 is unused then change it to undef. + VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); + + // Check that we were able to transform all incoming values to the same type. + if (VecIn2.getValueType() != VecIn1.getValueType() || + VecIn1.getValueType() != VT) + return SDValue(); + + // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes. if (!isTypeLegal(VT)) return SDValue(); // Return the new VECTOR_SHUFFLE node. SDValue Ops[2]; Ops[0] = VecIn1; - Ops[1] = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); + Ops[1] = VecIn2; return DAG.getVectorShuffle(VT, N->getDebugLoc(), Ops[0], Ops[1], &Mask[0]); } @@ -7045,19 +7721,23 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { if (NVT != SmallVT || NVT.getSizeInBits()*2 != BigVT.getSizeInBits()) return SDValue(); - // Combine: - // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx) - // Into: - // indicies are equal => V1 - // otherwise => (extract_subvec V1, ExtIdx) - // - SDValue InsIdx = N->getOperand(1); - SDValue ExtIdx = V->getOperand(2); + // Only handle cases where both indexes are constants with the same type. + ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(N->getOperand(1)); + ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(V->getOperand(2)); - if (InsIdx == ExtIdx) - return V->getOperand(1); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), NVT, - V->getOperand(0), N->getOperand(1)); + if (InsIdx && ExtIdx && + InsIdx->getValueType(0).getSizeInBits() <= 64 && + ExtIdx->getValueType(0).getSizeInBits() <= 64) { + // Combine: + // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx) + // Into: + // indices are equal => V1 + // otherwise => (extract_subvec V1, ExtIdx) + if (InsIdx->getZExtValue() == ExtIdx->getZExtValue()) + return V->getOperand(1); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), NVT, + V->getOperand(0), N->getOperand(1)); + } } return SDValue(); @@ -7068,15 +7748,63 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { unsigned NumElts = VT.getVectorNumElements(); SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + + assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG"); + + // Canonicalize shuffle undef, undef -> undef + if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF) + return DAG.getUNDEF(VT); + + ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); - assert(N0.getValueType().getVectorNumElements() == NumElts && - "Vector shuffle must be normalized in DAG"); + // Canonicalize shuffle v, v -> v, undef + if (N0 == N1) { + SmallVector<int, 8> NewMask; + for (unsigned i = 0; i != NumElts; ++i) { + int Idx = SVN->getMaskElt(i); + if (Idx >= (int)NumElts) Idx -= NumElts; + NewMask.push_back(Idx); + } + return DAG.getVectorShuffle(VT, N->getDebugLoc(), N0, DAG.getUNDEF(VT), + &NewMask[0]); + } + + // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
+ if (N0.getOpcode() == ISD::UNDEF) { + SmallVector<int, 8> NewMask; + for (unsigned i = 0; i != NumElts; ++i) { + int Idx = SVN->getMaskElt(i); + if (Idx >= 0) { + if (Idx < (int)NumElts) + Idx += NumElts; + else + Idx -= NumElts; + } + NewMask.push_back(Idx); + } + return DAG.getVectorShuffle(VT, N->getDebugLoc(), N1, DAG.getUNDEF(VT), + &NewMask[0]); + } - // FIXME: implement canonicalizations from DAG.getVectorShuffle() + // Remove references to rhs if it is undef + if (N1.getOpcode() == ISD::UNDEF) { + bool Changed = false; + SmallVector<int, 8> NewMask; + for (unsigned i = 0; i != NumElts; ++i) { + int Idx = SVN->getMaskElt(i); + if (Idx >= (int)NumElts) { + Idx = -1; + Changed = true; + } + NewMask.push_back(Idx); + } + if (Changed) + return DAG.getVectorShuffle(VT, N->getDebugLoc(), N0, N1, &NewMask[0]); + } // If it is a splat, check if the argument vector is another splat or a // build_vector with all scalar elements the same. - ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) { SDNode *V = N0.getNode(); @@ -7115,6 +7843,40 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return N0; } } + + // If this shuffle node is simply a swizzle of another shuffle node, + // and it reverses the swizzle of the previous shuffle then we can + // optimize shuffle(shuffle(x, undef), undef) -> x. + if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && + N1.getOpcode() == ISD::UNDEF) { + + ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0); + + // Shuffle nodes can only reverse shuffles with a single non-undef value. + if (N0.getOperand(1).getOpcode() != ISD::UNDEF) + return SDValue(); + + // The incoming shuffle must be of the same type as the result of the + // current shuffle. + assert(OtherSV->getOperand(0).getValueType() == VT && + "Shuffle types don't match"); + + for (unsigned i = 0; i != NumElts; ++i) { + int Idx = SVN->getMaskElt(i); + assert(Idx < (int)NumElts && "Index references undef operand"); + // Next, this index comes from the first value, which is the incoming + // shuffle. Adopt the incoming index. + if (Idx >= 0) + Idx = OtherSV->getMaskElt(Idx); + + // The combined shuffle must map each index to itself. + if (Idx >= 0 && (unsigned)Idx != i) + return SDValue(); + } + + return OtherSV->getOperand(0); + } + return SDValue(); } @@ -7190,7 +7952,8 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { SDValue Elt = RHS.getOperand(i); if (!isa<ConstantSDNode>(Elt)) return SDValue(); - else if (cast<ConstantSDNode>(Elt)->isAllOnesValue()) + + if (cast<ConstantSDNode>(Elt)->isAllOnesValue()) Indices.push_back(i); else if (cast<ConstantSDNode>(Elt)->isNullValue()) Indices.push_back(NumElts); @@ -7261,8 +8024,19 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { } EVT VT = LHSOp.getValueType(); - assert(RHSOp.getValueType() == VT && - "SimplifyVBinOp with different BUILD_VECTOR element types"); + EVT RVT = RHSOp.getValueType(); + if (RVT != VT) { + // Integer BUILD_VECTOR operands may have types larger than the element + // size (e.g., when the element type is not legal). Prior to type + // legalization, the types may not match between the two BUILD_VECTORS. + // Truncate one of the operands to make them match. 
+ if (RVT.getSizeInBits() > VT.getSizeInBits()) { + RHSOp = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, RHSOp); + } else { + LHSOp = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), RVT, LHSOp); + VT = RVT; + } + } SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), VT, LHSOp, RHSOp); if (FoldOp.getOpcode() != ISD::UNDEF && @@ -7374,8 +8148,8 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, if ((LLD->hasAnyUseOfValue(1) && (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) || - (LLD->hasAnyUseOfValue(1) && - (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS)))) + (RLD->hasAnyUseOfValue(1) && + (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS)))) return false; Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(), @@ -7393,7 +8167,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, // FIXME: Discards pointer info. LLD->getChain(), Addr, MachinePointerInfo(), LLD->isVolatile(), LLD->isNonTemporal(), - LLD->getAlignment()); + LLD->isInvariant(), LLD->getAlignment()); } else { Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType() : LLD->getExtensionType(), @@ -7509,7 +8283,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, AddToWorkList(CPIdx.getNode()); return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), false, - false, Alignment); + false, false, Alignment); } } @@ -7517,8 +8291,6 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, // Check to see if we can perform the "gzip trick", transforming // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A) if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT && - N0.getValueType().isInteger() && - N2.getValueType().isInteger() && (N1C->isNullValue() || // (a < 0) ? b : 0 (N1C->getAPIntValue() == 1 && N0 == N2))) { // (a < 1) ? a : 0 EVT XType = N0.getValueType(); @@ -7720,7 +8492,7 @@ SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> SDValue DAGCombiner::BuildSDIV(SDNode *N) { std::vector<SDNode*> Built; - SDValue S = TLI.BuildSDIV(N, DAG, &Built); + SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, &Built); for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end(); ii != ee; ++ii) @@ -7734,7 +8506,7 @@ SDValue DAGCombiner::BuildSDIV(SDNode *N) { /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> SDValue DAGCombiner::BuildUDIV(SDNode *N) { std::vector<SDNode*> Built; - SDValue S = TLI.BuildUDIV(N, DAG, &Built); + SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, &Built); for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end(); ii != ee; ++ii) @@ -7856,30 +8628,20 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, /// FindAliasInfo - Extracts the relevant alias information from the memory /// node. Returns true if the operand was a load. 
bool DAGCombiner::FindAliasInfo(SDNode *N, - SDValue &Ptr, int64_t &Size, - const Value *&SrcValue, - int &SrcValueOffset, - unsigned &SrcValueAlign, - const MDNode *&TBAAInfo) const { - if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { - Ptr = LD->getBasePtr(); - Size = LD->getMemoryVT().getSizeInBits() >> 3; - SrcValue = LD->getSrcValue(); - SrcValueOffset = LD->getSrcValueOffset(); - SrcValueAlign = LD->getOriginalAlignment(); - TBAAInfo = LD->getTBAAInfo(); - return true; - } - if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { - Ptr = ST->getBasePtr(); - Size = ST->getMemoryVT().getSizeInBits() >> 3; - SrcValue = ST->getSrcValue(); - SrcValueOffset = ST->getSrcValueOffset(); - SrcValueAlign = ST->getOriginalAlignment(); - TBAAInfo = ST->getTBAAInfo(); - return false; - } - llvm_unreachable("FindAliasInfo expected a memory operand"); + SDValue &Ptr, int64_t &Size, + const Value *&SrcValue, + int &SrcValueOffset, + unsigned &SrcValueAlign, + const MDNode *&TBAAInfo) const { + LSBaseSDNode *LS = cast<LSBaseSDNode>(N); + + Ptr = LS->getBasePtr(); + Size = LS->getMemoryVT().getSizeInBits() >> 3; + SrcValue = LS->getSrcValue(); + SrcValueOffset = LS->getSrcValueOffset(); + SrcValueAlign = LS->getOriginalAlignment(); + TBAAInfo = LS->getTBAAInfo(); + return isa<LoadSDNode>(LS); } /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index e8f8c73d6883..0c1ac6982d2a 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -39,6 +39,7 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "isel" #include "llvm/Function.h" #include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" @@ -58,8 +59,15 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Debug.h" +#include "llvm/ADT/Statistic.h" using namespace llvm; +STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by " + "target-independent selector"); +STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by " + "target-specific selector"); +STATISTIC(NumFastIselDead, "Number of dead insts removed on failure"); + /// startNewBlock - Set the current block to which generated machine /// instructions will be appended, and clear the local CSE map. /// @@ -96,6 +104,11 @@ bool FastISel::hasTrivialKill(const Value *V) const { !hasTrivialKill(Cast->getOperand(0))) return false; + // GEPs with all zero indices are trivially coalesced by fast-isel. + if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) + if (GEP->hasAllZeroIndices() && !hasTrivialKill(GEP->getOperand(0))) + return false; + // Only instructions with a single use in the same basic block are considered // to have trivial kills. return I->hasOneUse() && @@ -123,15 +136,8 @@ unsigned FastISel::getRegForValue(const Value *V) { return 0; } - // Look up the value to see if we already have a register for it. We - // cache values defined by Instructions across blocks, and other values - // only locally. This is because Instructions already have the SSA - // def-dominates-use requirement enforced. - DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V); - if (I != FuncInfo.ValueMap.end()) - return I->second; - - unsigned Reg = LocalValueMap[V]; + // Look up the value to see if we already have a register for it. 
+ unsigned Reg = lookUpRegForValue(V); if (Reg != 0) return Reg; @@ -186,7 +192,7 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { uint32_t IntBitWidth = IntVT.getSizeInBits(); bool isExact; (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true, - APFloat::rmTowardZero, &isExact); + APFloat::rmTowardZero, &isExact); if (isExact) { APInt IntVal(IntBitWidth, x); @@ -297,6 +303,18 @@ void FastISel::recomputeInsertPt() { ++FuncInfo.InsertPt; } +void FastISel::removeDeadCode(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator E) { + assert (I && E && std::distance(I, E) > 0 && "Invalid iterator!"); + while (I != E) { + MachineInstr *Dead = &*I; + ++I; + Dead->eraseFromParent(); + ++NumFastIselDead; + } + recomputeInsertPt(); +} + FastISel::SavePoint FastISel::enterLocalValueArea() { MachineBasicBlock::iterator OldInsertPt = FuncInfo.InsertPt; DebugLoc OldDL = DL; @@ -377,6 +395,13 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) { ISDOpcode = ISD::SRA; } + // Transform "urem x, pow2" -> "and x, pow2-1". + if (ISDOpcode == ISD::UREM && isa<BinaryOperator>(I) && + isPowerOf2_64(Imm)) { + --Imm; + ISDOpcode = ISD::AND; + } + unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0, Op0IsKill, Imm, VT.getSimpleVT()); if (ResultReg == 0) return false; @@ -427,6 +452,11 @@ bool FastISel::SelectGetElementPtr(const User *I) { bool NIsKill = hasTrivialKill(I->getOperand(0)); + // Keep a running tab of the total offset to coalesce multiple N = N + Offset + // into a single N = N + TotalOffset. + uint64_t TotalOffs = 0; + // FIXME: What's a good SWAG number for MaxOffs? + uint64_t MaxOffs = 2048; Type *Ty = I->getOperand(0)->getType(); MVT VT = TLI.getPointerTy(); for (GetElementPtrInst::const_op_iterator OI = I->op_begin()+1, @@ -436,14 +466,15 @@ bool FastISel::SelectGetElementPtr(const User *I) { unsigned Field = cast<ConstantInt>(Idx)->getZExtValue(); if (Field) { // N = N + Offset - uint64_t Offs = TD.getStructLayout(StTy)->getElementOffset(Field); - // FIXME: This can be optimized by combining the add with a - // subsequent one. - N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT); - if (N == 0) - // Unhandled operand. Halt "fast" selection and bail. - return false; - NIsKill = true; + TotalOffs += TD.getStructLayout(StTy)->getElementOffset(Field); + if (TotalOffs >= MaxOffs) { + N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); + if (N == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + NIsKill = true; + TotalOffs = 0; + } } Ty = StTy->getElementType(Field); } else { @@ -452,14 +483,26 @@ bool FastISel::SelectGetElementPtr(const User *I) { // If this is a constant subscript, handle it quickly. if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { if (CI->isZero()) continue; - uint64_t Offs = + // N = N + Offset + TotalOffs += TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); - N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT); + if (TotalOffs >= MaxOffs) { + N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); + if (N == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + NIsKill = true; + TotalOffs = 0; + } + continue; + } + if (TotalOffs) { + N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); if (N == 0) // Unhandled operand. Halt "fast" selection and bail. 
return false; NIsKill = true; - continue; + TotalOffs = 0; } // N = N + Idx * ElementSize; @@ -484,6 +527,12 @@ bool FastISel::SelectGetElementPtr(const User *I) { return false; } } + if (TotalOffs) { + N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); + if (N == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + } // We successfully emitted code for the given LLVM Instruction. UpdateValueMap(I, N); @@ -512,21 +561,32 @@ bool FastISel::SelectCall(const User *I) { return true; } + MachineModuleInfo &MMI = FuncInfo.MF->getMMI(); + ComputeUsesVAFloatArgument(*Call, &MMI); + const Function *F = Call->getCalledFunction(); if (!F) return false; // Handle selected intrinsic function calls. switch (F->getIntrinsicID()) { default: break; + // At -O0 we don't care about the lifetime intrinsics. + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + return true; case Intrinsic::dbg_declare: { const DbgDeclareInst *DI = cast<DbgDeclareInst>(Call); if (!DIVariable(DI->getVariable()).Verify() || - !FuncInfo.MF->getMMI().hasDebugInfo()) + !FuncInfo.MF->getMMI().hasDebugInfo()) { + DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); return true; + } const Value *Address = DI->getAddress(); - if (!Address || isa<UndefValue>(Address) || isa<AllocaInst>(Address)) + if (!Address || isa<UndefValue>(Address)) { + DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); return true; + } unsigned Reg = 0; unsigned Offset = 0; @@ -534,16 +594,36 @@ bool FastISel::SelectCall(const User *I) { // Some arguments' frame index is recorded during argument lowering. Offset = FuncInfo.getArgumentFrameIndex(Arg); if (Offset) - Reg = TRI.getFrameRegister(*FuncInfo.MF); + Reg = TRI.getFrameRegister(*FuncInfo.MF); } if (!Reg) - Reg = getRegForValue(Address); + Reg = lookUpRegForValue(Address); + + // If we have a VLA that has a "use" in a metadata node that's then used + // here but it has no other uses, then we have a problem. E.g., + // + // int foo (const int *x) { + // char a[*x]; + // return 0; + // } + // + // If we assign 'a' a vreg and fast isel later on has to use the selection + // DAG isel, it will want to copy the value to the vreg. However, there are + // no uses, which goes counter to what selection DAG isel expects. + if (!Reg && !Address->use_empty() && isa<Instruction>(Address) && + (!isa<AllocaInst>(Address) || + !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(Address)))) + Reg = FuncInfo.InitializeRegForValue(Address); if (Reg) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::DBG_VALUE)) .addReg(Reg, RegState::Debug).addImm(Offset) .addMetadata(DI->getVariable()); + else + // We can't yet handle anything else here because it would require + // generating code, thus altering codegen because of debug info. 
+ DEBUG(dbgs() << "Dropping debug info for " << DI); return true; } case Intrinsic::dbg_value: { @@ -581,60 +661,6 @@ bool FastISel::SelectCall(const User *I) { } return true; } - case Intrinsic::eh_exception: { - EVT VT = TLI.getValueType(Call->getType()); - if (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)!=TargetLowering::Expand) - break; - - assert(FuncInfo.MBB->isLandingPad() && - "Call to eh.exception not in landing pad!"); - unsigned Reg = TLI.getExceptionAddressRegister(); - const TargetRegisterClass *RC = TLI.getRegClassFor(VT); - unsigned ResultReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(Reg); - UpdateValueMap(Call, ResultReg); - return true; - } - case Intrinsic::eh_selector: { - EVT VT = TLI.getValueType(Call->getType()); - if (TLI.getOperationAction(ISD::EHSELECTION, VT) != TargetLowering::Expand) - break; - if (FuncInfo.MBB->isLandingPad()) - AddCatchInfo(*Call, &FuncInfo.MF->getMMI(), FuncInfo.MBB); - else { -#ifndef NDEBUG - FuncInfo.CatchInfoLost.insert(Call); -#endif - // FIXME: Mark exception selector register as live in. Hack for PR1508. - unsigned Reg = TLI.getExceptionSelectorRegister(); - if (Reg) FuncInfo.MBB->addLiveIn(Reg); - } - - unsigned Reg = TLI.getExceptionSelectorRegister(); - EVT SrcVT = TLI.getPointerTy(); - const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT); - unsigned ResultReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(Reg); - - bool ResultRegIsKill = hasTrivialKill(Call); - - // Cast the register to the type of the selector. - if (SrcVT.bitsGT(MVT::i32)) - ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE, - ResultReg, ResultRegIsKill); - else if (SrcVT.bitsLT(MVT::i32)) - ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, - ISD::SIGN_EXTEND, ResultReg, ResultRegIsKill); - if (ResultReg == 0) - // Unhandled operand. Halt "fast" selection and bail. - return false; - - UpdateValueMap(Call, ResultReg); - - return true; - } case Intrinsic::objectsize: { ConstantInt *CI = cast<ConstantInt>(Call->getArgOperand(1)); unsigned long long Res = CI->isZero() ? -1ULL : 0; @@ -726,8 +752,8 @@ bool FastISel::SelectBitCast(const User *I) { // First, try to perform the bitcast by inserting a reg-reg copy. unsigned ResultReg = 0; if (SrcVT.getSimpleVT() == DstVT.getSimpleVT()) { - TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT); - TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT); + const TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT); + const TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT); // Don't attempt a cross-class copy. It will likely fail. if (SrcClass == DstClass) { ResultReg = createResultReg(DstClass); @@ -758,17 +784,33 @@ FastISel::SelectInstruction(const Instruction *I) { DL = I->getDebugLoc(); + MachineBasicBlock::iterator SavedInsertPt = FuncInfo.InsertPt; + // First, try doing target-independent selection. if (SelectOperator(I, I->getOpcode())) { + ++NumFastIselSuccessIndependent; DL = DebugLoc(); return true; } + // Remove dead code. However, ignore call instructions since we've flushed + // the local value map and recomputed the insert point. + if (!isa<CallInst>(I)) { + recomputeInsertPt(); + if (SavedInsertPt != FuncInfo.InsertPt) + removeDeadCode(FuncInfo.InsertPt, SavedInsertPt); + } // Next, try calling the target to attempt to handle the instruction. 
+ SavedInsertPt = FuncInfo.InsertPt; if (TargetSelectInstruction(I)) { + ++NumFastIselSuccessTarget; DL = DebugLoc(); return true; } + // Check for dead code and remove as necessary. + recomputeInsertPt(); + if (SavedInsertPt != FuncInfo.InsertPt) + removeDeadCode(FuncInfo.InsertPt, SavedInsertPt); DL = DebugLoc(); return false; @@ -779,8 +821,11 @@ FastISel::SelectInstruction(const Instruction *I) { /// the CFG. void FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DL) { - if (FuncInfo.MBB->isLayoutSuccessor(MSucc)) { - // The unconditional fall-through case, which needs no instructions. + + if (FuncInfo.MBB->getBasicBlock()->size() > 1 && FuncInfo.MBB->isLayoutSuccessor(MSucc)) { + // For more accurate line information if this is the only instruction + // in the block then emit it, otherwise we have the unconditional + // fall-through case, which needs no instructions. } else { // The unconditional branch case. TII.InsertBranch(*FuncInfo.MBB, MSucc, NULL, @@ -1354,8 +1399,8 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // exactly one register for each non-void instruction. EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true); if (VT == MVT::Other || !TLI.isTypeLegal(VT)) { - // Promote MVT::i1. - if (VT == MVT::i1) + // Handle integer promotions, though, because they're common and easy. + if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16) VT = TLI.getTypeToTransformTo(LLVMBB->getContext(), VT); else { FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index b052740a1abe..8dde919079d9 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "function-lowering-info" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" @@ -68,7 +69,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { GetReturnInfo(Fn->getReturnType(), Fn->getAttributes().getRetAttributes(), Outs, TLI); CanLowerReturn = TLI.CanLowerReturn(Fn->getCallingConv(), *MF, - Fn->isVarArg(), + Fn->isVarArg(), Outs, Fn->getContext()); // Initialize the mapping of values to registers. This is only set up for @@ -92,14 +93,16 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { // candidate. I.e., it would trigger the creation of a stack protector. bool MayNeedSP = (AI->isArrayAllocation() || - (TySize > 8 && isa<ArrayType>(Ty) && + (TySize >= 8 && isa<ArrayType>(Ty) && cast<ArrayType>(Ty)->getElementType()->isIntegerTy(8))); StaticAllocaMap[AI] = - MF->getFrameInfo()->CreateStackObject(TySize, Align, false, MayNeedSP); + MF->getFrameInfo()->CreateStackObject(TySize, Align, false, + MayNeedSP); } for (; BB != EB; ++BB) - for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); + I != E; ++I) { // Mark values used outside their block as exported, by allocating // a virtual register for them. if (isUsedOutsideOfDefiningBlock(I)) @@ -355,7 +358,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { /// argument. This overrides previous frame index entry for this argument, /// if any. 
void FunctionLoweringInfo::setArgumentFrameIndex(const Argument *A, - int FI) { + int FI) { ByValArgFrameIndexMap[A] = FI; } @@ -367,10 +370,34 @@ int FunctionLoweringInfo::getArgumentFrameIndex(const Argument *A) { ByValArgFrameIndexMap.find(A); if (I != ByValArgFrameIndexMap.end()) return I->second; - DEBUG(dbgs() << "Argument does not have assigned frame index!"); + DEBUG(dbgs() << "Argument does not have assigned frame index!\n"); return 0; } +/// ComputeUsesVAFloatArgument - Determine if any floating-point values are +/// being passed to this variadic function, and set the MachineModuleInfo's +/// usesVAFloatArgument flag if so. This flag is used to emit an undefined +/// reference to _fltused on Windows, which will link in MSVCRT's +/// floating-point support. +void llvm::ComputeUsesVAFloatArgument(const CallInst &I, + MachineModuleInfo *MMI) +{ + FunctionType *FT = cast<FunctionType>( + I.getCalledValue()->getType()->getContainedType(0)); + if (FT->isVarArg() && !MMI->usesVAFloatArgument()) { + for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { + Type* T = I.getArgOperand(i)->getType(); + for (po_iterator<Type*> i = po_begin(T), e = po_end(T); + i != e; ++i) { + if (i->isFloatingPointTy()) { + MMI->setUsesVAFloatArgument(true); + return; + } + } + } + } +} + /// AddCatchInfo - Extract the personality and type infos from an eh.selector /// call, and add them to the specified machine basic block. void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI, @@ -425,34 +452,6 @@ void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI, } } -void llvm::CopyCatchInfo(const BasicBlock *SuccBB, const BasicBlock *LPad, - MachineModuleInfo *MMI, FunctionLoweringInfo &FLI) { - SmallPtrSet<const BasicBlock*, 4> Visited; - - // The 'eh.selector' call may not be in the direct successor of a basic block, - // but could be several successors deeper. If we don't find it, try going one - // level further. <rdar://problem/8824861> - while (Visited.insert(SuccBB)) { - for (BasicBlock::const_iterator I = SuccBB->begin(), E = --SuccBB->end(); - I != E; ++I) - if (const EHSelectorInst *EHSel = dyn_cast<EHSelectorInst>(I)) { - // Apply the catch info to LPad. - AddCatchInfo(*EHSel, MMI, FLI.MBBMap[LPad]); -#ifndef NDEBUG - if (!FLI.MBBMap[SuccBB]->isLandingPad()) - FLI.CatchInfoFound.insert(EHSel); -#endif - return; - } - - const BranchInst *Br = dyn_cast<BranchInst>(SuccBB->getTerminator()); - if (Br && Br->isUnconditional()) - SuccBB = Br->getSuccessor(0); - else - break; - } -} - /// AddLandingPadInfo - Extract the exception handling information from the /// landingpad instruction and add them to the specified machine module info. 
void llvm::AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI, diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 2ff66f8f8715..1467d887789c 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -294,7 +294,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, const TargetRegisterClass *DstRC = 0; if (IIOpNum < II->getNumOperands()) DstRC = TII->getRegClass(*II, IIOpNum, TRI); - assert((DstRC || (MCID.isVariadic() && IIOpNum >= MCID.getNumOperands())) && + assert((DstRC || (MI->isVariadic() && IIOpNum >= MCID.getNumOperands())) && "Don't have operand info for this instruction!"); if (DstRC && !MRI->constrainRegClass(VReg, DstRC, MinRCSize)) { unsigned NewVReg = MRI->createVirtualRegister(DstRC); @@ -351,6 +351,8 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op, MI->addOperand(MachineOperand::CreateFPImm(CFP)); } else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) { MI->addOperand(MachineOperand::CreateReg(R->getReg(), false)); + } else if (RegisterMaskSDNode *RM = dyn_cast<RegisterMaskSDNode>(Op)) { + MI->addOperand(MachineOperand::CreateRegMask(RM->getRegMask())); } else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) { MI->addOperand(MachineOperand::CreateGA(TGA->getGlobal(), TGA->getOffset(), TGA->getTargetFlags())); @@ -574,14 +576,19 @@ void InstrEmitter::EmitRegSequence(SDNode *Node, for (unsigned i = 1; i != NumOps; ++i) { SDValue Op = Node->getOperand(i); if ((i & 1) == 0) { - unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue(); - unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap); - const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); - const TargetRegisterClass *SRC = + RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(i-1)); + // Skip physical registers as they don't have a vreg to get and we'll + // insert copies for them in TwoAddressInstructionPass anyway. + if (!R || !TargetRegisterInfo::isPhysicalRegister(R->getReg())) { + unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue(); + unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap); + const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); + const TargetRegisterClass *SRC = TRI->getMatchingSuperRegClass(RC, TRC, SubIdx); - if (SRC && SRC != RC) { - MRI->setRegClass(NewVReg, SRC); - RC = SRC; + if (SRC && SRC != RC) { + MRI->setRegClass(NewVReg, SRC); + RC = SRC; + } } } AddOperand(MI, Op, i+1, &II, VRBaseMap, /*IsDebug=*/false, @@ -700,33 +707,6 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // Create the new machine instruction. MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), II); - // The MachineInstr constructor adds implicit-def operands. Scan through - // these to determine which are dead. - if (MI->getNumOperands() != 0 && - Node->getValueType(Node->getNumValues()-1) == MVT::Glue) { - // First, collect all used registers. - SmallVector<unsigned, 8> UsedRegs; - for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser()) - if (F->getOpcode() == ISD::CopyFromReg) - UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg()); - else { - // Collect declared implicit uses. - const MCInstrDesc &MCID = TII->get(F->getMachineOpcode()); - UsedRegs.append(MCID.getImplicitUses(), - MCID.getImplicitUses() + MCID.getNumImplicitUses()); - // In addition to declared implicit uses, we must also check for - // direct RegisterSDNode operands. 
- for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i) - if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) { - unsigned Reg = R->getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) - UsedRegs.push_back(Reg); - } - } - // Then mark unused registers as dead. - MI->setPhysRegsDeadExcept(UsedRegs, *TRI); - } - // Add result register values for things that are defined by this // instruction. if (NumResults) @@ -751,30 +731,63 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // hook knows where in the block to insert the replacement code. MBB->insert(InsertPos, MI); + // The MachineInstr may also define physregs instead of virtregs. These + // physreg values can reach other instructions in different ways: + // + // 1. When there is a use of a Node value beyond the explicitly defined + // virtual registers, we emit a CopyFromReg for one of the implicitly + // defined physregs. This only happens when HasPhysRegOuts is true. + // + // 2. A CopyFromReg reading a physreg may be glued to this instruction. + // + // 3. A glued instruction may implicitly use a physreg. + // + // 4. A glued instruction may use a RegisterSDNode operand. + // + // Collect all the used physreg defs, and make sure that any unused physreg + // defs are marked as dead. + SmallVector<unsigned, 8> UsedRegs; + // Additional results must be physical register defs. if (HasPhysRegOuts) { for (unsigned i = II.getNumDefs(); i < NumResults; ++i) { unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()]; - if (Node->hasAnyUseOfValue(i)) - EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap); - // If there are no uses, mark the register as dead now, so that - // MachineLICM/Sink can see that it's dead. Don't do this if the - // node has a Glue value, for the benefit of targets still using - // Glue for values in physregs. - else if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue) - MI->addRegisterDead(Reg, TRI); + if (!Node->hasAnyUseOfValue(i)) + continue; + // This implicitly defined physreg has a use. + UsedRegs.push_back(Reg); + EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap); } } - // If the instruction has implicit defs and the node doesn't, mark the - // implicit def as dead. If the node has any glue outputs, we don't do this - // because we don't know what implicit defs are being used by glued nodes. - if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue) - if (const unsigned *IDList = II.getImplicitDefs()) { - for (unsigned i = NumResults, e = II.getNumDefs()+II.getNumImplicitDefs(); - i != e; ++i) - MI->addRegisterDead(IDList[i-II.getNumDefs()], TRI); + // Scan the glue chain for any used physregs. + if (Node->getValueType(Node->getNumValues()-1) == MVT::Glue) { + for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser()) { + if (F->getOpcode() == ISD::CopyFromReg) { + UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg()); + continue; + } else if (F->getOpcode() == ISD::CopyToReg) { + // Skip CopyToReg nodes that are internal to the glue chain. + continue; + } + // Collect declared implicit uses. + const MCInstrDesc &MCID = TII->get(F->getMachineOpcode()); + UsedRegs.append(MCID.getImplicitUses(), + MCID.getImplicitUses() + MCID.getNumImplicitUses()); + // In addition to declared implicit uses, we must also check for + // direct RegisterSDNode operands. 
+ for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i) + if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) { + unsigned Reg = R->getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + UsedRegs.push_back(Reg); + } } + } + + // Finally mark unused registers as dead. + if (!UsedRegs.empty() || II.getImplicitDefs()) + MI->setPhysRegsDeadExcept(UsedRegs, *TRI); // Run post-isel target hook to adjust this instruction if needed. #ifdef NDEBUG @@ -794,10 +807,8 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, Node->dump(); #endif llvm_unreachable("This target-independent node should have been selected!"); - break; case ISD::EntryToken: llvm_unreachable("EntryToken should have been excluded from the schedule!"); - break; case ISD::MERGE_VALUES: case ISD::TokenFactor: // fall thru break; diff --git a/lib/CodeGen/SelectionDAG/LLVMBuild.txt b/lib/CodeGen/SelectionDAG/LLVMBuild.txt new file mode 100644 index 000000000000..81d2e000a2e8 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./lib/CodeGen/SelectionDAG/LLVMBuild.txt -----------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = SelectionDAG +parent = CodeGen +required_libraries = Analysis CodeGen Core MC Support Target TransformUtils diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 63255ae2ebd9..a96a99781f4e 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -46,37 +46,18 @@ using namespace llvm; /// will attempt merge setcc and brc instructions into brcc's. /// namespace { -class SelectionDAGLegalize { +class SelectionDAGLegalize : public SelectionDAG::DAGUpdateListener { const TargetMachine &TM; const TargetLowering &TLI; SelectionDAG &DAG; - // Libcall insertion helpers. - - /// LastCALLSEQ_END - This keeps track of the CALLSEQ_END node that has been - /// legalized. We use this to ensure that calls are properly serialized - /// against each other, including inserted libcalls. - SDValue LastCALLSEQ_END; - - /// IsLegalizingCall - This member is used *only* for purposes of providing - /// helpful assertions that a libcall isn't created while another call is - /// being legalized (which could lead to non-serialized call sequences). - bool IsLegalizingCall; - - /// LegalizedNodes - For nodes that are of legal width, and that have more - /// than one use, this map indicates what regularized operand to use. This - /// allows us to avoid legalizing the same thing more than once. - DenseMap<SDValue, SDValue> LegalizedNodes; + /// LegalizePosition - The iterator for walking through the node list. + SelectionDAG::allnodes_iterator LegalizePosition; - void AddLegalizedOperand(SDValue From, SDValue To) { - LegalizedNodes.insert(std::make_pair(From, To)); - // If someone requests legalization of the new node, return itself. 
- if (From != To) - LegalizedNodes.insert(std::make_pair(To, To)); + /// LegalizedNodes - The set of nodes which have already been legalized. + SmallPtrSet<SDNode *, 16> LegalizedNodes; - // Transfer SDDbgValues. - DAG.TransferDbgValues(From, To); - } + // Libcall insertion helpers. public: explicit SelectionDAGLegalize(SelectionDAG &DAG); @@ -84,9 +65,8 @@ public: void LegalizeDAG(); private: - /// LegalizeOp - Return a legal replacement for the given operation, with - /// all legal operands. - SDValue LegalizeOp(SDValue O); + /// LegalizeOp - Legalizes the given operation. + void LegalizeOp(SDNode *Node); SDValue OptimizeFloatStore(StoreSDNode *ST); @@ -105,10 +85,7 @@ private: /// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3> SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl, SDValue N1, SDValue N2, - SmallVectorImpl<int> &Mask) const; - - bool LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest, - SmallPtrSet<SDNode*, 32> &NodesLeadingTo); + ArrayRef<int> Mask) const; void LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, DebugLoc dl); @@ -150,10 +127,46 @@ private: SDValue ExpandInsertToVectorThroughStack(SDValue Op); SDValue ExpandVectorBuildThroughStack(SDNode* Node); + SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP); + std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node); - void ExpandNode(SDNode *Node, SmallVectorImpl<SDValue> &Results); - void PromoteNode(SDNode *Node, SmallVectorImpl<SDValue> &Results); + void ExpandNode(SDNode *Node); + void PromoteNode(SDNode *Node); + + void ForgetNode(SDNode *N) { + LegalizedNodes.erase(N); + if (LegalizePosition == SelectionDAG::allnodes_iterator(N)) + ++LegalizePosition; + } + +public: + // DAGUpdateListener implementation. + virtual void NodeDeleted(SDNode *N, SDNode *E) { + ForgetNode(N); + } + virtual void NodeUpdated(SDNode *N) {} + + // Node replacement helpers + void ReplacedNode(SDNode *N) { + if (N->use_empty()) { + DAG.RemoveDeadNode(N, this); + } else { + ForgetNode(N); + } + } + void ReplaceNode(SDNode *Old, SDNode *New) { + DAG.ReplaceAllUsesWith(Old, New, this); + ReplacedNode(Old); + } + void ReplaceNode(SDValue Old, SDValue New) { + DAG.ReplaceAllUsesWith(Old, New, this); + ReplacedNode(Old.getNode()); + } + void ReplaceNode(SDNode *Old, const SDValue *New) { + DAG.ReplaceAllUsesWith(Old, New, this); + ReplacedNode(Old); + } }; } @@ -164,7 +177,7 @@ private: SDValue SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl, SDValue N1, SDValue N2, - SmallVectorImpl<int> &Mask) const { + ArrayRef<int> Mask) const { unsigned NumMaskElts = VT.getVectorNumElements(); unsigned NumDestElts = NVT.getVectorNumElements(); unsigned NumEltsGrowth = NumDestElts / NumMaskElts; @@ -195,145 +208,37 @@ SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag) } void SelectionDAGLegalize::LegalizeDAG() { - LastCALLSEQ_END = DAG.getEntryNode(); - IsLegalizingCall = false; - - // The legalize process is inherently a bottom-up recursive process (users - // legalize their uses before themselves). Given infinite stack space, we - // could just start legalizing on the root and traverse the whole graph. In - // practice however, this causes us to run out of stack space on large basic - // blocks. To avoid this problem, compute an ordering of the nodes where each - // node is only legalized after all of its operands are legalized. 
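// [Editor's sketch — not part of this patch.] The replacement driver that
// follows reduces to a generic fixpoint: sweep the node list in reverse,
// remember what has been visited, and keep sweeping, because processing a
// node may append brand-new nodes that still need a visit. All names below
// are illustrative stand-ins, not LLVM API.
#include <set>
#include <vector>

template <typename T, typename Fn>
void fixpointSweep(std::vector<T> &Items, Fn Process) {
  std::set<T> Visited;
  for (;;) {
    bool AnyNew = false;
    for (size_t i = Items.size(); i-- != 0;) {
      if (Visited.insert(Items[i]).second) {
        AnyNew = true;
        Process(Items[i]); // may push_back further items onto Items
      }
    }
    if (!AnyNew)
      break; // a full sweep visited nothing new: done
  }
}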
DAG.AssignTopologicalOrder(); - for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), - E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I) - LegalizeOp(SDValue(I, 0)); - // Finally, it's possible the root changed. Get the new root. - SDValue OldRoot = DAG.getRoot(); - assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?"); - DAG.setRoot(LegalizedNodes[OldRoot]); - - LegalizedNodes.clear(); - - // Remove dead nodes now. - DAG.RemoveDeadNodes(); -} - - -/// FindCallEndFromCallStart - Given a chained node that is part of a call -/// sequence, find the CALLSEQ_END node that terminates the call sequence. -static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) { - // Nested CALLSEQ_START/END constructs aren't yet legal, - // but we can DTRT and handle them correctly here. - if (Node->getOpcode() == ISD::CALLSEQ_START) - depth++; - else if (Node->getOpcode() == ISD::CALLSEQ_END) { - depth--; - if (depth == 0) - return Node; - } - if (Node->use_empty()) - return 0; // No CallSeqEnd - - // The chain is usually at the end. - SDValue TheChain(Node, Node->getNumValues()-1); - if (TheChain.getValueType() != MVT::Other) { - // Sometimes it's at the beginning. - TheChain = SDValue(Node, 0); - if (TheChain.getValueType() != MVT::Other) { - // Otherwise, hunt for it. - for (unsigned i = 1, e = Node->getNumValues(); i != e; ++i) - if (Node->getValueType(i) == MVT::Other) { - TheChain = SDValue(Node, i); - break; - } - - // Otherwise, we walked into a node without a chain. - if (TheChain.getValueType() != MVT::Other) - return 0; + // Visit all the nodes. We start in topological order, so that we see + // nodes with their original operands intact. Legalization can produce + // new nodes which may themselves need to be legalized. Iterate until all + // nodes have been legalized. + for (;;) { + bool AnyLegalized = false; + for (LegalizePosition = DAG.allnodes_end(); + LegalizePosition != DAG.allnodes_begin(); ) { + --LegalizePosition; + + SDNode *N = LegalizePosition; + if (LegalizedNodes.insert(N)) { + AnyLegalized = true; + LegalizeOp(N); + } } - } - - for (SDNode::use_iterator UI = Node->use_begin(), - E = Node->use_end(); UI != E; ++UI) { - - // Make sure to only follow users of our token chain. - SDNode *User = *UI; - for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) - if (User->getOperand(i) == TheChain) - if (SDNode *Result = FindCallEndFromCallStart(User, depth)) - return Result; - } - return 0; -} - -/// FindCallStartFromCallEnd - Given a chained node that is part of a call -/// sequence, find the CALLSEQ_START node that initiates the call sequence. -static SDNode *FindCallStartFromCallEnd(SDNode *Node) { - int nested = 0; - assert(Node && "Didn't find callseq_start for a call??"); - while (Node->getOpcode() != ISD::CALLSEQ_START || nested) { - Node = Node->getOperand(0).getNode(); - assert(Node->getOperand(0).getValueType() == MVT::Other && - "Node doesn't have a token chain argument!"); - switch (Node->getOpcode()) { - default: + if (!AnyLegalized) break; - case ISD::CALLSEQ_START: - if (!nested) - return Node; - nested--; - break; - case ISD::CALLSEQ_END: - nested++; - break; - } - } - return 0; -} - -/// LegalizeAllNodesNotLeadingTo - Recursively walk the uses of N, looking to -/// see if any uses can reach Dest. If no dest operands can get to dest, -/// legalize them, legalize ourself, and return false, otherwise, return true. -/// -/// Keep track of the nodes we fine that actually do lead to Dest in -/// NodesLeadingTo. 
This avoids retraversing them exponential number of times. -/// -bool SelectionDAGLegalize::LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest, - SmallPtrSet<SDNode*, 32> &NodesLeadingTo) { - if (N == Dest) return true; // N certainly leads to Dest :) - - // If we've already processed this node and it does lead to Dest, there is no - // need to reprocess it. - if (NodesLeadingTo.count(N)) return true; - - // If the first result of this node has been already legalized, then it cannot - // reach N. - if (LegalizedNodes.count(SDValue(N, 0))) return false; - - // Okay, this node has not already been legalized. Check and legalize all - // operands. If none lead to Dest, then we can legalize this node. - bool OperandsLeadToDest = false; - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - OperandsLeadToDest |= // If an operand leads to Dest, so do we. - LegalizeAllNodesNotLeadingTo(N->getOperand(i).getNode(), Dest, - NodesLeadingTo); - if (OperandsLeadToDest) { - NodesLeadingTo.insert(N); - return true; } - // Okay, this node looks safe, legalize it and return false. - LegalizeOp(SDValue(N, 0)); - return false; + // Remove dead nodes now. + DAG.RemoveDeadNodes(); } /// ExpandConstantFP - Expands the ConstantFP node to an integer constant or /// a load from the constant pool. -static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP, - SelectionDAG &DAG, const TargetLowering &TLI) { +SDValue +SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { bool Extend = false; DebugLoc dl = CFP->getDebugLoc(); @@ -369,20 +274,27 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP, SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy()); unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); - if (Extend) - return DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT, - DAG.getEntryNode(), - CPIdx, MachinePointerInfo::getConstantPool(), - VT, false, false, Alignment); - return DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), false, false, - Alignment); + if (Extend) { + SDValue Result = + DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT, + DAG.getEntryNode(), + CPIdx, MachinePointerInfo::getConstantPool(), + VT, false, false, Alignment); + return Result; + } + SDValue Result = + DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(), false, false, false, + Alignment); + return Result; } /// ExpandUnalignedStore - Expands an unaligned store to 2 half-size stores. -static -SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, - const TargetLowering &TLI) { +static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, + const TargetLowering &TLI, + SelectionDAGLegalize *DAGLegalize) { + assert(ST->getAddressingMode() == ISD::UNINDEXED && + "unaligned indexed stores not implemented!"); SDValue Chain = ST->getChain(); SDValue Ptr = ST->getBasePtr(); SDValue Val = ST->getValue(); @@ -397,8 +309,10 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // same size, then a (misaligned) int store. // FIXME: Does not handle truncating floating point stores! 
SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val); - return DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(), - ST->isVolatile(), ST->isNonTemporal(), Alignment); + Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(), + ST->isVolatile(), ST->isNonTemporal(), Alignment); + DAGLegalize->ReplaceNode(SDValue(ST, 0), Result); + return; } // Do a (aligned) store to a stack slot, then copy from the stack slot // to the final destination using (unaligned) integer loads and stores. @@ -427,7 +341,7 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // Load one integer register's worth from the stack slot. SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); // Store it to the final location. Remember the store. Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr, ST->getPointerInfo().getWithOffset(Offset), @@ -458,8 +372,11 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, ST->isNonTemporal(), MinAlign(ST->getAlignment(), Offset))); // The order of the stores doesn't matter - say it with a TokenFactor. - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], - Stores.size()); + SDValue Result = + DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], + Stores.size()); + DAGLegalize->ReplaceNode(SDValue(ST, 0), Result); + return; } assert(ST->getMemoryVT().isInteger() && !ST->getMemoryVT().isVector() && @@ -488,13 +405,18 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, NewStoredVT, ST->isVolatile(), ST->isNonTemporal(), Alignment); - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); + SDValue Result = + DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); + DAGLegalize->ReplaceNode(SDValue(ST, 0), Result); } /// ExpandUnalignedLoad - Expands an unaligned load to 2 half-size loads. -static -SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, - const TargetLowering &TLI) { +static void +ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, + const TargetLowering &TLI, + SDValue &ValResult, SDValue &ChainResult) { + assert(LD->getAddressingMode() == ISD::UNINDEXED && + "unaligned indexed loads not implemented!"); SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); EVT VT = LD->getValueType(0); @@ -507,13 +429,15 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, // then bitconvert to floating point or vector. SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getPointerInfo(), LD->isVolatile(), - LD->isNonTemporal(), LD->getAlignment()); + LD->isNonTemporal(), + LD->isInvariant(), LD->getAlignment()); SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad); if (VT.isFloatingPoint() && LoadedVT != VT) Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result); - SDValue Ops[] = { Result, Chain }; - return DAG.getMergeValues(Ops, 2, dl); + ValResult = Result; + ChainResult = Chain; + return; } // Copy the value to a (aligned) stack slot using (unaligned) integer @@ -537,6 +461,7 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset), LD->isVolatile(), LD->isNonTemporal(), + LD->isInvariant(), MinAlign(LD->getAlignment(), Offset)); // Follow the load with a store to the stack slot. Remember the store. 
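// [Editor's sketch — not part of this patch.] The two-half-store path of
// ExpandUnalignedStore earlier in this hunk, written out as plain scalar code
// for a 32-bit value split into two 16-bit pieces. The legalizer builds the
// same thing from a shift plus two truncating stores; this version assumes
// the little-endian arrangement (the non-big-endian branch).
#include <cstdint>
#include <cstring>

static void storeUnaligned32LE(uint8_t *Ptr, uint32_t Val) {
  uint16_t Lo = uint16_t(Val);       // Val truncated to the low half
  uint16_t Hi = uint16_t(Val >> 16); // Val shifted down to the high half
  std::memcpy(Ptr, &Lo, sizeof(Lo));              // store at Ptr
  std::memcpy(Ptr + sizeof(Lo), &Hi, sizeof(Hi)); // store at the offset pointer
}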
Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr, @@ -572,8 +497,9 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, MachinePointerInfo(), LoadedVT, false, false, 0); // Callers expect a MERGE_VALUES node. - SDValue Ops[] = { Load, TF }; - return DAG.getMergeValues(Ops, 2, dl); + ValResult = Load; + ChainResult = TF; + return; } assert(LoadedVT.isInteger() && !LoadedVT.isVector() && "Unaligned load of unsupported type."); @@ -626,8 +552,8 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), Hi.getValue(1)); - SDValue Ops[] = { Result, TF }; - return DAG.getMergeValues(Ops, 2, dl); + ValResult = Result; + ChainResult = TF; } /// PerformInsertVectorEltInMemory - Some target cannot handle a variable @@ -672,7 +598,8 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, false, false, 0); // Load the updated vector. return DAG.getLoad(VT, dl, Ch, StackPtr, - MachinePointerInfo::getFixedStack(SPFI), false, false, 0); + MachinePointerInfo::getFixedStack(SPFI), false, false, + false, 0); } @@ -763,11 +690,10 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { /// LegalizeOp - Return a legal replacement for the given operation, with /// all legal operands. -SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { - if (Op.getOpcode() == ISD::TargetConstant) // Allow illegal target nodes. - return Op; +void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { + if (Node->getOpcode() == ISD::TargetConstant) // Allow illegal target nodes. + return; - SDNode *Node = Op.getNode(); DebugLoc dl = Node->getDebugLoc(); for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) @@ -782,13 +708,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Node->getOperand(i).getOpcode() == ISD::TargetConstant) && "Unexpected illegal type!"); - // Note that LegalizeOp may be reentered even from single-use nodes, which - // means that we always must cache transformed nodes. - DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op); - if (I != LegalizedNodes.end()) return I->second; - SDValue Tmp1, Tmp2, Tmp3, Tmp4; - SDValue Result = Op; bool isCustom = false; // Figure out the correct action; the way to query this varies by opcode @@ -798,10 +718,15 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case ISD::INTRINSIC_W_CHAIN: case ISD::INTRINSIC_WO_CHAIN: case ISD::INTRINSIC_VOID: - case ISD::VAARG: case ISD::STACKSAVE: Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other); break; + case ISD::VAARG: + Action = TLI.getOperationAction(Node->getOpcode(), + Node->getValueType(0)); + if (Action != TargetLowering::Promote) + Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other); + break; case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: case ISD::EXTRACT_VECTOR_ELT: @@ -865,7 +790,6 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case ISD::FRAME_TO_ARGS_OFFSET: case ISD::EH_SJLJ_SETJMP: case ISD::EH_SJLJ_LONGJMP: - case ISD::EH_SJLJ_DISPATCHSETUP: // These operations lie about being legal: when they claim to be legal, // they should actually be expanded. Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); @@ -882,17 +806,6 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { if (Action == TargetLowering::Legal) Action = TargetLowering::Custom; break; - case ISD::BUILD_VECTOR: - // A weird case: legalization for BUILD_VECTOR never legalizes the - // operands! - // FIXME: This really sucks... 
changing it isn't semantically incorrect, - // but it massively pessimizes the code for floating-point BUILD_VECTORs - // because ConstantFP operands get legalized into constant pool loads - // before the BUILD_VECTOR code can see them. It doesn't usually bite, - // though, because BUILD_VECTORS usually get lowered into other nodes - // which get legalized properly. - SimpleFinishLegalizing = false; - break; default: if (Node->getOpcode() >= ISD::BUILTIN_OP_END) { Action = TargetLowering::Legal; @@ -903,22 +816,11 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { } if (SimpleFinishLegalizing) { - SmallVector<SDValue, 8> Ops, ResultVals; + SmallVector<SDValue, 8> Ops; for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) - Ops.push_back(LegalizeOp(Node->getOperand(i))); + Ops.push_back(Node->getOperand(i)); switch (Node->getOpcode()) { default: break; - case ISD::BR: - case ISD::BRIND: - case ISD::BR_JT: - case ISD::BR_CC: - case ISD::BRCOND: - // Branches tweak the chain to include LastCALLSEQ_END - Ops[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ops[0], - LastCALLSEQ_END); - Ops[0] = LegalizeOp(Ops[0]); - LastCALLSEQ_END = DAG.getEntryNode(); - break; case ISD::SHL: case ISD::SRL: case ISD::SRA: @@ -926,57 +828,66 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case ISD::ROTR: // Legalizing shifts/rotates requires adjusting the shift amount // to the appropriate width. - if (!Ops[1].getValueType().isVector()) - Ops[1] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(), - Ops[1])); + if (!Ops[1].getValueType().isVector()) { + SDValue SAO = DAG.getShiftAmountOperand(Ops[0].getValueType(), Ops[1]); + HandleSDNode Handle(SAO); + LegalizeOp(SAO.getNode()); + Ops[1] = Handle.getValue(); + } break; case ISD::SRL_PARTS: case ISD::SRA_PARTS: case ISD::SHL_PARTS: // Legalizing shifts/rotates requires adjusting the shift amount // to the appropriate width. - if (!Ops[2].getValueType().isVector()) - Ops[2] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(), - Ops[2])); + if (!Ops[2].getValueType().isVector()) { + SDValue SAO = DAG.getShiftAmountOperand(Ops[0].getValueType(), Ops[2]); + HandleSDNode Handle(SAO); + LegalizeOp(SAO.getNode()); + Ops[2] = Handle.getValue(); + } break; } - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), Ops.data(), - Ops.size()), 0); + SDNode *NewNode = DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size()); + if (NewNode != Node) { + DAG.ReplaceAllUsesWith(Node, NewNode, this); + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) + DAG.TransferDbgValues(SDValue(Node, i), SDValue(NewNode, i)); + ReplacedNode(Node); + Node = NewNode; + } switch (Action) { case TargetLowering::Legal: - for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) - ResultVals.push_back(Result.getValue(i)); - break; + return; case TargetLowering::Custom: // FIXME: The handling for custom lowering with multiple results is // a complete mess. 
- Tmp1 = TLI.LowerOperation(Result, DAG); + Tmp1 = TLI.LowerOperation(SDValue(Node, 0), DAG); if (Tmp1.getNode()) { + SmallVector<SDValue, 8> ResultVals; for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) { if (e == 1) ResultVals.push_back(Tmp1); else ResultVals.push_back(Tmp1.getValue(i)); } - break; + if (Tmp1.getNode() != Node || Tmp1.getResNo() != 0) { + DAG.ReplaceAllUsesWith(Node, ResultVals.data(), this); + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) + DAG.TransferDbgValues(SDValue(Node, i), ResultVals[i]); + ReplacedNode(Node); + } + return; } // FALL THROUGH case TargetLowering::Expand: - ExpandNode(Result.getNode(), ResultVals); - break; + ExpandNode(Node); + return; case TargetLowering::Promote: - PromoteNode(Result.getNode(), ResultVals); - break; - } - if (!ResultVals.empty()) { - for (unsigned i = 0, e = ResultVals.size(); i != e; ++i) { - if (ResultVals[i] != SDValue(Node, i)) - ResultVals[i] = LegalizeOp(ResultVals[i]); - AddLegalizedOperand(SDValue(Node, i), ResultVals[i]); - } - return ResultVals[Op.getResNo()]; + PromoteNode(Node); + return; } } @@ -987,160 +898,24 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Node->dump( &DAG); dbgs() << "\n"; #endif - assert(0 && "Do not know how to legalize this operator!"); + llvm_unreachable("Do not know how to legalize this operator!"); - case ISD::SRA: - case ISD::SRL: - case ISD::SHL: { - // Scalarize vector SRA/SRL/SHL. - EVT VT = Node->getValueType(0); - assert(VT.isVector() && "Unable to legalize non-vector shift"); - assert(TLI.isTypeLegal(VT.getScalarType())&& "Element type must be legal"); - unsigned NumElem = VT.getVectorNumElements(); - - SmallVector<SDValue, 8> Scalars; - for (unsigned Idx = 0; Idx < NumElem; Idx++) { - SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - VT.getScalarType(), - Node->getOperand(0), DAG.getIntPtrConstant(Idx)); - SDValue Sh = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - VT.getScalarType(), - Node->getOperand(1), DAG.getIntPtrConstant(Idx)); - Scalars.push_back(DAG.getNode(Node->getOpcode(), dl, - VT.getScalarType(), Ex, Sh)); - } - Result = DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), - &Scalars[0], Scalars.size()); - break; - } - - case ISD::BUILD_VECTOR: - switch (TLI.getOperationAction(ISD::BUILD_VECTOR, Node->getValueType(0))) { - default: assert(0 && "This action is not supported yet!"); - case TargetLowering::Custom: - Tmp3 = TLI.LowerOperation(Result, DAG); - if (Tmp3.getNode()) { - Result = Tmp3; - break; - } - // FALLTHROUGH - case TargetLowering::Expand: - Result = ExpandBUILD_VECTOR(Result.getNode()); - break; - } - break; - case ISD::CALLSEQ_START: { - SDNode *CallEnd = FindCallEndFromCallStart(Node); - - // Recursively Legalize all of the inputs of the call end that do not lead - // to this call start. This ensures that any libcalls that need be inserted - // are inserted *before* the CALLSEQ_START. - {SmallPtrSet<SDNode*, 32> NodesLeadingTo; - for (unsigned i = 0, e = CallEnd->getNumOperands(); i != e; ++i) - LegalizeAllNodesNotLeadingTo(CallEnd->getOperand(i).getNode(), Node, - NodesLeadingTo); - } - - // Now that we have legalized all of the inputs (which may have inserted - // libcalls), create the new CALLSEQ_START node. - Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain. - - // Merge in the last call to ensure that this call starts after the last - // call ended. 
- if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) { - Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - Tmp1, LastCALLSEQ_END); - Tmp1 = LegalizeOp(Tmp1); - } - - // Do not try to legalize the target-specific arguments (#1+). - if (Tmp1 != Node->getOperand(0)) { - SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end()); - Ops[0] = Tmp1; - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), &Ops[0], - Ops.size()), Result.getResNo()); - } - - // Remember that the CALLSEQ_START is legalized. - AddLegalizedOperand(Op.getValue(0), Result); - if (Node->getNumValues() == 2) // If this has a flag result, remember it. - AddLegalizedOperand(Op.getValue(1), Result.getValue(1)); - - // Now that the callseq_start and all of the non-call nodes above this call - // sequence have been legalized, legalize the call itself. During this - // process, no libcalls can/will be inserted, guaranteeing that no calls - // can overlap. - assert(!IsLegalizingCall && "Inconsistent sequentialization of calls!"); - // Note that we are selecting this call! - LastCALLSEQ_END = SDValue(CallEnd, 0); - IsLegalizingCall = true; - - // Legalize the call, starting from the CALLSEQ_END. - LegalizeOp(LastCALLSEQ_END); - assert(!IsLegalizingCall && "CALLSEQ_END should have cleared this!"); - return Result; - } + case ISD::CALLSEQ_START: case ISD::CALLSEQ_END: - // If the CALLSEQ_START node hasn't been legalized first, legalize it. This - // will cause this node to be legalized as well as handling libcalls right. - if (LastCALLSEQ_END.getNode() != Node) { - LegalizeOp(SDValue(FindCallStartFromCallEnd(Node), 0)); - DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op); - assert(I != LegalizedNodes.end() && - "Legalizing the call start should have legalized this node!"); - return I->second; - } - - // Otherwise, the call start has been legalized and everything is going - // according to plan. Just legalize ourselves normally here. - Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain. - // Do not try to legalize the target-specific arguments (#1+), except for - // an optional flag input. - if (Node->getOperand(Node->getNumOperands()-1).getValueType() != MVT::Glue){ - if (Tmp1 != Node->getOperand(0)) { - SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end()); - Ops[0] = Tmp1; - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), - &Ops[0], Ops.size()), - Result.getResNo()); - } - } else { - Tmp2 = LegalizeOp(Node->getOperand(Node->getNumOperands()-1)); - if (Tmp1 != Node->getOperand(0) || - Tmp2 != Node->getOperand(Node->getNumOperands()-1)) { - SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end()); - Ops[0] = Tmp1; - Ops.back() = Tmp2; - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), - &Ops[0], Ops.size()), - Result.getResNo()); - } - } - assert(IsLegalizingCall && "Call sequence imbalance between start/end?"); - // This finishes up call legalization. - IsLegalizingCall = false; - - // If the CALLSEQ_END node has a flag, remember that we legalized it. - AddLegalizedOperand(SDValue(Node, 0), Result.getValue(0)); - if (Node->getNumValues() == 2) - AddLegalizedOperand(SDValue(Node, 1), Result.getValue(1)); - return Result.getValue(Op.getResNo()); + break; case ISD::LOAD: { LoadSDNode *LD = cast<LoadSDNode>(Node); - Tmp1 = LegalizeOp(LD->getChain()); // Legalize the chain. - Tmp2 = LegalizeOp(LD->getBasePtr()); // Legalize the base pointer. + Tmp1 = LD->getChain(); // Legalize the chain. + Tmp2 = LD->getBasePtr(); // Legalize the base pointer. 
ISD::LoadExtType ExtType = LD->getExtensionType(); if (ExtType == ISD::NON_EXTLOAD) { EVT VT = Node->getValueType(0); - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), - Tmp1, Tmp2, LD->getOffset()), - Result.getResNo()); - Tmp3 = Result.getValue(0); - Tmp4 = Result.getValue(1); + Tmp3 = SDValue(Node, 0); + Tmp4 = SDValue(Node, 1); switch (TLI.getOperationAction(Node->getOpcode(), VT)) { - default: assert(0 && "This action is not supported yet!"); + default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: // If this is an unaligned load and the target doesn't support it, // expand it. @@ -1148,20 +923,16 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); if (LD->getAlignment() < ABIAlignment){ - Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()), - DAG, TLI); - Tmp3 = Result.getOperand(0); - Tmp4 = Result.getOperand(1); - Tmp3 = LegalizeOp(Tmp3); - Tmp4 = LegalizeOp(Tmp4); + ExpandUnalignedLoad(cast<LoadSDNode>(Node), + DAG, TLI, Tmp3, Tmp4); } } break; case TargetLowering::Custom: Tmp1 = TLI.LowerOperation(Tmp3, DAG); if (Tmp1.getNode()) { - Tmp3 = LegalizeOp(Tmp1); - Tmp4 = LegalizeOp(Tmp1.getValue(1)); + Tmp3 = Tmp1; + Tmp4 = Tmp1.getValue(1); } break; case TargetLowering::Promote: { @@ -1172,17 +943,19 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); - Tmp3 = LegalizeOp(DAG.getNode(ISD::BITCAST, dl, VT, Tmp1)); - Tmp4 = LegalizeOp(Tmp1.getValue(1)); + LD->isInvariant(), LD->getAlignment()); + Tmp3 = DAG.getNode(ISD::BITCAST, dl, VT, Tmp1); + Tmp4 = Tmp1.getValue(1); break; } } - // Since loads produce two values, make sure to remember that we - // legalized both of them. - AddLegalizedOperand(SDValue(Node, 0), Tmp3); - AddLegalizedOperand(SDValue(Node, 1), Tmp4); - return Op.getResNo() ? Tmp4 : Tmp3; + if (Tmp4.getNode() != Node) { + assert(Tmp3.getNode() != Node && "Load must be completely replaced"); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp3); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp4); + ReplacedNode(Node); + } + return; } EVT SrcVT = LD->getMemoryVT(); @@ -1213,9 +986,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { ISD::LoadExtType NewExtType = ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD; - Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), - Tmp1, Tmp2, LD->getPointerInfo(), - NVT, isVolatile, isNonTemporal, Alignment); + SDValue Result = + DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), + Tmp1, Tmp2, LD->getPointerInfo(), + NVT, isVolatile, isNonTemporal, Alignment); Ch = Result.getValue(1); // The chain. @@ -1230,8 +1004,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Result.getValueType(), Result, DAG.getValueType(SrcVT)); - Tmp1 = LegalizeOp(Result); - Tmp2 = LegalizeOp(Ch); + Tmp1 = Result; + Tmp2 = Ch; } else if (SrcWidth & (SrcWidth - 1)) { // If not loading a power-of-2 number of bits, expand as two loads. assert(!SrcVT.isVector() && "Unsupported extload!"); @@ -1274,7 +1048,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { TLI.getShiftAmountTy(Hi.getValueType()))); // Join the hi and lo parts. 
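// [Editor's sketch — not part of this patch.] The arithmetic behind the
// non-power-of-2 extending-load split above: read the value as one
// power-of-2-sized piece plus a small leftover piece, then recombine them
// with a shift and an OR (the "join the hi and lo parts" step). Shown here
// for a 24-bit load on a little-endian host; the legalizer's version also
// handles the big-endian arrangement and the sign/zero-extend flavours.
#include <cstdint>
#include <cstring>

static uint32_t loadExt24LE(const uint8_t *Ptr) {
  uint16_t Lo;
  uint8_t Hi;
  std::memcpy(&Lo, Ptr, sizeof(Lo));     // 16-bit piece at offset 0
  std::memcpy(&Hi, Ptr + 2, sizeof(Hi)); // 8-bit piece at offset 2
  return uint32_t(Lo) | (uint32_t(Hi) << 16); // hi shifted up, then OR'd in
}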
- Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); + Tmp1 = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); } else { // Big endian - avoid unaligned loads. // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8 @@ -1304,29 +1078,25 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { TLI.getShiftAmountTy(Hi.getValueType()))); // Join the hi and lo parts. - Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); + Tmp1 = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); } - Tmp1 = LegalizeOp(Result); - Tmp2 = LegalizeOp(Ch); + Tmp2 = Ch; } else { switch (TLI.getLoadExtAction(ExtType, SrcVT)) { - default: assert(0 && "This action is not supported yet!"); + default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Custom: isCustom = true; // FALLTHROUGH case TargetLowering::Legal: - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), - Tmp1, Tmp2, LD->getOffset()), - Result.getResNo()); - Tmp1 = Result.getValue(0); - Tmp2 = Result.getValue(1); + Tmp1 = SDValue(Node, 0); + Tmp2 = SDValue(Node, 1); if (isCustom) { - Tmp3 = TLI.LowerOperation(Result, DAG); + Tmp3 = TLI.LowerOperation(SDValue(Node, 0), DAG); if (Tmp3.getNode()) { - Tmp1 = LegalizeOp(Tmp3); - Tmp2 = LegalizeOp(Tmp3.getValue(1)); + Tmp1 = Tmp3; + Tmp2 = Tmp3.getValue(1); } } else { // If this is an unaligned load and the target doesn't support it, @@ -1337,12 +1107,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); if (LD->getAlignment() < ABIAlignment){ - Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()), - DAG, TLI); - Tmp1 = Result.getOperand(0); - Tmp2 = Result.getOperand(1); - Tmp1 = LegalizeOp(Tmp1); - Tmp2 = LegalizeOp(Tmp2); + ExpandUnalignedLoad(cast<LoadSDNode>(Node), + DAG, TLI, Tmp1, Tmp2); } } } @@ -1352,7 +1118,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); + LD->isInvariant(), LD->getAlignment()); unsigned ExtendOp; switch (ExtType) { case ISD::EXTLOAD: @@ -1363,95 +1129,13 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break; default: llvm_unreachable("Unexpected extend load type!"); } - Result = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); - Tmp1 = LegalizeOp(Result); // Relegalize new nodes. - Tmp2 = LegalizeOp(Load.getValue(1)); + Tmp1 = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); + Tmp2 = Load.getValue(1); break; } - // If this is a promoted vector load, and the vector element types are - // legal, then scalarize it. 
- if (ExtType == ISD::EXTLOAD && SrcVT.isVector() && - TLI.isTypeLegal(Node->getValueType(0).getScalarType())) { - SmallVector<SDValue, 8> LoadVals; - SmallVector<SDValue, 8> LoadChains; - unsigned NumElem = SrcVT.getVectorNumElements(); - unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8; - - for (unsigned Idx=0; Idx<NumElem; Idx++) { - Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, - DAG.getIntPtrConstant(Stride)); - SDValue ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, - Node->getValueType(0).getScalarType(), - Tmp1, Tmp2, LD->getPointerInfo().getWithOffset(Idx * Stride), - SrcVT.getScalarType(), - LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); - - LoadVals.push_back(ScalarLoad.getValue(0)); - LoadChains.push_back(ScalarLoad.getValue(1)); - } - Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &LoadChains[0], LoadChains.size()); - SDValue ValRes = DAG.getNode(ISD::BUILD_VECTOR, dl, - Node->getValueType(0), &LoadVals[0], LoadVals.size()); - - Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes. - Tmp2 = LegalizeOp(Result.getValue(0)); // Relegalize new nodes. - break; - } - - // If this is a promoted vector load, and the vector element types are - // illegal, create the promoted vector from bitcasted segments. - if (ExtType == ISD::EXTLOAD && SrcVT.isVector()) { - EVT MemElemTy = Node->getValueType(0).getScalarType(); - EVT SrcSclrTy = SrcVT.getScalarType(); - unsigned SizeRatio = - (MemElemTy.getSizeInBits() / SrcSclrTy.getSizeInBits()); - - SmallVector<SDValue, 8> LoadVals; - SmallVector<SDValue, 8> LoadChains; - unsigned NumElem = SrcVT.getVectorNumElements(); - unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8; - - for (unsigned Idx=0; Idx<NumElem; Idx++) { - Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, - DAG.getIntPtrConstant(Stride)); - SDValue ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, - SrcVT.getScalarType(), - Tmp1, Tmp2, LD->getPointerInfo().getWithOffset(Idx * Stride), - SrcVT.getScalarType(), - LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); - if (TLI.isBigEndian()) { - // MSB (which is garbage, comes first) - LoadVals.push_back(ScalarLoad.getValue(0)); - for (unsigned i = 0; i<SizeRatio-1; ++i) - LoadVals.push_back(DAG.getUNDEF(SrcVT.getScalarType())); - } else { - // LSB (which is data, comes first) - for (unsigned i = 0; i<SizeRatio-1; ++i) - LoadVals.push_back(DAG.getUNDEF(SrcVT.getScalarType())); - LoadVals.push_back(ScalarLoad.getValue(0)); - } - LoadChains.push_back(ScalarLoad.getValue(1)); - } - - Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &LoadChains[0], LoadChains.size()); - EVT TempWideVector = EVT::getVectorVT(*DAG.getContext(), - SrcVT.getScalarType(), NumElem*SizeRatio); - SDValue ValRes = DAG.getNode(ISD::BUILD_VECTOR, dl, - TempWideVector, &LoadVals[0], LoadVals.size()); - - // Cast to the correct type - ValRes = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), ValRes); - - Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes. - Tmp2 = LegalizeOp(Result.getValue(0)); // Relegalize new nodes. - break; - - } + assert(!SrcVT.isVector() && + "Vector Loads are handled in LegalizeVectorOps"); // FIXME: This does not work for vectors on most targets. Sign- and // zero-extend operations are currently folded into extending loads, @@ -1461,10 +1145,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { "EXTLOAD should always be supported!"); // Turn the unsupported load into an EXTLOAD followed by an explicit // zero/sign extend inreg. 
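// [Editor's sketch — not part of this patch.] What the explicit "extend
// inreg" mentioned above computes once the any-extending load has put the
// narrow value in the low bits of a wider register, shown for an 8-bit value
// inside a 32-bit register.
#include <cstdint>

static int32_t signExtendInReg8(uint32_t X) {
  // Shift the i8 sign bit up to the register's sign bit, then arithmetic-shift
  // it back down so it smears across the upper bits.
  return int32_t(X << 24) >> 24;
}

static uint32_t zeroExtendInReg8(uint32_t X) {
  return X & 0xFFu; // mask away everything above the loaded width
}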
- Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), - Tmp1, Tmp2, LD->getPointerInfo(), SrcVT, - LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); + SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), + Tmp1, Tmp2, LD->getPointerInfo(), SrcVT, + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); SDValue ValRes; if (ExtType == ISD::SEXTLOAD) ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, @@ -1472,42 +1156,41 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Result, DAG.getValueType(SrcVT)); else ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType()); - Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes. - Tmp2 = LegalizeOp(Result.getValue(1)); // Relegalize new nodes. + Tmp1 = ValRes; + Tmp2 = Result.getValue(1); break; } } // Since loads produce two values, make sure to remember that we legalized // both of them. - AddLegalizedOperand(SDValue(Node, 0), Tmp1); - AddLegalizedOperand(SDValue(Node, 1), Tmp2); - return Op.getResNo() ? Tmp2 : Tmp1; + if (Tmp2.getNode() != Node) { + assert(Tmp1.getNode() != Node && "Load must be completely replaced"); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp1); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp2); + ReplacedNode(Node); + } + break; } case ISD::STORE: { StoreSDNode *ST = cast<StoreSDNode>(Node); - Tmp1 = LegalizeOp(ST->getChain()); // Legalize the chain. - Tmp2 = LegalizeOp(ST->getBasePtr()); // Legalize the pointer. + Tmp1 = ST->getChain(); + Tmp2 = ST->getBasePtr(); unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); if (!ST->isTruncatingStore()) { if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) { - Result = SDValue(OptStore, 0); + ReplaceNode(ST, OptStore); break; } { - Tmp3 = LegalizeOp(ST->getValue()); - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), - Tmp1, Tmp3, Tmp2, - ST->getOffset()), - Result.getResNo()); - + Tmp3 = ST->getValue(); EVT VT = Tmp3.getValueType(); switch (TLI.getOperationAction(ISD::STORE, VT)) { - default: assert(0 && "This action is not supported yet!"); + default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: // If this is an unaligned store and the target doesn't support it, // expand it. 
@@ -1515,27 +1198,31 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) - Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()), - DAG, TLI); + ExpandUnalignedStore(cast<StoreSDNode>(Node), + DAG, TLI, this); } break; case TargetLowering::Custom: - Tmp1 = TLI.LowerOperation(Result, DAG); - if (Tmp1.getNode()) Result = Tmp1; + Tmp1 = TLI.LowerOperation(SDValue(Node, 0), DAG); + if (Tmp1.getNode()) + ReplaceNode(SDValue(Node, 0), Tmp1); break; - case TargetLowering::Promote: + case TargetLowering::Promote: { assert(VT.isVector() && "Unknown legal promote case!"); Tmp3 = DAG.getNode(ISD::BITCAST, dl, TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3); - Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, - ST->getPointerInfo(), isVolatile, - isNonTemporal, Alignment); + SDValue Result = + DAG.getStore(Tmp1, dl, Tmp3, Tmp2, + ST->getPointerInfo(), isVolatile, + isNonTemporal, Alignment); + ReplaceNode(SDValue(Node, 0), Result); break; } + } break; } } else { - Tmp3 = LegalizeOp(ST->getValue()); + Tmp3 = ST->getValue(); EVT StVT = ST->getMemoryVT(); unsigned StWidth = StVT.getSizeInBits(); @@ -1547,8 +1234,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { EVT NVT = EVT::getIntegerVT(*DAG.getContext(), StVT.getStoreSizeInBits()); Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT); - Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), - NVT, isVolatile, isNonTemporal, Alignment); + SDValue Result = + DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), + NVT, isVolatile, isNonTemporal, Alignment); + ReplaceNode(SDValue(Node, 0), Result); } else if (StWidth & (StWidth - 1)) { // If not storing a power-of-2 number of bits, expand as two stores. assert(!StVT.isVector() && "Unsupported truncstore!"); @@ -1602,17 +1291,11 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { } // The order of the stores doesn't matter. - Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); + SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); + ReplaceNode(SDValue(Node, 0), Result); } else { - if (Tmp1 != ST->getChain() || Tmp3 != ST->getValue() || - Tmp2 != ST->getBasePtr()) - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), - Tmp1, Tmp3, Tmp2, - ST->getOffset()), - Result.getResNo()); - switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) { - default: assert(0 && "This action is not supported yet!"); + default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: // If this is an unaligned store and the target doesn't support it, // expand it. 
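// [Editor's sketch — not part of this patch.] The width arithmetic behind the
// two-store truncating-store split above: "StWidth & (StWidth - 1)" clears
// the lowest set bit, so it is non-zero exactly when the stored width is not
// a power of two. The store is then split into a power-of-2 piece and a
// leftover piece (e.g. 24 bits -> 16 + 8), mirroring the unaligned-store
// expansion earlier in this file.
static bool isNonPow2Width(unsigned W) {
  return W != 0 && (W & (W - 1)) != 0;
}

static unsigned roundDownToPow2(unsigned W) {
  unsigned R = 1;
  while ((R << 1) <= W)
    R <<= 1;
  return R; // e.g. 24 -> 16; the leftover piece is W - R, e.g. 8
}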
@@ -1620,120 +1303,24 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) - Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()), - DAG, TLI); + ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this); } break; case TargetLowering::Custom: - Result = TLI.LowerOperation(Result, DAG); + ReplaceNode(SDValue(Node, 0), + TLI.LowerOperation(SDValue(Node, 0), DAG)); break; case TargetLowering::Expand: - - EVT WideScalarVT = Tmp3.getValueType().getScalarType(); - EVT NarrowScalarVT = StVT.getScalarType(); - - if (StVT.isVector()) { - unsigned NumElem = StVT.getVectorNumElements(); - // The type of the data we want to save - EVT RegVT = Tmp3.getValueType(); - EVT RegSclVT = RegVT.getScalarType(); - // The type of data as saved in memory. - EVT MemSclVT = StVT.getScalarType(); - - bool RegScalarLegal = TLI.isTypeLegal(RegSclVT); - bool MemScalarLegal = TLI.isTypeLegal(MemSclVT); - - // We need to expand this store. If the register element type - // is legal then we can scalarize the vector and use - // truncating stores. - if (RegScalarLegal) { - // Cast floats into integers - unsigned ScalarSize = MemSclVT.getSizeInBits(); - EVT EltVT = EVT::getIntegerVT(*DAG.getContext(), ScalarSize); - - // Round odd types to the next pow of two. - if (!isPowerOf2_32(ScalarSize)) - ScalarSize = NextPowerOf2(ScalarSize); - - // Store Stride in bytes - unsigned Stride = ScalarSize/8; - // Extract each of the elements from the original vector - // and save them into memory individually. - SmallVector<SDValue, 8> Stores; - for (unsigned Idx = 0; Idx < NumElem; Idx++) { - SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - RegSclVT, Tmp3, DAG.getIntPtrConstant(Idx)); - - Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, - DAG.getIntPtrConstant(Stride)); - - // This scalar TruncStore may be illegal, but we lehalize it - // later. - SDValue Store = DAG.getTruncStore(Tmp1, dl, Ex, Tmp2, - ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT, - isVolatile, isNonTemporal, Alignment); - - Stores.push_back(Store); - } - - Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &Stores[0], Stores.size()); - break; - } - - // The scalar register type is illegal. - // For example saving <2 x i64> -> <2 x i32> on a x86. - // In here we bitcast the value into a vector of smaller parts and - // save it using smaller scalars. - if (!RegScalarLegal && MemScalarLegal) { - // Store Stride in bytes - unsigned Stride = MemSclVT.getSizeInBits()/8; - - unsigned SizeRatio = - (RegSclVT.getSizeInBits() / MemSclVT.getSizeInBits()); - - EVT CastValueVT = EVT::getVectorVT(*DAG.getContext(), - MemSclVT, - SizeRatio * NumElem); - - // Cast the wide elem vector to wider vec with smaller elem type. - // Example <2 x i64> -> <4 x i32> - Tmp3 = DAG.getNode(ISD::BITCAST, dl, CastValueVT, Tmp3); - - SmallVector<SDValue, 8> Stores; - for (unsigned Idx=0; Idx < NumElem * SizeRatio; Idx++) { - // Extract the Ith element. - SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - NarrowScalarVT, Tmp3, DAG.getIntPtrConstant(Idx)); - // Bump pointer. 
- Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, - DAG.getIntPtrConstant(Stride)); - - // Store if, this element is: - // - First element on big endian, or - // - Last element on little endian - if (( TLI.isBigEndian() && (Idx % SizeRatio == 0)) || - ((!TLI.isBigEndian() && (Idx % SizeRatio == SizeRatio-1)))) { - SDValue Store = DAG.getStore(Tmp1, dl, Ex, Tmp2, - ST->getPointerInfo().getWithOffset(Idx*Stride), - isVolatile, isNonTemporal, Alignment); - Stores.push_back(Store); - } - } - Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &Stores[0], Stores.size()); - break; - } - - assert(false && "Unable to legalize the vector trunc store!"); - }// is vector - + assert(!StVT.isVector() && + "Vector Stores are handled in LegalizeVectorOps"); // TRUNCSTORE:i16 i32 -> STORE i16 assert(TLI.isTypeLegal(StVT) && "Do not know how to expand this store!"); Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3); - Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), - isVolatile, isNonTemporal, Alignment); + SDValue Result = + DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), + isVolatile, isNonTemporal, Alignment); + ReplaceNode(SDValue(Node, 0), Result); break; } } @@ -1741,17 +1328,6 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { break; } } - assert(Result.getValueType() == Op.getValueType() && - "Bad legalization!"); - - // Make sure that the generated code is itself legal. - if (Result != Op) - Result = LegalizeOp(Result); - - // Note that LegalizeOp may be reentered even from single-use nodes, which - // means that we always must cache transformed nodes. - AddLegalizedOperand(Op, Result); - return Result; } SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { @@ -1778,7 +1354,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { if (Op.getValueType().isVector()) return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr,MachinePointerInfo(), - false, false, 0); + false, false, false, 0); return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, MachinePointerInfo(), Vec.getValueType().getVectorElementType(), @@ -1826,7 +1402,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { // Finally, load the updated vector. return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo, - false, false, 0); + false, false, false, 0); } SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { @@ -1876,7 +1452,8 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { StoreChain = DAG.getEntryNode(); // Result is a load from the stack slot. - return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo, false, false, 0); + return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo, + false, false, false, 0); } SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { @@ -1905,7 +1482,7 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { assert(FloatVT.isByteSized() && "Unsupported floating point type!"); // Load out a legal integer with the same sign bit as the float. SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); } else { // Little endian SDValue LoadPtr = StackPtr; // The float may be wider than the integer we are going to load. Advance @@ -1916,7 +1493,7 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { LoadPtr, DAG.getIntPtrConstant(ByteOffset)); // Load a legal integer containing the sign bit. 
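// [Editor's sketch — not part of this patch.] The bit-level idea behind
// ExpandFCOPYSIGN above: reinterpret the sign-source value as an integer,
// isolate its top (sign) bit, and combine it with the magnitude bits of the
// other operand. The legalizer gets at that bit through a stack temporary and
// the integer loads shown in this hunk; this is the plain scalar equivalent
// for a double.
#include <cstdint>
#include <cstring>

static double copySign64(double Mag, double Sgn) {
  uint64_t MagBits, SgnBits;
  std::memcpy(&MagBits, &Mag, sizeof(Mag));
  std::memcpy(&SgnBits, &Sgn, sizeof(Sgn));
  const uint64_t SignMask = 1ULL << 63;
  uint64_t Bits = (MagBits & ~SignMask) | (SgnBits & SignMask);
  double Result;
  std::memcpy(&Result, &Bits, sizeof(Result));
  return Result;
}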
SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); // Move the sign bit to the top bit of the loaded integer. unsigned BitShift = LoadTy.getSizeInBits() - (FloatVT.getSizeInBits() - 8 * ByteOffset); @@ -1984,7 +1561,7 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, EVT OpVT = LHS.getValueType(); ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get(); switch (TLI.getCondCodeAction(CCCode, OpVT)) { - default: assert(0 && "Unknown condition code action!"); + default: llvm_unreachable("Unknown condition code action!"); case TargetLowering::Legal: // Nothing to do. break; @@ -1992,7 +1569,7 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID; unsigned Opc = 0; switch (CCCode) { - default: assert(0 && "Don't know how to expand this condition!"); + default: llvm_unreachable("Don't know how to expand this condition!"); case ISD::SETOEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETO; Opc = ISD::AND; break; case ISD::SETOGT: CC1 = ISD::SETGT; CC2 = ISD::SETO; Opc = ISD::AND; break; case ISD::SETOGE: CC1 = ISD::SETGE; CC2 = ISD::SETO; Opc = ISD::AND; break; @@ -2058,7 +1635,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, // Result is a load from the stack slot. if (SlotSize == DestSize) return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo, - false, false, DestAlign); + false, false, false, DestAlign); assert(SlotSize < DestSize && "Unknown extension!"); return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, @@ -2081,7 +1658,7 @@ SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) { false, false, 0); return DAG.getLoad(Node->getValueType(0), dl, Ch, StackPtr, MachinePointerInfo::getFixedStack(SPFI), - false, false, 0); + false, false, false, 0); } @@ -2127,7 +1704,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { // If all elements are constants, create a load from the constant pool. if (isConstant) { - std::vector<Constant*> CV; + SmallVector<Constant*, 16> CV; for (unsigned i = 0, e = NumElems; i != e; ++i) { if (ConstantFPSDNode *V = dyn_cast<ConstantFPSDNode>(Node->getOperand(i))) { @@ -2155,7 +1732,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), - false, false, Alignment); + false, false, false, Alignment); } if (!MoreThanTwoValues) { @@ -2190,12 +1767,6 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { // and leave the Hi part unset. SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned) { - assert(!IsLegalizingCall && "Cannot overlap legalization of calls!"); - // The input chain to this libcall is the entry node of the function. - // Legalizing the call will automatically add the previous call to the - // dependence. - SDValue InChain = DAG.getEntryNode(); - TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) { @@ -2209,26 +1780,31 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); - // Splice the libcall in wherever FindInputOutputChains tells us to. 
Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); + // By default, the input chain to this libcall is the entry node of the + // function. If the libcall is going to be emitted as a tail call then + // TLI.isUsedByReturnOnly will change it to the right chain if the return + // node which is being folded has a non-entry input chain. + SDValue InChain = DAG.getEntryNode(); + // isTailCall may be true since the callee does not reference caller stack // frame. Check if it's in the right position. - bool isTailCall = isInTailCallPosition(DAG, Node, TLI); + SDValue TCChain = InChain; + bool isTailCall = isInTailCallPosition(DAG, Node, TCChain, TLI); + if (isTailCall) + InChain = TCChain; + std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), isTailCall, - /*isReturnValueUsed=*/true, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, Node->getDebugLoc()); if (!CallInfo.second.getNode()) // It's a tailcall, return the chain (which is the DAG root). return DAG.getRoot(); - // Legalize the call sequence, starting with the chain. This will advance - // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that - // was added by LowerCallTo (guaranteeing proper serialization of calls). - LegalizeOp(CallInfo.second); return CallInfo.first; } @@ -2254,15 +1830,10 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, - false, 0, TLI.getLibcallCallingConv(LC), false, - /*isReturnValueUsed=*/true, + false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, dl); - // Legalize the call sequence, starting with the chain. This will advance - // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that - // was added by LowerCallTo (guaranteeing proper serialization of calls). - LegalizeOp(CallInfo.second); - return CallInfo.first; } @@ -2272,7 +1843,6 @@ std::pair<SDValue, SDValue> SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned) { - assert(!IsLegalizingCall && "Cannot overlap legalization of calls!"); SDValue InChain = Node->getOperand(0); TargetLowering::ArgListTy Args; @@ -2289,18 +1859,13 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); - // Splice the libcall in wherever FindInputOutputChains tells us to. Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, - /*isReturnValueUsed=*/true, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, Node->getDebugLoc()); - // Legalize the call sequence, starting with the chain. This will advance - // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that - // was added by LowerCallTo (guaranteeing proper serialization of calls). 
- LegalizeOp(CallInfo.second); return CallInfo; } @@ -2311,7 +1876,7 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, RTLIB::Libcall Call_PPCF128) { RTLIB::Libcall LC; switch (Node->getValueType(0).getSimpleVT().SimpleTy) { - default: assert(0 && "Unexpected request for libcall!"); + default: llvm_unreachable("Unexpected request for libcall!"); case MVT::f32: LC = Call_F32; break; case MVT::f64: LC = Call_F64; break; case MVT::f80: LC = Call_F80; break; @@ -2328,7 +1893,7 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, RTLIB::Libcall Call_I128) { RTLIB::Libcall LC; switch (Node->getValueType(0).getSimpleVT().SimpleTy) { - default: assert(0 && "Unexpected request for libcall!"); + default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i8: LC = Call_I8; break; case MVT::i16: LC = Call_I16; break; case MVT::i32: LC = Call_I32; break; @@ -2343,7 +1908,7 @@ static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, const TargetLowering &TLI) { RTLIB::Libcall LC; switch (Node->getValueType(0).getSimpleVT().SimpleTy) { - default: assert(0 && "Unexpected request for libcall!"); + default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; @@ -2388,7 +1953,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, RTLIB::Libcall LC; switch (Node->getValueType(0).getSimpleVT().SimpleTy) { - default: assert(0 && "Unexpected request for libcall!"); + default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; @@ -2426,21 +1991,16 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); - // Splice the libcall in wherever FindInputOutputChains tells us to. DebugLoc dl = Node->getDebugLoc(); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, - /*isReturnValueUsed=*/true, Callee, Args, DAG, dl); - - // Legalize the call sequence, starting with the chain. This will advance - // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that - // was added by LowerCallTo (guaranteeing proper serialization of calls). - LegalizeOp(CallInfo.second); + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, + Callee, Args, DAG, dl); // Remainder is loaded back from the stack frame. - SDValue Rem = DAG.getLoad(RetVT, dl, LastCALLSEQ_END, FIPtr, - MachinePointerInfo(), false, false, 0); + SDValue Rem = DAG.getLoad(RetVT, dl, CallInfo.second, FIPtr, + MachinePointerInfo(), false, false, false, 0); Results.push_back(CallInfo.first); Results.push_back(Rem); } @@ -2489,7 +2049,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, false, false, 0); // load the constructed double SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot, - MachinePointerInfo(), false, false, 0); + MachinePointerInfo(), false, false, false, 0); // FP constant to bias correct the final result SDValue Bias = DAG.getConstantFP(isSigned ? 
BitsToDouble(0x4330000080000000ULL) : @@ -2611,7 +2171,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, // offset depending on the data type. uint64_t FF; switch (Op0.getValueType().getSimpleVT().SimpleTy) { - default: assert(0 && "Unsupported integer type!"); + default: llvm_unreachable("Unsupported integer type!"); case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float) case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float) case MVT::i32: FF = 0x4F800000ULL; break; // 2^32 (as a float) @@ -2629,13 +2189,15 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, if (DestVT == MVT::f32) FudgeInReg = DAG.getLoad(MVT::f32, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), - false, false, Alignment); + false, false, false, Alignment); else { - FudgeInReg = - LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, - DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), - MVT::f32, false, false, Alignment)); + SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, + DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(), + MVT::f32, false, false, Alignment); + HandleSDNode Handle(Load); + LegalizeOp(Load.getNode()); + FudgeInReg = Handle.getValue(); } return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg); @@ -2731,7 +2293,7 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) { EVT SHVT = TLI.getShiftAmountTy(VT); SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8; switch (VT.getSimpleVT().SimpleTy) { - default: assert(0 && "Unhandled Expand type in BSWAP!"); + default: llvm_unreachable("Unhandled Expand type in BSWAP!"); case MVT::i16: Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT)); Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT)); @@ -2788,7 +2350,7 @@ static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) { SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, DebugLoc dl) { switch (Opc) { - default: assert(0 && "Cannot expand this yet!"); + default: llvm_unreachable("Cannot expand this yet!"); case ISD::CTPOP: { EVT VT = Op.getValueType(); EVT ShVT = TLI.getShiftAmountTy(VT); @@ -2831,6 +2393,9 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, return Op; } + case ISD::CTLZ_ZERO_UNDEF: + // This trivially expands to CTLZ. + return DAG.getNode(ISD::CTLZ, dl, Op.getValueType(), Op); case ISD::CTLZ: { // for now, we do this: // x = x | (x >> 1); @@ -2852,6 +2417,9 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, Op = DAG.getNOT(dl, Op, VT); return DAG.getNode(ISD::CTPOP, dl, VT, Op); } + case ISD::CTTZ_ZERO_UNDEF: + // This trivially expands to CTTZ. 
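+ // CTTZ_ZERO_UNDEF only relaxes the zero-input case (its result is undefined + // when the operand is zero), so the plain CTTZ expansion below is always a + // valid, if slightly conservative, replacement.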
+ return DAG.getNode(ISD::CTTZ, dl, Op.getValueType(), Op); case ISD::CTTZ: { // for now, we use: { return popcount(~x & (x - 1)); } // unless the target has ctlz but not ctpop, in which case we use: @@ -2881,7 +2449,6 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { switch (Opc) { default: llvm_unreachable("Unhandled atomic intrinsic Expand!"); - break; case ISD::ATOMIC_SWAP: switch (VT.SimpleTy) { default: llvm_unreachable("Unexpected value type for atomic!"); @@ -2959,14 +2526,16 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { return ExpandChainLibCall(LC, Node, false); } -void SelectionDAGLegalize::ExpandNode(SDNode *Node, - SmallVectorImpl<SDValue> &Results) { +void SelectionDAGLegalize::ExpandNode(SDNode *Node) { + SmallVector<SDValue, 8> Results; DebugLoc dl = Node->getDebugLoc(); SDValue Tmp1, Tmp2, Tmp3, Tmp4; switch (Node->getOpcode()) { case ISD::CTPOP: case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: Tmp1 = ExpandBitCount(Node->getOpcode(), Node->getOperand(0), dl); Results.push_back(Tmp1); break; @@ -2986,7 +2555,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, case ISD::PREFETCH: case ISD::VAEND: case ISD::EH_SJLJ_LONGJMP: - case ISD::EH_SJLJ_DISPATCHSETUP: // If the target didn't expand these, there's nothing to do, so just // preserve the chain and be done. Results.push_back(Node->getOperand(0)); @@ -3006,7 +2574,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()), false, false, false, false, 0, CallingConv::C, /*isTailCall=*/false, - /*isReturnValueUsed=*/true, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("__sync_synchronize", TLI.getPointerTy()), Args, DAG, dl); @@ -3083,7 +2651,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()), false, false, false, false, 0, CallingConv::C, /*isTailCall=*/false, - /*isReturnValueUsed=*/true, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("abort", TLI.getPointerTy()), Args, DAG, dl); Results.push_back(CallResult.second); @@ -3166,7 +2734,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, unsigned Align = Node->getConstantOperandVal(3); SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, - MachinePointerInfo(V), false, false, 0); + MachinePointerInfo(V), + false, false, false, 0); SDValue VAList = VAListLoad; if (Align > TLI.getMinStackArgumentAlignment()) { @@ -3191,7 +2760,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, MachinePointerInfo(V), false, false, 0); // Load the actual argument out of the pointer VAList Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(), - false, false, 0)); + false, false, false, 0)); Results.push_back(Results[0].getValue(1)); break; } @@ -3202,7 +2771,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue(); Tmp1 = DAG.getLoad(TLI.getPointerTy(), dl, Node->getOperand(0), Node->getOperand(2), MachinePointerInfo(VS), - false, false, 0); + false, false, false, 0); Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), MachinePointerInfo(VD), false, false, 0); Results.push_back(Tmp1); @@ -3236,15 +2805,57 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, Node->getOperand(2), dl)); break; case ISD::VECTOR_SHUFFLE: { - SmallVector<int, 8> 
Mask; - cast<ShuffleVectorSDNode>(Node)->getMask(Mask); + SmallVector<int, 32> NewMask; + ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Node)->getMask(); EVT VT = Node->getValueType(0); EVT EltVT = VT.getVectorElementType(); - if (!TLI.isTypeLegal(EltVT)) - EltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT); + SDValue Op0 = Node->getOperand(0); + SDValue Op1 = Node->getOperand(1); + if (!TLI.isTypeLegal(EltVT)) { + + EVT NewEltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT); + + // BUILD_VECTOR operands are allowed to be wider than the element type. + // But if NewEltVT is smaller than EltVT, the BUILD_VECTOR does not accept it. + if (NewEltVT.bitsLT(EltVT)) { + + // Convert shuffle node. + // If original node was v4i64 and the new EltVT is i32, + // cast operands to v8i32 and re-build the mask. + + // Calculate new VT, the size of the new VT should be equal to original. + EVT NewVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT, + VT.getSizeInBits()/NewEltVT.getSizeInBits()); + assert(NewVT.bitsEq(VT)); + + // cast operands to new VT + Op0 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op0); + Op1 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op1); + + // Convert the shuffle mask + unsigned int factor = NewVT.getVectorNumElements()/VT.getVectorNumElements(); + + // EltVT gets smaller + assert(factor > 0); + + for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) { + if (Mask[i] < 0) { + for (unsigned fi = 0; fi < factor; ++fi) + NewMask.push_back(Mask[i]); + } + else { + for (unsigned fi = 0; fi < factor; ++fi) + NewMask.push_back(Mask[i]*factor+fi); + } + } + Mask = NewMask; + VT = NewVT; + } + EltVT = NewEltVT; + } unsigned NumElems = VT.getVectorNumElements(); - SmallVector<SDValue, 8> Ops; + SmallVector<SDValue, 16> Ops; for (unsigned i = 0; i != NumElems; ++i) { if (Mask[i] < 0) { Ops.push_back(DAG.getUNDEF(EltVT)); @@ -3253,14 +2864,17 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, unsigned Idx = Mask[i]; if (Idx < NumElems) Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, - Node->getOperand(0), + Op0, DAG.getIntPtrConstant(Idx))); else Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, - Node->getOperand(1), + Op1, DAG.getIntPtrConstant(Idx - NumElems))); } + Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size()); + // We may have changed the BUILD_VECTOR type. Cast it back to the Node type. + Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), Tmp1); Results.push_back(Tmp1); break; } @@ -3408,10 +3022,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node); // Check to see if this FP immediate is already legal. // If this is a legal constant, turn it into a TargetConstantFP node.
- if (TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0))) - Results.push_back(SDValue(Node, 0)); - else - Results.push_back(ExpandConstantFP(CFP, true, DAG, TLI)); + if (!TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0))) + Results.push_back(ExpandConstantFP(CFP, true)); break; } case ISD::EHSELECTION: { @@ -3423,13 +3035,23 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, break; } case ISD::EXCEPTIONADDR: { - unsigned Reg = TLI.getExceptionAddressRegister(); + unsigned Reg = TLI.getExceptionPointerRegister(); assert(Reg && "Can't expand to unknown register!"); Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, Reg, Node->getValueType(0))); Results.push_back(Results[0].getValue(1)); break; } + case ISD::FSUB: { + EVT VT = Node->getValueType(0); + assert(TLI.isOperationLegalOrCustom(ISD::FADD, VT) && + TLI.isOperationLegalOrCustom(ISD::FNEG, VT) && + "Don't know how to expand this FP subtraction!"); + Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1)); + Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1); + Results.push_back(Tmp1); + break; + } case ISD::SUB: { EVT VT = Node->getValueType(0); assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) && @@ -3657,6 +3279,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, DAG.getIntPtrConstant(0)); TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, DAG.getIntPtrConstant(1)); + // Ret is a node with an illegal type. Because such things are not + // generally permitted during this phase of legalization, delete the + // node. The above EXTRACT_ELEMENT nodes should have been folded. + DAG.DeleteNode(Ret.getNode()); } if (isSigned) { @@ -3797,7 +3423,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp2.getValueType()), Tmp2, Tmp3, Tmp4, dl); - LastCALLSEQ_END = DAG.getEntryNode(); assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!"); Tmp3 = DAG.getConstant(0, Tmp2.getValueType()); @@ -3807,6 +3432,35 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, Results.push_back(Tmp1); break; } + case ISD::BUILD_VECTOR: + Results.push_back(ExpandBUILD_VECTOR(Node)); + break; + case ISD::SRA: + case ISD::SRL: + case ISD::SHL: { + // Scalarize vector SRA/SRL/SHL. + EVT VT = Node->getValueType(0); + assert(VT.isVector() && "Unable to legalize non-vector shift"); + assert(TLI.isTypeLegal(VT.getScalarType())&& "Element type must be legal"); + unsigned NumElem = VT.getVectorNumElements(); + + SmallVector<SDValue, 8> Scalars; + for (unsigned Idx = 0; Idx < NumElem; Idx++) { + SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + VT.getScalarType(), + Node->getOperand(0), DAG.getIntPtrConstant(Idx)); + SDValue Sh = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + VT.getScalarType(), + Node->getOperand(1), DAG.getIntPtrConstant(Idx)); + Scalars.push_back(DAG.getNode(Node->getOpcode(), dl, + VT.getScalarType(), Ex, Sh)); + } + SDValue Result = + DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), + &Scalars[0], Scalars.size()); + ReplaceNode(SDValue(Node, 0), Result); + break; + } case ISD::GLOBAL_OFFSET_TABLE: case ISD::GlobalAddress: case ISD::GlobalTLSAddress: @@ -3817,13 +3471,16 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, case ISD::INTRINSIC_WO_CHAIN: case ISD::INTRINSIC_VOID: // FIXME: Custom lowering for these operations shouldn't return null! - for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) - Results.push_back(SDValue(Node, i)); break; } + + // Replace the original node with the legalized result. 
+ if (!Results.empty()) + ReplaceNode(Node, Results.data()); } -void SelectionDAGLegalize::PromoteNode(SDNode *Node, - SmallVectorImpl<SDValue> &Results) { + +void SelectionDAGLegalize::PromoteNode(SDNode *Node) { + SmallVector<SDValue, 8> Results; EVT OVT = Node->getValueType(0); if (Node->getOpcode() == ISD::UINT_TO_FP || Node->getOpcode() == ISD::SINT_TO_FP || @@ -3835,20 +3492,24 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, SDValue Tmp1, Tmp2, Tmp3; switch (Node->getOpcode()) { case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: case ISD::CTPOP: // Zero extend the argument. Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0)); - // Perform the larger operation. + // Perform the larger operation. For CTPOP and CTTZ_ZERO_UNDEF, this is + // already the correct result. Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); if (Node->getOpcode() == ISD::CTTZ) { - //if Tmp1 == sizeinbits(NVT) then Tmp1 = sizeinbits(Old VT) + // FIXME: This should set a bit in the zero extended value instead. Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Tmp1, DAG.getConstant(NVT.getSizeInBits(), NVT), ISD::SETEQ); Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2, DAG.getConstant(OVT.getSizeInBits(), NVT), Tmp1); - } else if (Node->getOpcode() == ISD::CTLZ) { + } else if (Node->getOpcode() == ISD::CTLZ || + Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF) { // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT)) Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1, DAG.getConstant(NVT.getSizeInBits() - @@ -3877,6 +3538,33 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, Node->getOpcode() == ISD::SINT_TO_FP, dl); Results.push_back(Tmp1); break; + case ISD::VAARG: { + SDValue Chain = Node->getOperand(0); // Get the chain. + SDValue Ptr = Node->getOperand(1); // Get the pointer. + + unsigned TruncOp; + if (OVT.isVector()) { + TruncOp = ISD::BITCAST; + } else { + assert(OVT.isInteger() + && "VAARG promotion is supported only for vectors or integer types"); + TruncOp = ISD::TRUNCATE; + } + + // Perform the larger operation, then convert back + Tmp1 = DAG.getVAArg(NVT, dl, Chain, Ptr, Node->getOperand(2), + Node->getConstantOperandVal(3)); + Chain = Tmp1.getValue(1); + + Tmp2 = DAG.getNode(TruncOp, dl, OVT, Tmp1); + + // Modified the chain result - switch anything that used the old chain to + // use the new one. + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp2); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain); + ReplacedNode(Node); + break; + } case ISD::AND: case ISD::OR: case ISD::XOR: { @@ -3924,8 +3612,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, break; } case ISD::VECTOR_SHUFFLE: { - SmallVector<int, 8> Mask; - cast<ShuffleVectorSDNode>(Node)->getMask(Mask); + ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Node)->getMask(); // Cast the two input vectors. 
Tmp1 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(0)); @@ -3950,7 +3637,31 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, Tmp1, Tmp2, Node->getOperand(2))); break; } + case ISD::FDIV: + case ISD::FREM: + case ISD::FPOW: { + Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1)); + Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2); + Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, + Tmp3, DAG.getIntPtrConstant(0))); + break; } + case ISD::FLOG2: + case ISD::FEXP2: + case ISD::FLOG: + case ISD::FEXP: { + Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); + Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, + Tmp2, DAG.getIntPtrConstant(0))); + break; + } + } + + // Replace the original node with the legalized result. + if (!Results.empty()) + ReplaceNode(Node, Results.data()); } // SelectionDAG::Legalize - This is the entry point for the file. diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 7c1cc69d6a2f..e3938968b205 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -479,8 +479,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { if (L->getExtensionType() == ISD::NON_EXTLOAD) { NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(), - L->getPointerInfo(), NVT, - L->isVolatile(), L->isNonTemporal(), L->getAlignment()); + L->getPointerInfo(), NVT, L->isVolatile(), + L->isNonTemporal(), false, L->getAlignment()); // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); @@ -492,7 +492,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { L->getMemoryVT(), dl, L->getChain(), L->getBasePtr(), L->getOffset(), L->getPointerInfo(), L->getMemoryVT(), L->isVolatile(), - L->isNonTemporal(), L->getAlignment()); + L->isNonTemporal(), false, L->getAlignment()); // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); @@ -672,7 +672,7 @@ void DAGTypeLegalizer::SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, case ISD::SETUEQ: LC2 = (VT == MVT::f32) ? 
RTLIB::OEQ_F32 : RTLIB::OEQ_F64; break; - default: assert(false && "Do not know how to soften this setcc!"); + default: llvm_unreachable("Do not know how to soften this setcc!"); } } @@ -1212,7 +1212,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, switch (SrcVT.getSimpleVT().SimpleTy) { default: - assert(false && "Unsupported UINT_TO_FP!"); + llvm_unreachable("Unsupported UINT_TO_FP!"); case MVT::i32: Parts = TwoE32; break; diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index a5c4c2ded4c5..95ddb1e0f6fb 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -20,7 +20,6 @@ #include "LegalizeTypes.h" #include "llvm/DerivedTypes.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -57,8 +56,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::Constant: Res = PromoteIntRes_Constant(N); break; case ISD::CONVERT_RNDSAT: Res = PromoteIntRes_CONVERT_RNDSAT(N); break; + case ISD::CTLZ_ZERO_UNDEF: case ISD::CTLZ: Res = PromoteIntRes_CTLZ(N); break; case ISD::CTPOP: Res = PromoteIntRes_CTPOP(N); break; + case ISD::CTTZ_ZERO_UNDEF: case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break; case ISD::EXTRACT_VECTOR_ELT: Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break; @@ -211,13 +212,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { DebugLoc dl = N->getDebugLoc(); switch (getTypeAction(InVT)) { - default: - assert(false && "Unknown type action!"); - break; case TargetLowering::TypeLegal: break; case TargetLowering::TypePromoteInteger: - if (NOutVT.bitsEq(NInVT)) + if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector() && !NInVT.isVector()) // The input promotes to the same size. Convert the promoted value. return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetPromotedInteger(InOp)); break; @@ -251,9 +249,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { return DAG.getNode(ISD::BITCAST, dl, NOutVT, InOp); } case TargetLowering::TypeWidenVector: - if (OutVT.bitsEq(NInVT)) - // The input is widened to the same size. Convert to the widened value. - return DAG.getNode(ISD::BITCAST, dl, OutVT, GetWidenedVector(InOp)); + // The input is widened to the same size. Convert to the widened value. + // Make sure that the outgoing value is not a vector, because this would + // make us bitcast between two vectors which are legalized in different ways. + if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector()) + return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetWidenedVector(InOp)); } return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, @@ -312,7 +312,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) { DebugLoc dl = N->getDebugLoc(); EVT OVT = N->getValueType(0); EVT NVT = Op.getValueType(); - Op = DAG.getNode(ISD::CTLZ, dl, NVT, Op); + Op = DAG.getNode(N->getOpcode(), dl, NVT, Op); // Subtract off the extra leading bits in the bigger type. return DAG.getNode(ISD::SUB, dl, NVT, Op, DAG.getConstant(NVT.getSizeInBits() - @@ -330,13 +330,15 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) { EVT OVT = N->getValueType(0); EVT NVT = Op.getValueType(); DebugLoc dl = N->getDebugLoc(); - // The count is the same in the promoted type except if the original - // value was zero. This can be handled by setting the bit just off - // the top of the original type. 
- APInt TopBit(NVT.getSizeInBits(), 0); - TopBit.setBit(OVT.getSizeInBits()); - Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT)); - return DAG.getNode(ISD::CTTZ, dl, NVT, Op); + if (N->getOpcode() == ISD::CTTZ) { + // The count is the same in the promoted type except if the original + // value was zero. This can be handled by setting the bit just off + // the top of the original type. + APInt TopBit(NVT.getSizeInBits(), 0); + TopBit.setBit(OVT.getSizeInBits()); + Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT)); + } + return DAG.getNode(N->getOpcode(), dl, NVT, Op); } SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) { @@ -486,7 +488,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_VSELECT(SDNode *N) { - SDValue Mask = GetPromotedInteger(N->getOperand(0)); + SDValue Mask = N->getOperand(0); + EVT OpTy = N->getOperand(1).getValueType(); + + // Promote all the way up to the canonical SetCC type. + Mask = PromoteTargetBoolean(Mask, TLI.getSetCCResultType(OpTy)); SDValue LHS = GetPromotedInteger(N->getOperand(1)); SDValue RHS = GetPromotedInteger(N->getOperand(2)); return DAG.getNode(ISD::VSELECT, N->getDebugLoc(), @@ -1098,8 +1104,10 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::AssertZext: ExpandIntRes_AssertZext(N, Lo, Hi); break; case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break; case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break; + case ISD::CTLZ_ZERO_UNDEF: case ISD::CTLZ: ExpandIntRes_CTLZ(N, Lo, Hi); break; case ISD::CTPOP: ExpandIntRes_CTPOP(N, Lo, Hi); break; + case ISD::CTTZ_ZERO_UNDEF: case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break; case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break; case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break; @@ -1171,7 +1179,6 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { switch (Opc) { default: llvm_unreachable("Unhandled atomic intrinsic Expand!"); - break; case ISD::ATOMIC_SWAP: switch (VT.SimpleTy) { default: llvm_unreachable("Unexpected value type for atomic!"); @@ -1355,7 +1362,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits)); APInt KnownZero, KnownOne; - DAG.ComputeMaskedBits(N->getOperand(1), HighBitMask, KnownZero, KnownOne); + DAG.ComputeMaskedBits(N->getOperand(1), KnownZero, KnownOne); // If we don't know anything about the high bits, exit. if (((KnownZero|KnownOne) & HighBitMask) == 0) @@ -1390,15 +1397,15 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { } } -#if 0 - // FIXME: This code is broken for shifts with a zero amount! // If we know that all of the high bits of the shift amount are zero, then we // can do this as a couple of simple shifts. if ((KnownZero & HighBitMask) == HighBitMask) { - // Compute 32-amt. - SDValue Amt2 = DAG.getNode(ISD::SUB, ShTy, - DAG.getConstant(NVTBits, ShTy), - Amt); + // Calculate 31-x. 31 is used instead of 32 to avoid creating an undefined + // shift if x is zero. We can use XOR here because x is known to be smaller + // than 32. 
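+ // For example, with NVTBits == 32 and Amt == 5, Amt ^ 31 == 26 == 31 - 5; + // the XOR acts as an exact subtraction because Amt is known to fit in the + // low five bits.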
+ SDValue Amt2 = DAG.getNode(ISD::XOR, dl, ShTy, Amt, + DAG.getConstant(NVTBits-1, ShTy)); + unsigned Op1, Op2; switch (N->getOpcode()) { default: llvm_unreachable("Unknown shift"); @@ -1407,13 +1414,23 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { case ISD::SRA: Op1 = ISD::SRL; Op2 = ISD::SHL; break; } - Lo = DAG.getNode(N->getOpcode(), NVT, InL, Amt); - Hi = DAG.getNode(ISD::OR, NVT, - DAG.getNode(Op1, NVT, InH, Amt), - DAG.getNode(Op2, NVT, InL, Amt2)); + // When shifting right the arithmetic for Lo and Hi is swapped. + if (N->getOpcode() != ISD::SHL) + std::swap(InL, InH); + + // Use a little trick to get the bits that move from Lo to Hi. First + // shift by one bit. + SDValue Sh1 = DAG.getNode(Op2, dl, NVT, InL, DAG.getConstant(1, ShTy)); + // Then compute the remaining shift with amount-1. + SDValue Sh2 = DAG.getNode(Op2, dl, NVT, Sh1, Amt2); + + Lo = DAG.getNode(N->getOpcode(), dl, NVT, InL, Amt); + Hi = DAG.getNode(ISD::OR, dl, NVT, DAG.getNode(Op1, dl, NVT, InH, Amt),Sh2); + + if (N->getOpcode() != ISD::SHL) + std::swap(Hi, Lo); return true; } -#endif return false; } @@ -1493,8 +1510,6 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL); return true; } - - return false; } void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, @@ -1702,8 +1717,8 @@ void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N, SDValue HiNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Hi, DAG.getConstant(0, NVT), ISD::SETNE); - SDValue LoLZ = DAG.getNode(ISD::CTLZ, dl, NVT, Lo); - SDValue HiLZ = DAG.getNode(ISD::CTLZ, dl, NVT, Hi); + SDValue LoLZ = DAG.getNode(N->getOpcode(), dl, NVT, Lo); + SDValue HiLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, NVT, Hi); Lo = DAG.getNode(ISD::SELECT, dl, NVT, HiNotZero, HiLZ, DAG.getNode(ISD::ADD, dl, NVT, LoLZ, @@ -1732,8 +1747,8 @@ void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N, SDValue LoNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, DAG.getConstant(0, NVT), ISD::SETNE); - SDValue LoLZ = DAG.getNode(ISD::CTTZ, dl, NVT, Lo); - SDValue HiLZ = DAG.getNode(ISD::CTTZ, dl, NVT, Hi); + SDValue LoLZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, NVT, Lo); + SDValue HiLZ = DAG.getNode(N->getOpcode(), dl, NVT, Hi); Lo = DAG.getNode(ISD::SELECT, dl, NVT, LoNotZero, LoLZ, DAG.getNode(ISD::ADD, dl, NVT, HiLZ, @@ -1778,6 +1793,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, unsigned Alignment = N->getAlignment(); bool isVolatile = N->isVolatile(); bool isNonTemporal = N->isNonTemporal(); + bool isInvariant = N->isInvariant(); DebugLoc dl = N->getDebugLoc(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); @@ -1808,7 +1824,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, } else if (TLI.isLittleEndian()) { // Little-endian - low bits are at low addresses. 
Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(), - isVolatile, isNonTemporal, Alignment); + isVolatile, isNonTemporal, isInvariant, Alignment); unsigned ExcessBits = N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); @@ -2305,12 +2321,14 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, SDValue Func = DAG.getExternalSymbol(TLI.getLibcallName(LC), PtrVT); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(Chain, RetTy, true, false, false, false, - 0, TLI.getLibcallCallingConv(LC), false, - true, Func, Args, DAG, dl); + 0, TLI.getLibcallCallingConv(LC), + /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, + Func, Args, DAG, dl); SplitInteger(CallInfo.first, Lo, Hi); SDValue Temp2 = DAG.getLoad(PtrVT, dl, CallInfo.second, Temp, - MachinePointerInfo(), false, false, 0); + MachinePointerInfo(), false, false, false, 0); SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Temp2, DAG.getConstant(0, PtrVT), ISD::SETNE); @@ -2781,7 +2799,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { else if (SrcVT == MVT::i128) FF = APInt(32, F32TwoE128); else - assert(false && "Unsupported UINT_TO_FP!"); + llvm_unreachable("Unsupported UINT_TO_FP!"); // Check whether the sign bit is set. SDValue Lo, Hi; @@ -2926,38 +2944,28 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { DebugLoc dl = N->getDebugLoc(); - SDValue Op0 = N->getOperand(1); - SDValue Op1 = N->getOperand(1); - assert(Op0.getValueType() == Op1.getValueType() && - "Invalid input vector types"); - EVT OutVT = N->getValueType(0); EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); assert(NOutVT.isVector() && "This type must be promoted to a vector type"); + EVT InElemTy = OutVT.getVectorElementType(); EVT OutElemTy = NOutVT.getVectorElementType(); - unsigned NumElem0 = Op0.getValueType().getVectorNumElements(); - unsigned NumElem1 = Op1.getValueType().getVectorNumElements(); + unsigned NumElem = N->getOperand(0).getValueType().getVectorNumElements(); unsigned NumOutElem = NOutVT.getVectorNumElements(); - assert(NumElem0 + NumElem1 == NumOutElem && - "Invalid number of incoming elements"); + unsigned NumOperands = N->getNumOperands(); + assert(NumElem * NumOperands == NumOutElem && + "Unexpected number of elements"); // Take the elements from the first vector. 
SmallVector<SDValue, 8> Ops(NumOutElem); - for (unsigned i = 0; i < NumElem0; ++i) { - SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - Op0.getValueType().getScalarType(), Op0, - DAG.getIntPtrConstant(i)); - Ops[i] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext); - } - - // Take the elements from the second vector - for (unsigned i = 0; i < NumElem1; ++i) { - SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - Op1.getValueType().getScalarType(), Op1, - DAG.getIntPtrConstant(i)); - Ops[i + NumElem0] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext); + for (unsigned i = 0; i < NumOperands; ++i) { + SDValue Op = N->getOperand(i); + for (unsigned j = 0; j < NumElem; ++j) { + SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + InElemTy, Op, DAG.getIntPtrConstant(j)); + Ops[i * NumElem + j] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext); + } } return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, &Ops[0], Ops.size()); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index a4bb577433cc..439aa4de5cf5 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -222,8 +222,6 @@ bool DAGTypeLegalizer::run() { for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) { EVT ResultVT = N->getValueType(i); switch (getTypeAction(ResultVT)) { - default: - assert(false && "Unknown action!"); case TargetLowering::TypeLegal: break; // The following calls must take care of *all* of the node's results, @@ -275,8 +273,6 @@ ScanOperands: EVT OpVT = N->getOperand(i).getValueType(); switch (getTypeAction(OpVT)) { - default: - assert(false && "Unknown action!"); case TargetLowering::TypeLegal: continue; // The following calls must either replace all of the node's results @@ -752,7 +748,11 @@ void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { } void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { - assert(Result.getValueType() == Op.getValueType().getVectorElementType() && + // Note that in some cases vector operation operands may be greater than + // the vector element type. For example BUILD_VECTOR of type <1 x i1> with + // a constant i8 operand. + assert(Result.getValueType().getSizeInBits() >= + Op.getValueType().getVectorElementType().getSizeInBits() && "Invalid type for scalarized vector"); AnalyzeNewValue(Result); @@ -889,7 +889,7 @@ SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op, MachinePointerInfo(), false, false, 0); // Result is a load from the stack slot. return DAG.getLoad(DestVT, dl, Store, StackPtr, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); } /// CustomLowerNode - Replace the node's results with custom code provided @@ -1056,8 +1056,9 @@ SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, EVT RetVT, Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, - false, 0, TLI.getLibcallCallingConv(LC), false, - /*isReturnValueUsed=*/true, + false, 0, TLI.getLibcallCallingConv(LC), + /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, dl); return CallInfo.first; } @@ -1084,12 +1085,11 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); - // Splice the libcall in wherever FindInputOutputChains tells us to. 
Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, - /*isReturnValueUsed=*/true, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, Node->getDebugLoc()); return CallInfo; diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index abacdac686bc..e8664458e9a6 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -521,6 +521,7 @@ private: SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); SDValue ScalarizeVecRes_SIGN_EXTEND_INREG(SDNode *N); + SDValue ScalarizeVecRes_VSELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT_CC(SDNode *N); SDValue ScalarizeVecRes_SETCC(SDNode *N); @@ -633,6 +634,7 @@ private: SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue WidenVecOp_STORE(SDNode* N); + SDValue WidenVecOp_SETCC(SDNode* N); SDValue WidenVecOp_Convert(SDNode *N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 8e7e4985e4d0..a8ff7c65abde 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -21,7 +21,6 @@ #include "LegalizeTypes.h" #include "llvm/Target/TargetData.h" -#include "llvm/CodeGen/PseudoSourceValue.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -46,8 +45,6 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { // Handle some special cases efficiently. switch (getTypeAction(InVT)) { - default: - assert(false && "Unknown type action!"); case TargetLowering::TypeLegal: case TargetLowering::TypePromoteInteger: break; @@ -130,7 +127,8 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { false, false, 0); // Load the first half from the stack slot. - Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo, false, false, 0); + Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo, + false, false, false, 0); // Increment the pointer to the other half. unsigned IncrementSize = NOutVT.getSizeInBits() / 8; @@ -140,7 +138,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { // Load the second half from the stack slot. Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo.getWithOffset(IncrementSize), false, - false, MinAlign(Alignment, IncrementSize)); + false, false, MinAlign(Alignment, IncrementSize)); // Handle endianness of the load. if (TLI.isBigEndian()) @@ -212,11 +210,12 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, unsigned Alignment = LD->getAlignment(); bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); + bool isInvariant = LD->isInvariant(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(), - isVolatile, isNonTemporal, Alignment); + isVolatile, isNonTemporal, isInvariant, Alignment); // Increment the pointer to the other half. 
unsigned IncrementSize = NVT.getSizeInBits() / 8; @@ -224,7 +223,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, DAG.getIntPtrConstant(IncrementSize)); Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), - isVolatile, isNonTemporal, + isVolatile, isNonTemporal, isInvariant, MinAlign(Alignment, IncrementSize)); // Build a factor node to remember that this load is independent of the diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index f815b00db5d6..3ae8345bd198 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -64,6 +64,8 @@ class VectorLegalizer { // Implement vselect in terms of XOR, AND, OR when blend is not supported // by the target. SDValue ExpandVSELECT(SDValue Op); + SDValue ExpandLoad(SDValue Op); + SDValue ExpandStore(SDValue Op); SDValue ExpandFNEG(SDValue Op); // Implements vector promotion; this is essentially just bitcasting the // operands to a different type and bitcasting the result back to the @@ -124,6 +126,33 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops.data(), Ops.size()), 0); + if (Op.getOpcode() == ISD::LOAD) { + LoadSDNode *LD = cast<LoadSDNode>(Op.getNode()); + ISD::LoadExtType ExtType = LD->getExtensionType(); + if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) { + if (TLI.isLoadExtLegal(LD->getExtensionType(), LD->getMemoryVT())) + return TranslateLegalizeResults(Op, Result); + Changed = true; + return LegalizeOp(ExpandLoad(Op)); + } + } else if (Op.getOpcode() == ISD::STORE) { + StoreSDNode *ST = cast<StoreSDNode>(Op.getNode()); + EVT StVT = ST->getMemoryVT(); + EVT ValVT = ST->getValue().getValueType(); + if (StVT.isVector() && ST->isTruncatingStore()) + switch (TLI.getTruncStoreAction(ValVT, StVT)) { + default: llvm_unreachable("This action is not supported yet!"); + case TargetLowering::Legal: + return TranslateLegalizeResults(Op, Result); + case TargetLowering::Custom: + Changed = true; + return LegalizeOp(TLI.LowerOperation(Result, DAG)); + case TargetLowering::Expand: + Changed = true; + return LegalizeOp(ExpandStore(Op)); + } + } + bool HasVectorValue = false; for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end(); J != E; @@ -156,8 +185,10 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::SRL: case ISD::ROTL: case ISD::ROTR: - case ISD::CTTZ: case ISD::CTLZ: + case ISD::CTTZ: + case ISD::CTLZ_ZERO_UNDEF: + case ISD::CTTZ_ZERO_UNDEF: case ISD::CTPOP: case ISD::SELECT: case ISD::VSELECT: @@ -262,6 +293,97 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) { return DAG.getNode(ISD::BITCAST, dl, VT, Op); } + +SDValue VectorLegalizer::ExpandLoad(SDValue Op) { + DebugLoc dl = Op.getDebugLoc(); + LoadSDNode *LD = cast<LoadSDNode>(Op.getNode()); + SDValue Chain = LD->getChain(); + SDValue BasePTR = LD->getBasePtr(); + EVT SrcVT = LD->getMemoryVT(); + ISD::LoadExtType ExtType = LD->getExtensionType(); + + SmallVector<SDValue, 8> LoadVals; + SmallVector<SDValue, 8> LoadChains; + unsigned NumElem = SrcVT.getVectorNumElements(); + unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8; + + for (unsigned Idx=0; Idx<NumElem; Idx++) { + SDValue ScalarLoad = DAG.getExtLoad(ExtType, dl, + Op.getNode()->getValueType(0).getScalarType(), + Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride), + SrcVT.getScalarType(), + LD->isVolatile(), 
LD->isNonTemporal(), + LD->getAlignment()); + + BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, + DAG.getIntPtrConstant(Stride)); + + LoadVals.push_back(ScalarLoad.getValue(0)); + LoadChains.push_back(ScalarLoad.getValue(1)); + } + + SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &LoadChains[0], LoadChains.size()); + SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl, + Op.getNode()->getValueType(0), &LoadVals[0], LoadVals.size()); + + AddLegalizedOperand(Op.getValue(0), Value); + AddLegalizedOperand(Op.getValue(1), NewChain); + + return (Op.getResNo() ? NewChain : Value); +} + +SDValue VectorLegalizer::ExpandStore(SDValue Op) { + DebugLoc dl = Op.getDebugLoc(); + StoreSDNode *ST = cast<StoreSDNode>(Op.getNode()); + SDValue Chain = ST->getChain(); + SDValue BasePTR = ST->getBasePtr(); + SDValue Value = ST->getValue(); + EVT StVT = ST->getMemoryVT(); + + unsigned Alignment = ST->getAlignment(); + bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); + + unsigned NumElem = StVT.getVectorNumElements(); + // The type of the data we want to save + EVT RegVT = Value.getValueType(); + EVT RegSclVT = RegVT.getScalarType(); + // The type of data as saved in memory. + EVT MemSclVT = StVT.getScalarType(); + + // Cast floats into integers + unsigned ScalarSize = MemSclVT.getSizeInBits(); + + // Round odd types to the next pow of two. + if (!isPowerOf2_32(ScalarSize)) + ScalarSize = NextPowerOf2(ScalarSize); + + // Store Stride in bytes + unsigned Stride = ScalarSize/8; + // Extract each of the elements from the original vector + // and save them into memory individually. + SmallVector<SDValue, 8> Stores; + for (unsigned Idx = 0; Idx < NumElem; Idx++) { + SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + RegSclVT, Value, DAG.getIntPtrConstant(Idx)); + + // This scalar TruncStore may be illegal, but we legalize it later. + SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR, + ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT, + isVolatile, isNonTemporal, Alignment); + + BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, + DAG.getIntPtrConstant(Stride)); + + Stores.push_back(Store); + } + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &Stores[0], Stores.size()); + AddLegalizedOperand(Op, TF); + return TF; +} + SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { // Implement VSELECT in terms of XOR, AND, OR // on platforms which do not support blend natively. @@ -274,10 +396,12 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { // If we can't even use the basic vector operations of // AND,OR,XOR, we will have to scalarize the op. - if (!TLI.isOperationLegalOrCustom(ISD::AND, VT) || - !TLI.isOperationLegalOrCustom(ISD::XOR, VT) || - !TLI.isOperationLegalOrCustom(ISD::OR, VT)) - return DAG.UnrollVectorOp(Op.getNode()); + // Notice that the operation may be 'promoted' which means that it is + // 'bitcasted' to another type which is handled. + if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand || + TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand || + TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand) + return DAG.UnrollVectorOp(Op.getNode()); assert(VT.getSizeInBits() == Op.getOperand(1).getValueType().getSizeInBits() && "Invalid mask size"); @@ -301,9 +425,9 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { DebugLoc DL = Op.getDebugLoc(); // Make sure that the SINT_TO_FP and SRL instructions are available. 
- if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, VT) || - !TLI.isOperationLegalOrCustom(ISD::SRL, VT)) - return DAG.UnrollVectorOp(Op.getNode()); + if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand || + TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand) + return DAG.UnrollVectorOp(Op.getNode()); EVT SVT = VT.getScalarType(); assert((SVT.getSizeInBits() == 64 || SVT.getSizeInBits() == 32) && diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 107a42b2951c..5f23f01dafb4 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -21,7 +21,6 @@ //===----------------------------------------------------------------------===// #include "LegalizeTypes.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Target/TargetData.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -59,6 +58,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break; case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break; case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break; + case ISD::VSELECT: R = ScalarizeVecRes_VSELECT(N); break; case ISD::SELECT: R = ScalarizeVecRes_SELECT(N); break; case ISD::SELECT_CC: R = ScalarizeVecRes_SELECT_CC(N); break; case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break; @@ -194,7 +194,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { N->getPointerInfo(), N->getMemoryVT().getVectorElementType(), N->isVolatile(), N->isNonTemporal(), - N->getOriginalAlignment()); + N->isInvariant(), N->getOriginalAlignment()); // Legalized the chain result - switch anything that used the old chain to // use the new one. @@ -227,6 +227,37 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) { return InOp; } +SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) { + SDValue Cond = GetScalarizedVector(N->getOperand(0)); + SDValue LHS = GetScalarizedVector(N->getOperand(1)); + TargetLowering::BooleanContent ScalarBool = TLI.getBooleanContents(false); + TargetLowering::BooleanContent VecBool = TLI.getBooleanContents(true); + if (ScalarBool != VecBool) { + EVT CondVT = Cond.getValueType(); + switch (ScalarBool) { + case TargetLowering::UndefinedBooleanContent: + break; + case TargetLowering::ZeroOrOneBooleanContent: + assert(VecBool == TargetLowering::UndefinedBooleanContent || + VecBool == TargetLowering::ZeroOrNegativeOneBooleanContent); + // Vector read from all ones, scalar expects a single 1 so mask. + Cond = DAG.getNode(ISD::AND, N->getDebugLoc(), CondVT, + Cond, DAG.getConstant(1, CondVT)); + break; + case TargetLowering::ZeroOrNegativeOneBooleanContent: + assert(VecBool == TargetLowering::UndefinedBooleanContent || + VecBool == TargetLowering::ZeroOrOneBooleanContent); + // Vector reads from a one, scalar from all ones so sign extend. 
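+ // Sign extending from bit 0 turns a lane value of 1 into all ones and + // leaves 0 unchanged, matching the scalar ZeroOrNegativeOneBooleanContent + // convention.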
+ Cond = DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), CondVT, + Cond, DAG.getValueType(MVT::i1)); + break; + } + } + return DAG.getNode(ISD::SELECT, N->getDebugLoc(), + LHS.getValueType(), Cond, LHS, + GetScalarizedVector(N->getOperand(2))); +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) { SDValue LHS = GetScalarizedVector(N->getOperand(1)); return DAG.getNode(ISD::SELECT, N->getDebugLoc(), @@ -405,6 +436,10 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { N->dump(&DAG); dbgs() << "\n"); SDValue Lo, Hi; + + // See if the target wants to custom expand this node. + if (CustomLowerNode(N, N->getValueType(ResNo), true)) + return; switch (N->getOpcode()) { default: @@ -442,8 +477,10 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::ANY_EXTEND: case ISD::CONVERT_RNDSAT: case ISD::CTLZ: - case ISD::CTPOP: case ISD::CTTZ: + case ISD::CTLZ_ZERO_UNDEF: + case ISD::CTTZ_ZERO_UNDEF: + case ISD::CTPOP: case ISD::FABS: case ISD::FCEIL: case ISD::FCOS: @@ -677,7 +714,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, // Load the Lo part from the stack slot. Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); // Increment the pointer to the other part. unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8; @@ -686,7 +723,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, // Load the Hi part from the stack slot. Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), - false, false, MinAlign(Alignment, IncrementSize)); + false, false, false, MinAlign(Alignment, IncrementSize)); } void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, @@ -713,20 +750,21 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, unsigned Alignment = LD->getOriginalAlignment(); bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); + bool isInvariant = LD->isInvariant(); EVT LoMemVT, HiMemVT; GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT); Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset, LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal, - Alignment); + isInvariant, Alignment); unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, LD->getPointerInfo().getWithOffset(IncrementSize), - HiMemVT, isVolatile, isNonTemporal, Alignment); + HiMemVT, isVolatile, isNonTemporal, isInvariant, Alignment); // Build a factor node to remember that this load is independent of the // other one. @@ -773,46 +811,18 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, DebugLoc dl = N->getDebugLoc(); GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); - // Split the input. + // If the input also splits, handle it directly for a compile time speedup. + // Otherwise split it by hand. 
EVT InVT = N->getOperand(0).getValueType(); - switch (getTypeAction(InVT)) { - default: llvm_unreachable("Unexpected type action!"); - case TargetLowering::TypeLegal: { + if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) { + GetSplitVector(N->getOperand(0), Lo, Hi); + } else { EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), LoVT.getVectorNumElements()); Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), DAG.getIntPtrConstant(0)); Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), DAG.getIntPtrConstant(InNVT.getVectorNumElements())); - break; - } - case TargetLowering::TypePromoteInteger: { - SDValue InOp = GetPromotedInteger(N->getOperand(0)); - EVT InNVT = EVT::getVectorVT(*DAG.getContext(), - InOp.getValueType().getVectorElementType(), - LoVT.getVectorNumElements()); - Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, - DAG.getIntPtrConstant(0)); - Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, - DAG.getIntPtrConstant(InNVT.getVectorNumElements())); - break; - } - case TargetLowering::TypeSplitVector: - GetSplitVector(N->getOperand(0), Lo, Hi); - break; - case TargetLowering::TypeWidenVector: { - // If the result needs to be split and the input needs to be widened, - // the two types must have different lengths. Use the widened result - // and extract from it to do the split. - SDValue InOp = GetWidenedVector(N->getOperand(0)); - EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), - LoVT.getVectorNumElements()); - Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, - DAG.getIntPtrConstant(0)); - Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, - DAG.getIntPtrConstant(InNVT.getVectorNumElements())); - break; - } } if (N->getOpcode() == ISD::FP_ROUND) { @@ -1239,6 +1249,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::LOAD: Res = WidenVecRes_LOAD(N); break; case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break; case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break; + case ISD::VSELECT: case ISD::SELECT: Res = WidenVecRes_SELECT(N); break; case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break; case ISD::SETCC: Res = WidenVecRes_SETCC(N); break; @@ -1590,12 +1601,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { DebugLoc dl = N->getDebugLoc(); switch (getTypeAction(InVT)) { - default: - assert(false && "Unknown type action!"); - break; case TargetLowering::TypeLegal: break; case TargetLowering::TypePromoteInteger: + // If the incoming type is a vector that is being promoted, then + // we know that the elements are arranged differently and that we + // must perform the conversion using a stack slot. + if (InVT.isVector()) + break; + // If the InOp is promoted to the same size, convert it. Otherwise, // fall out of the switch and widen the promoted input. 
InOp = GetPromotedInteger(InOp); @@ -1928,7 +1942,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) { SDValue InOp1 = GetWidenedVector(N->getOperand(1)); SDValue InOp2 = GetWidenedVector(N->getOperand(2)); assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT); - return DAG.getNode(ISD::SELECT, N->getDebugLoc(), + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, Cond1, InOp1, InOp2); } @@ -2032,6 +2046,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) { case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break; case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::STORE: Res = WidenVecOp_STORE(N); break; + case ISD::SETCC: Res = WidenVecOp_SETCC(N); break; case ISD::FP_EXTEND: case ISD::FP_TO_SINT: @@ -2165,6 +2180,32 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { MVT::Other,&StChain[0],StChain.size()); } +SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { + SDValue InOp0 = GetWidenedVector(N->getOperand(0)); + SDValue InOp1 = GetWidenedVector(N->getOperand(1)); + DebugLoc dl = N->getDebugLoc(); + + // WARNING: In this code we widen the compare instruction with garbage. + // This garbage may contain denormal floats which may be slow. Is this a real + // concern ? Should we zero the unused lanes if this is a float compare ? + + // Get a new SETCC node to compare the newly widened operands. + // Only some of the compared elements are legal. + EVT SVT = TLI.getSetCCResultType(InOp0.getValueType()); + SDValue WideSETCC = DAG.getNode(ISD::SETCC, N->getDebugLoc(), + SVT, InOp0, InOp1, N->getOperand(2)); + + // Extract the needed results from the result vector. + EVT ResVT = EVT::getVectorVT(*DAG.getContext(), + SVT.getVectorElementType(), + N->getValueType(0).getVectorNumElements()); + SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, + ResVT, WideSETCC, DAG.getIntPtrConstant(0)); + + return PromoteTargetBoolean(CC, N->getValueType(0)); +} + + //===----------------------------------------------------------------------===// // Vector Widening Utilities //===----------------------------------------------------------------------===// @@ -2276,6 +2317,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain, unsigned Align = LD->getAlignment(); bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); + bool isInvariant = LD->isInvariant(); int LdWidth = LdVT.getSizeInBits(); int WidthDiff = WidenWidth - LdWidth; // Difference @@ -2285,7 +2327,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain, EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); int NewVTWidth = NewVT.getSizeInBits(); SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(), - isVolatile, isNonTemporal, Align); + isVolatile, isNonTemporal, isInvariant, Align); LdChain.push_back(LdOp.getValue(1)); // Check if we can load the element with one instruction @@ -2323,18 +2365,37 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain, BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, DAG.getIntPtrConstant(Increment)); + SDValue L; if (LdWidth < NewVTWidth) { // Our current type we are using is too large, find a better size NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); NewVTWidth = NewVT.getSizeInBits(); - } - - SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, + L = DAG.getLoad(NewVT, dl, Chain, BasePtr, 
LD->getPointerInfo().getWithOffset(Offset), isVolatile, - isNonTemporal, MinAlign(Align, Increment)); - LdChain.push_back(LdOp.getValue(1)); - LdOps.push_back(LdOp); + isNonTemporal, isInvariant, + MinAlign(Align, Increment)); + LdChain.push_back(L.getValue(1)); + if (L->getValueType(0).isVector()) { + SmallVector<SDValue, 16> Loads; + Loads.push_back(L); + unsigned size = L->getValueSizeInBits(0); + while (size < LdOp->getValueSizeInBits(0)) { + Loads.push_back(DAG.getUNDEF(L->getValueType(0))); + size += L->getValueSizeInBits(0); + } + L = DAG.getNode(ISD::CONCAT_VECTORS, dl, LdOp->getValueType(0), + &Loads[0], Loads.size()); + } + } else { + L = DAG.getLoad(NewVT, dl, Chain, BasePtr, + LD->getPointerInfo().getWithOffset(Offset), isVolatile, + isNonTemporal, isInvariant, MinAlign(Align, Increment)); + LdChain.push_back(L.getValue(1)); + } + + LdOps.push_back(L); + LdWidth -= NewVTWidth; } diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp new file mode 100644 index 000000000000..ff0136e08cd9 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -0,0 +1,657 @@ +//===- ResourcePriorityQueue.cpp - A DFA-oriented priority queue -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ResourcePriorityQueue class, which is a +// SchedulingPriorityQueue that prioritizes instructions using DFA state to +// reduce the length of the critical path through the basic block +// on VLIW platforms. +// The scheduler is basically a top-down adaptable list scheduler with DFA +// resource tracking added to the cost function. +// DFA is queried as a state machine to model "packets/bundles" during +// schedule. Currently packets/bundles are discarded at the end of +// scheduling, affecting only order of instructions. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "scheduler" +#include "llvm/CodeGen/ResourcePriorityQueue.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetLowering.h" + +using namespace llvm; + +static cl::opt<bool> DisableDFASched("disable-dfa-sched", cl::Hidden, + cl::ZeroOrMore, cl::init(false), + cl::desc("Disable use of DFA during scheduling")); + +static cl::opt<signed> RegPressureThreshold( + "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(5), + cl::desc("Track reg pressure and switch priority to in-depth")); + + +ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) : + Picker(this), + InstrItins(IS->getTargetLowering().getTargetMachine().getInstrItineraryData()) +{ + TII = IS->getTargetLowering().getTargetMachine().getInstrInfo(); + TRI = IS->getTargetLowering().getTargetMachine().getRegisterInfo(); + TLI = &IS->getTargetLowering(); + + const TargetMachine &tm = (*IS->MF).getTarget(); + ResourcesModel = tm.getInstrInfo()->CreateTargetScheduleState(&tm,NULL); + // This hard requirement could be relaxed, but for now + // do not let it proceed.
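+ // The DFA resources model is what later lets isResourceAvailable() ask + // whether one more instruction still fits into the current packet.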
+ assert (ResourcesModel && "Unimplemented CreateTargetScheduleState."); + + unsigned NumRC = TRI->getNumRegClasses(); + RegLimit.resize(NumRC); + RegPressure.resize(NumRC); + std::fill(RegLimit.begin(), RegLimit.end(), 0); + std::fill(RegPressure.begin(), RegPressure.end(), 0); + for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), + E = TRI->regclass_end(); I != E; ++I) + RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, *IS->MF); + + ParallelLiveRanges = 0; + HorizontalVerticalBalance = 0; +} + +unsigned +ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) { + unsigned NumberDeps = 0; + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + continue; + + SUnit *PredSU = I->getSUnit(); + const SDNode *ScegN = PredSU->getNode(); + + if (!ScegN) + continue; + + // If value is passed to CopyToReg, it is probably + // live outside BB. + switch (ScegN->getOpcode()) { + default: break; + case ISD::TokenFactor: break; + case ISD::CopyFromReg: NumberDeps++; break; + case ISD::CopyToReg: break; + case ISD::INLINEASM: break; + } + if (!ScegN->isMachineOpcode()) + continue; + + for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) { + EVT VT = ScegN->getValueType(i); + if (TLI->isTypeLegal(VT) + && (TLI->getRegClassFor(VT)->getID() == RCId)) { + NumberDeps++; + break; + } + } + } + return NumberDeps; +} + +unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU, + unsigned RCId) { + unsigned NumberDeps = 0; + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isCtrl()) + continue; + + SUnit *SuccSU = I->getSUnit(); + const SDNode *ScegN = SuccSU->getNode(); + if (!ScegN) + continue; + + // If value is passed to CopyToReg, it is probably + // live outside BB. + switch (ScegN->getOpcode()) { + default: break; + case ISD::TokenFactor: break; + case ISD::CopyFromReg: break; + case ISD::CopyToReg: NumberDeps++; break; + case ISD::INLINEASM: break; + } + if (!ScegN->isMachineOpcode()) + continue; + + for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) { + const SDValue &Op = ScegN->getOperand(i); + EVT VT = Op.getNode()->getValueType(Op.getResNo()); + if (TLI->isTypeLegal(VT) + && (TLI->getRegClassFor(VT)->getID() == RCId)) { + NumberDeps++; + break; + } + } + } + return NumberDeps; +} + +static unsigned numberCtrlDepsInSU(SUnit *SU) { + unsigned NumberDeps = 0; + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) + if (I->isCtrl()) + NumberDeps++; + + return NumberDeps; +} + +static unsigned numberCtrlPredInSU(SUnit *SU) { + unsigned NumberDeps = 0; + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) + if (I->isCtrl()) + NumberDeps++; + + return NumberDeps; +} + +/// +/// Initialize nodes. +/// +void ResourcePriorityQueue::initNodes(std::vector<SUnit> &sunits) { + SUnits = &sunits; + NumNodesSolelyBlocking.resize(SUnits->size(), 0); + + for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { + SUnit *SU = &(*SUnits)[i]; + initNumRegDefsLeft(SU); + SU->NodeQueueId = 0; + } +} + +/// This heuristic is used if DFA scheduling is not desired +/// for some VLIW platform. +bool resource_sort::operator()(const SUnit *LHS, const SUnit *RHS) const { + // The isScheduleHigh flag allows nodes with wraparound dependencies that + // cannot easily be modeled as edges with latencies to be scheduled as + // soon as possible in a top-down schedule. 
+ if (LHS->isScheduleHigh && !RHS->isScheduleHigh)
+ return false;
+
+ if (!LHS->isScheduleHigh && RHS->isScheduleHigh)
+ return true;
+
+ unsigned LHSNum = LHS->NodeNum;
+ unsigned RHSNum = RHS->NodeNum;
+
+ // The most important heuristic is scheduling the critical path.
+ unsigned LHSLatency = PQ->getLatency(LHSNum);
+ unsigned RHSLatency = PQ->getLatency(RHSNum);
+ if (LHSLatency < RHSLatency) return true;
+ if (LHSLatency > RHSLatency) return false;
+
+ // After that, if two nodes have identical latencies, look to see if one will
+ // unblock more other nodes than the other.
+ unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum);
+ unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum);
+ if (LHSBlocked < RHSBlocked) return true;
+ if (LHSBlocked > RHSBlocked) return false;
+
+ // Finally, just to provide a stable ordering, use the node number as a
+ // deciding factor.
+ return LHSNum < RHSNum;
+}
+
+
+/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
+/// of SU, return it, otherwise return null.
+SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
+ SUnit *OnlyAvailablePred = 0;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ SUnit &Pred = *I->getSUnit();
+ if (!Pred.isScheduled) {
+ // We found an available, but not scheduled, predecessor. If it's the
+ // only one we have found, keep track of it... otherwise give up.
+ if (OnlyAvailablePred && OnlyAvailablePred != &Pred)
+ return 0;
+ OnlyAvailablePred = &Pred;
+ }
+ }
+ return OnlyAvailablePred;
+}
+
+void ResourcePriorityQueue::push(SUnit *SU) {
+ // Look at all of the successors of this node. Count the number of nodes that
+ // this node is the sole unscheduled node for.
+ unsigned NumNodesBlocking = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I)
+ if (getSingleUnscheduledPred(I->getSUnit()) == SU)
+ ++NumNodesBlocking;
+
+ NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
+ Queue.push_back(SU);
+}
+
+/// Check if scheduling of this SU is possible
+/// in the current packet.
+bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) {
+ if (!SU || !SU->getNode())
+ return false;
+
+ // If this is a compound instruction,
+ // it is likely to be a call. Do not delay it.
+ if (SU->getNode()->getGluedNode())
+ return true;
+
+ // First see if the pipeline could receive this instruction
+ // in the current cycle.
+ if (SU->getNode()->isMachineOpcode())
+ switch (SU->getNode()->getMachineOpcode()) {
+ default:
+ if (!ResourcesModel->canReserveResources(&TII->get(
+ SU->getNode()->getMachineOpcode())))
+ return false;
+ case TargetOpcode::EXTRACT_SUBREG:
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::SUBREG_TO_REG:
+ case TargetOpcode::REG_SEQUENCE:
+ case TargetOpcode::IMPLICIT_DEF:
+ break;
+ }
+
+ // Now see if there are no other dependencies
+ // to instructions already in the packet.
+ for (unsigned i = 0, e = Packet.size(); i != e; ++i)
+ for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(),
+ E = Packet[i]->Succs.end(); I != E; ++I) {
+ // Since we do not add pseudos to packets, might as well
+ // ignore order deps.
+ if (I->isCtrl())
+ continue;
+
+ if (I->getSUnit() == SU)
+ return false;
+ }
+
+ return true;
+}
+
+/// Keep track of available resources.
+void ResourcePriorityQueue::reserveResources(SUnit *SU) {
+ // If this SU does not fit in the packet
+ // start a new one.
+ if (!isResourceAvailable(SU) || SU->getNode()->getGluedNode()) {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ }
+
+ if (SU->getNode() && SU->getNode()->isMachineOpcode()) {
+ switch (SU->getNode()->getMachineOpcode()) {
+ default:
+ ResourcesModel->reserveResources(&TII->get(
+ SU->getNode()->getMachineOpcode()));
+ break;
+ case TargetOpcode::EXTRACT_SUBREG:
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::SUBREG_TO_REG:
+ case TargetOpcode::REG_SEQUENCE:
+ case TargetOpcode::IMPLICIT_DEF:
+ break;
+ }
+ Packet.push_back(SU);
+ }
+ // Forcefully end packet for PseudoOps.
+ else {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ }
+
+ // If packet is now full, reset the state so in the next cycle
+ // we start fresh.
+ if (Packet.size() >= InstrItins->IssueWidth) {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ }
+}
+
+signed ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) {
+ signed RegBalance = 0;
+
+ if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
+ return RegBalance;
+
+ // Gen estimate.
+ for (unsigned i = 0, e = SU->getNode()->getNumValues(); i != e; ++i) {
+ EVT VT = SU->getNode()->getValueType(i);
+ if (TLI->isTypeLegal(VT)
+ && TLI->getRegClassFor(VT)
+ && TLI->getRegClassFor(VT)->getID() == RCId)
+ RegBalance += numberRCValSuccInSU(SU, RCId);
+ }
+ // Kill estimate.
+ for (unsigned i = 0, e = SU->getNode()->getNumOperands(); i != e; ++i) {
+ const SDValue &Op = SU->getNode()->getOperand(i);
+ EVT VT = Op.getNode()->getValueType(Op.getResNo());
+ if (isa<ConstantSDNode>(Op.getNode()))
+ continue;
+
+ if (TLI->isTypeLegal(VT) && TLI->getRegClassFor(VT)
+ && TLI->getRegClassFor(VT)->getID() == RCId)
+ RegBalance -= numberRCValPredInSU(SU, RCId);
+ }
+ return RegBalance;
+}
+
+/// Estimates change in reg pressure from this SU.
+/// It is achieved by trivial tracking of defined
+/// and used vregs in dependent instructions.
+/// The RawPressure flag makes this function ignore
+/// existing reg file sizes, and report raw def/use
+/// balance.
+signed ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) {
+ signed RegBalance = 0;
+
+ if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
+ return RegBalance;
+
+ if (RawPressure) {
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I) {
+ const TargetRegisterClass *RC = *I;
+ RegBalance += rawRegPressureDelta(SU, RC->getID());
+ }
+ }
+ else {
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I) {
+ const TargetRegisterClass *RC = *I;
+ if ((RegPressure[RC->getID()] +
+ rawRegPressureDelta(SU, RC->getID()) > 0) &&
+ (RegPressure[RC->getID()] +
+ rawRegPressureDelta(SU, RC->getID()) >= RegLimit[RC->getID()]))
+ RegBalance += rawRegPressureDelta(SU, RC->getID());
+ }
+ }
+
+ return RegBalance;
+}
+
+// Constants used to denote relative importance of
+// heuristic components for cost computation.
+static const unsigned PriorityOne = 200;
+static const unsigned PriorityTwo = 100;
+static const unsigned PriorityThree = 50;
+static const unsigned PriorityFour = 15;
+static const unsigned PriorityFive = 5;
+static const unsigned ScaleOne = 20;
+static const unsigned ScaleTwo = 10;
+static const unsigned ScaleThree = 5;
+static const unsigned FactorOne = 2;
+
+/// Returns single number reflecting benefit of scheduling SU
+/// in the current cycle.
+signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) {
+ // Initial trivial priority.
+ signed ResCount = 1;
+
+ // Do not waste time on a node that is already scheduled.
+ if (SU->isScheduled)
+ return ResCount;
+
+ // Forced priority is high.
+ if (SU->isScheduleHigh)
+ ResCount += PriorityOne;
+
+ // Adaptable scheduling
+ // A small, but very parallel
+ // region, where reg pressure is an issue.
+ if (HorizontalVerticalBalance > RegPressureThreshold) {
+ // Critical path first
+ ResCount += (SU->getHeight() * ScaleTwo);
+ // If resources are available for it, multiply the
+ // chance of scheduling.
+ if (isResourceAvailable(SU))
+ ResCount <<= FactorOne;
+
+ // Consider change to reg pressure from scheduling
+ // this SU.
+ ResCount -= (regPressureDelta(SU,true) * ScaleOne);
+ }
+ // Default heuristic, greedy and
+ // critical path driven.
+ else {
+ // Critical path first.
+ ResCount += (SU->getHeight() * ScaleTwo);
+ // Now see how many instructions are blocked by this SU.
+ ResCount += (NumNodesSolelyBlocking[SU->NodeNum] * ScaleTwo);
+ // If resources are available for it, multiply the
+ // chance of scheduling.
+ if (isResourceAvailable(SU))
+ ResCount <<= FactorOne;
+
+ ResCount -= (regPressureDelta(SU) * ScaleTwo);
+ }
+
+ // These are platform specific things.
+ // Will need to go into the back end
+ // and accessed from here via a hook.
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
+ if (N->isMachineOpcode()) {
+ const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
+ if (TID.isCall())
+ ResCount += (PriorityThree + (ScaleThree*N->getNumValues()));
+ }
+ else
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::TokenFactor:
+ case ISD::CopyFromReg:
+ case ISD::CopyToReg:
+ ResCount += PriorityFive;
+ break;
+
+ case ISD::INLINEASM:
+ ResCount += PriorityFour;
+ break;
+ }
+ }
+ return ResCount;
+}
+
+
+/// Main resource tracking point.
+void ResourcePriorityQueue::scheduledNode(SUnit *SU) {
+ // Use NULL entry as an event marker to reset
+ // the DFA state.
+ if (!SU) {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ return;
+ }
+
+ const SDNode *ScegN = SU->getNode();
+ // Update reg pressure tracking.
+ // First update current node.
+ if (ScegN->isMachineOpcode()) {
+ // Estimate generated regs.
+ for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
+ EVT VT = ScegN->getValueType(i);
+
+ if (TLI->isTypeLegal(VT)) {
+ const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
+ if (RC)
+ RegPressure[RC->getID()] += numberRCValSuccInSU(SU, RC->getID());
+ }
+ }
+ // Estimate killed regs.
+ for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
+ const SDValue &Op = ScegN->getOperand(i);
+ EVT VT = Op.getNode()->getValueType(Op.getResNo());
+
+ if (TLI->isTypeLegal(VT)) {
+ const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
+ if (RC) {
+ if (RegPressure[RC->getID()] >
+ (numberRCValPredInSU(SU, RC->getID())))
+ RegPressure[RC->getID()] -= numberRCValPredInSU(SU, RC->getID());
+ else RegPressure[RC->getID()] = 0;
+ }
+ }
+ }
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl() || (I->getSUnit()->NumRegDefsLeft == 0))
+ continue;
+ --I->getSUnit()->NumRegDefsLeft;
+ }
+ }
+
+ // Reserve resources for this SU.
+ reserveResources(SU);
+
+ // Adjust number of parallel live ranges.
+ // Heuristic is simple - node with no data successors reduces
+ // number of live ranges. All others, increase it.
+ unsigned NumberNonControlDeps = 0; + + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + adjustPriorityOfUnscheduledPreds(I->getSUnit()); + if (!I->isCtrl()) + NumberNonControlDeps++; + } + + if (!NumberNonControlDeps) { + if (ParallelLiveRanges >= SU->NumPreds) + ParallelLiveRanges -= SU->NumPreds; + else + ParallelLiveRanges = 0; + + } + else + ParallelLiveRanges += SU->NumRegDefsLeft; + + // Track parallel live chains. + HorizontalVerticalBalance += (SU->Succs.size() - numberCtrlDepsInSU(SU)); + HorizontalVerticalBalance -= (SU->Preds.size() - numberCtrlPredInSU(SU)); +} + +void ResourcePriorityQueue::initNumRegDefsLeft(SUnit *SU) { + unsigned NodeNumDefs = 0; + for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) + if (N->isMachineOpcode()) { + const MCInstrDesc &TID = TII->get(N->getMachineOpcode()); + // No register need be allocated for this. + if (N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) { + NodeNumDefs = 0; + break; + } + NodeNumDefs = std::min(N->getNumValues(), TID.getNumDefs()); + } + else + switch(N->getOpcode()) { + default: break; + case ISD::CopyFromReg: + NodeNumDefs++; + break; + case ISD::INLINEASM: + NodeNumDefs++; + break; + } + + SU->NumRegDefsLeft = NodeNumDefs; +} + +/// adjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just +/// scheduled. If SU is not itself available, then there is at least one +/// predecessor node that has not been scheduled yet. If SU has exactly ONE +/// unscheduled predecessor, we want to increase its priority: it getting +/// scheduled will make this node available, so it is better than some other +/// node of the same priority that will not make a node available. +void ResourcePriorityQueue::adjustPriorityOfUnscheduledPreds(SUnit *SU) { + if (SU->isAvailable) return; // All preds scheduled. + + SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU); + if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable) + return; + + // Okay, we found a single predecessor that is available, but not scheduled. + // Since it is available, it must be in the priority queue. First remove it. + remove(OnlyAvailablePred); + + // Reinsert the node into the priority queue, which recomputes its + // NumNodesSolelyBlocking value. + push(OnlyAvailablePred); +} + + +/// Main access point - returns next instructions +/// to be placed in scheduling sequence. +SUnit *ResourcePriorityQueue::pop() { + if (empty()) + return 0; + + std::vector<SUnit *>::iterator Best = Queue.begin(); + if (!DisableDFASched) { + signed BestCost = SUSchedulingCost(*Best); + for (std::vector<SUnit *>::iterator I = Queue.begin(), + E = Queue.end(); I != E; ++I) { + if (*I == *Best) + continue; + + if (SUSchedulingCost(*I) > BestCost) { + BestCost = SUSchedulingCost(*I); + Best = I; + } + } + } + // Use default TD scheduling mechanism. 
+ else { + for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()), + E = Queue.end(); I != E; ++I) + if (Picker(*Best, *I)) + Best = I; + } + + SUnit *V = *Best; + if (Best != prior(Queue.end())) + std::swap(*Best, Queue.back()); + + Queue.pop_back(); + + return V; +} + + +void ResourcePriorityQueue::remove(SUnit *SU) { + assert(!Queue.empty() && "Queue is empty!"); + std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU); + if (I != prior(Queue.end())) + std::swap(*I, Queue.back()); + + Queue.pop_back(); +} + + +#ifdef NDEBUG +void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const {} +#else +void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const { + ResourcePriorityQueue q = *this; + while (!q.empty()) { + SUnit *su = q.pop(); + dbgs() << "Height " << su->getHeight() << ": "; + su->dump(DAG); + } +} +#endif diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index b275c6321ae4..24da432a47a1 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -43,7 +43,7 @@ namespace { SmallVector<SUnit *, 16> Queue; bool empty() const { return Queue.empty(); } - + void push(SUnit *U) { Queue.push_back(U); } @@ -101,8 +101,8 @@ private: bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&); void ListScheduleBottomUp(); - /// ForceUnitLatencies - The fast scheduler doesn't care about real latencies. - bool ForceUnitLatencies() const { return true; } + /// forceUnitLatencies - The fast scheduler doesn't care about real latencies. + bool forceUnitLatencies() const { return true; } }; } // end anonymous namespace @@ -112,7 +112,7 @@ void ScheduleDAGFast::Schedule() { DEBUG(dbgs() << "********** List Scheduling **********\n"); NumLiveRegs = 0; - LiveRegDefs.resize(TRI->getNumRegs(), NULL); + LiveRegDefs.resize(TRI->getNumRegs(), NULL); LiveRegCycles.resize(TRI->getNumRegs(), 0); // Build the scheduling graph. @@ -159,7 +159,7 @@ void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) { ReleasePred(SU, &*I); if (I->isAssignedRegDep()) { // This is a physical register dependency and it's impossible or - // expensive to copy the register. Make sure nothing that can + // expensive to copy the register. Make sure nothing that can // clobber the register is scheduled between the predecessor and // this node. 
if (!LiveRegDefs[I->getReg()]) { @@ -245,10 +245,10 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1), SDValue(LoadNode, 1)); - SUnit *NewSU = NewSUnit(N); + SUnit *NewSU = newSUnit(N); assert(N->getNodeId() == -1 && "Node already inserted!"); N->setNodeId(NewSU->NodeNum); - + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { @@ -268,7 +268,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { LoadSU = &SUnits[LoadNode->getNodeId()]; isNewLoad = false; } else { - LoadSU = NewSUnit(LoadNode); + LoadSU = newSUnit(LoadNode); LoadNode->setNodeId(LoadSU->NodeNum); } @@ -329,7 +329,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { D.setSUnit(LoadSU); AddPred(SuccDep, D); } - } + } if (isNewLoad) { AddPred(NewSU, SDep(LoadSU, SDep::Order, LoadSU->Latency)); } @@ -381,11 +381,11 @@ void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, const TargetRegisterClass *DestRC, const TargetRegisterClass *SrcRC, SmallVector<SUnit*, 2> &Copies) { - SUnit *CopyFromSU = NewSUnit(static_cast<SDNode *>(NULL)); + SUnit *CopyFromSU = newSUnit(static_cast<SDNode *>(NULL)); CopyFromSU->CopySrcRC = SrcRC; CopyFromSU->CopyDstRC = DestRC; - SUnit *CopyToSU = NewSUnit(static_cast<SDNode *>(NULL)); + SUnit *CopyToSU = newSUnit(static_cast<SDNode *>(NULL)); CopyToSU->CopySrcRC = DestRC; CopyToSU->CopyDstRC = SrcRC; @@ -425,7 +425,7 @@ static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); unsigned NumRes = MCID.getNumDefs(); - for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { + for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { if (Reg == *ImpDef) break; ++NumRes; @@ -447,7 +447,7 @@ static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg, Added = true; } } - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) + for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) { if (RegAdded.insert(*Alias)) { LRegs.push_back(*Alias); @@ -508,7 +508,7 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); if (!MCID.ImplicitDefs) continue; - for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg) { + for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) { CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); } } @@ -630,7 +630,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() { std::reverse(Sequence.begin(), Sequence.end()); #ifndef NDEBUG - VerifySchedule(/*isBottomUp=*/true); + VerifyScheduledSequence(/*isBottomUp=*/true); #endif } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index e757defd3895..2cb5d37d689e 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -45,10 +45,6 @@ static RegisterScheduler "Bottom-up register reduction list scheduling", createBURRListDAGScheduler); static RegisterScheduler - tdrListrDAGScheduler("list-tdrr", - "Top-down register reduction list scheduling", - createTDRRListDAGScheduler); -static RegisterScheduler sourceListDAGScheduler("source", "Similar to list-burr but schedules in 
source " "order when possible", @@ -93,6 +89,9 @@ static cl::opt<bool> DisableSchedCriticalPath( static cl::opt<bool> DisableSchedHeight( "disable-sched-height", cl::Hidden, cl::init(false), cl::desc("Disable scheduled-height priority in sched=list-ilp")); +static cl::opt<bool> Disable2AddrHack( + "disable-2addr-hack", cl::Hidden, cl::init(true), + cl::desc("Disable scheduler's two-address hack")); static cl::opt<int> MaxReorderWindow( "max-sched-reorder", cl::Hidden, cl::init(6), @@ -103,17 +102,6 @@ static cl::opt<unsigned> AvgIPC( "sched-avg-ipc", cl::Hidden, cl::init(1), cl::desc("Average inst/cycle whan no target itinerary exists.")); -#ifndef NDEBUG -namespace { - // For sched=list-ilp, Count the number of times each factor comes into play. - enum { FactPressureDiff, FactRegUses, FactStall, FactHeight, FactDepth, - FactStatic, FactOther, NumFactors }; -} -static const char *FactorName[NumFactors] = -{"PressureDiff", "RegUses", "Stall", "Height", "Depth","Static", "Other"}; -static int FactorCount[NumFactors]; -#endif //!NDEBUG - namespace { //===----------------------------------------------------------------------===// /// ScheduleDAGRRList - The actual register reduction list scheduler @@ -121,10 +109,6 @@ namespace { /// class ScheduleDAGRRList : public ScheduleDAGSDNodes { private: - /// isBottomUp - This is true if the scheduling problem is bottom-up, false if - /// it is top-down. - bool isBottomUp; - /// NeedLatency - True if the scheduler will make use of latency information. /// bool NeedLatency; @@ -162,11 +146,15 @@ private: /// and similar queries. ScheduleDAGTopologicalSort Topo; + // Hack to keep track of the inverse of FindCallSeqStart without more crazy + // DAG crawling. + DenseMap<SUnit*, SUnit*> CallSeqEndForStart; + public: ScheduleDAGRRList(MachineFunction &mf, bool needlatency, SchedulingPriorityQueue *availqueue, CodeGenOpt::Level OptLevel) - : ScheduleDAGSDNodes(mf), isBottomUp(availqueue->isBottomUp()), + : ScheduleDAGSDNodes(mf), NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0), Topo(SUnits) { @@ -221,8 +209,6 @@ private: void ReleasePred(SUnit *SU, const SDep *PredEdge); void ReleasePredecessors(SUnit *SU); - void ReleaseSucc(SUnit *SU, const SDep *SuccEdge); - void ReleaseSuccessors(SUnit *SU); void ReleasePending(); void AdvanceToCycle(unsigned NextCycle); void AdvancePastStalls(SUnit *SU); @@ -242,15 +228,11 @@ private: SUnit *PickNodeToScheduleBottomUp(); void ListScheduleBottomUp(); - void ScheduleNodeTopDown(SUnit*); - void ListScheduleTopDown(); - - /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it. /// Updates the topological ordering if required. SUnit *CreateNewSUnit(SDNode *N) { unsigned NumSUnits = SUnits.size(); - SUnit *NewNode = NewSUnit(N); + SUnit *NewNode = newSUnit(N); // Update the topological ordering. if (NewNode->NodeNum >= NumSUnits) Topo.InitDAGTopologicalSorting(); @@ -268,9 +250,9 @@ private: return NewNode; } - /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't + /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't /// need actual latency information but the hybrid scheduler does. - bool ForceUnitLatencies() const { + bool forceUnitLatencies() const { return !NeedLatency; } }; @@ -278,7 +260,7 @@ private: /// GetCostForDef - Looks up the register class and cost for a given definition. 
/// Typically this just means looking up the representative register class, -/// but for untyped values (MVT::untyped) it means inspecting the node's +/// but for untyped values (MVT::Untyped) it means inspecting the node's /// opcode to determine what register class is being generated. static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, const TargetLowering *TLI, @@ -289,7 +271,7 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, // Special handling for untyped values. These values can only come from // the expansion of custom DAG-to-DAG patterns. - if (VT == MVT::untyped) { + if (VT == MVT::Untyped) { const SDNode *Node = RegDefPos.GetNode(); unsigned Opcode = Node->getMachineOpcode(); @@ -319,18 +301,16 @@ void ScheduleDAGRRList::Schedule() { DEBUG(dbgs() << "********** List Scheduling BB#" << BB->getNumber() << " '" << BB->getName() << "' **********\n"); -#ifndef NDEBUG - for (int i = 0; i < NumFactors; ++i) { - FactorCount[i] = 0; - } -#endif //!NDEBUG CurCycle = 0; IssueCount = 0; MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX; NumLiveRegs = 0; - LiveRegDefs.resize(TRI->getNumRegs(), NULL); - LiveRegGens.resize(TRI->getNumRegs(), NULL); + // Allocate slots for each physical register, plus one for a special register + // to track the virtual resource of a calling sequence. + LiveRegDefs.resize(TRI->getNumRegs() + 1, NULL); + LiveRegGens.resize(TRI->getNumRegs() + 1, NULL); + CallSeqEndForStart.clear(); // Build the scheduling graph. BuildSchedGraph(NULL); @@ -343,18 +323,16 @@ void ScheduleDAGRRList::Schedule() { HazardRec->Reset(); - // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate. - if (isBottomUp) - ListScheduleBottomUp(); - else - ListScheduleTopDown(); + // Execute the actual scheduling loop. + ListScheduleBottomUp(); -#ifndef NDEBUG - for (int i = 0; i < NumFactors; ++i) { - DEBUG(dbgs() << FactorName[i] << "\t" << FactorCount[i] << "\n"); - } -#endif // !NDEBUG AvailableQueue->releaseState(); + + DEBUG({ + dbgs() << "*** Final schedule ***\n"; + dumpSchedule(); + dbgs() << '\n'; + }); } //===----------------------------------------------------------------------===// @@ -376,7 +354,7 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { #endif --PredSU->NumSuccsLeft; - if (!ForceUnitLatencies()) { + if (!forceUnitLatencies()) { // Updating predecessor's height. This is now the cycle when the // predecessor can be scheduled without causing a pipeline stall. PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency()); @@ -403,6 +381,109 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { } } +/// IsChainDependent - Test if Outer is reachable from Inner through +/// chain dependencies. +static bool IsChainDependent(SDNode *Outer, SDNode *Inner, + unsigned NestLevel, + const TargetInstrInfo *TII) { + SDNode *N = Outer; + for (;;) { + if (N == Inner) + return true; + // For a TokenFactor, examine each operand. There may be multiple ways + // to get to the CALLSEQ_BEGIN, but we need to find the path with the + // most nesting in order to ensure that we find the corresponding match. + if (N->getOpcode() == ISD::TokenFactor) { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (IsChainDependent(N->getOperand(i).getNode(), Inner, NestLevel, TII)) + return true; + return false; + } + // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END. 
+ if (N->isMachineOpcode()) { + if (N->getMachineOpcode() == + (unsigned)TII->getCallFrameDestroyOpcode()) { + ++NestLevel; + } else if (N->getMachineOpcode() == + (unsigned)TII->getCallFrameSetupOpcode()) { + if (NestLevel == 0) + return false; + --NestLevel; + } + } + // Otherwise, find the chain and continue climbing. + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (N->getOperand(i).getValueType() == MVT::Other) { + N = N->getOperand(i).getNode(); + goto found_chain_operand; + } + return false; + found_chain_operand:; + if (N->getOpcode() == ISD::EntryToken) + return false; + } +} + +/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate +/// the corresponding (lowered) CALLSEQ_BEGIN node. +/// +/// NestLevel and MaxNested are used in recursion to indcate the current level +/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum +/// level seen so far. +/// +/// TODO: It would be better to give CALLSEQ_END an explicit operand to point +/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it. +static SDNode * +FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest, + const TargetInstrInfo *TII) { + for (;;) { + // For a TokenFactor, examine each operand. There may be multiple ways + // to get to the CALLSEQ_BEGIN, but we need to find the path with the + // most nesting in order to ensure that we find the corresponding match. + if (N->getOpcode() == ISD::TokenFactor) { + SDNode *Best = 0; + unsigned BestMaxNest = MaxNest; + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + unsigned MyNestLevel = NestLevel; + unsigned MyMaxNest = MaxNest; + if (SDNode *New = FindCallSeqStart(N->getOperand(i).getNode(), + MyNestLevel, MyMaxNest, TII)) + if (!Best || (MyMaxNest > BestMaxNest)) { + Best = New; + BestMaxNest = MyMaxNest; + } + } + assert(Best); + MaxNest = BestMaxNest; + return Best; + } + // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END. + if (N->isMachineOpcode()) { + if (N->getMachineOpcode() == + (unsigned)TII->getCallFrameDestroyOpcode()) { + ++NestLevel; + MaxNest = std::max(MaxNest, NestLevel); + } else if (N->getMachineOpcode() == + (unsigned)TII->getCallFrameSetupOpcode()) { + assert(NestLevel != 0); + --NestLevel; + if (NestLevel == 0) + return N; + } + } + // Otherwise, find the chain and continue climbing. + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (N->getOperand(i).getValueType() == MVT::Other) { + N = N->getOperand(i).getNode(); + goto found_chain_operand; + } + return 0; + found_chain_operand:; + if (N->getOpcode() == ISD::EntryToken) + return 0; + } +} + /// Call ReleasePred for each predecessor, then update register live def/gen. /// Always update LiveRegDefs for a register dependence even if the current SU /// also defines the register. This effectively create one large live range @@ -440,6 +521,27 @@ void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) { } } } + + // If we're scheduling a lowered CALLSEQ_END, find the corresponding + // CALLSEQ_BEGIN. Inject an artificial physical register dependence between + // these nodes, to prevent other calls from being interscheduled with them. 
+ unsigned CallResource = TRI->getNumRegs(); + if (!LiveRegDefs[CallResource]) + for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) + if (Node->isMachineOpcode() && + Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) { + unsigned NestLevel = 0; + unsigned MaxNest = 0; + SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII); + + SUnit *Def = &SUnits[N->getNodeId()]; + CallSeqEndForStart[Def] = SU; + + ++NumLiveRegs; + LiveRegDefs[CallResource] = Def; + LiveRegGens[CallResource] = SU; + break; + } } /// Check to see if any of the pending instructions are ready to issue. If @@ -457,8 +559,7 @@ void ScheduleDAGRRList::ReleasePending() { // Check to see if any of the pending instructions are ready to issue. If // so, add them to the available queue. for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { - unsigned ReadyCycle = - isBottomUp ? PendingQueue[i]->getHeight() : PendingQueue[i]->getDepth(); + unsigned ReadyCycle = PendingQueue[i]->getHeight(); if (ReadyCycle < MinAvailableCycle) MinAvailableCycle = ReadyCycle; @@ -487,10 +588,7 @@ void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) { } else { for (; CurCycle != NextCycle; ++CurCycle) { - if (isBottomUp) - HazardRec->RecedeCycle(); - else - HazardRec->AdvanceCycle(); + HazardRec->RecedeCycle(); } } // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the @@ -511,7 +609,7 @@ void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) { // currently need to treat these nodes like real instructions. // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return; - unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth(); + unsigned ReadyCycle = SU->getHeight(); // Bump CurCycle to account for latency. We assume the latency of other // available instructions may be hidden by the stall (not a full pipe stall). @@ -522,7 +620,7 @@ void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) { // Calls are scheduled in their preceding cycle, so don't conflict with // hazards from instructions after the call. EmitNode will reset the // scoreboard state before emitting the call. - if (isBottomUp && SU->isCall) + if (SU->isCall) return; // FIXME: For resource conflicts in very long non-pipelined stages, we @@ -530,7 +628,7 @@ void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) { int Stalls = 0; while (true) { ScheduleHazardRecognizer::HazardType HT = - HazardRec->getHazardType(SU, isBottomUp ? -Stalls : Stalls); + HazardRec->getHazardType(SU, -Stalls); if (HT == ScheduleHazardRecognizer::NoHazard) break; @@ -568,17 +666,13 @@ void ScheduleDAGRRList::EmitNode(SUnit *SU) { HazardRec->Reset(); return; } - if (isBottomUp && SU->isCall) { + if (SU->isCall) { // Calls are scheduled with their preceding instructions. For bottom-up // scheduling, clear the pipeline state before emitting. HazardRec->Reset(); } HazardRec->EmitInstruction(SU); - - if (!isBottomUp && SU->isCall) { - HazardRec->Reset(); - } } static void resetVRegCycle(SUnit *SU); @@ -607,7 +701,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { Sequence.push_back(SU); - AvailableQueue->ScheduledNode(SU); + AvailableQueue->scheduledNode(SU); // If HazardRec is disabled, and each inst counts as one cycle, then // advance CurCycle before ReleasePredecessors to avoid useless pushes to @@ -630,6 +724,20 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { LiveRegGens[I->getReg()] = NULL; } } + // Release the special call resource dependence, if this is the beginning + // of a call. 
+ unsigned CallResource = TRI->getNumRegs(); + if (LiveRegDefs[CallResource] == SU) + for (const SDNode *SUNode = SU->getNode(); SUNode; + SUNode = SUNode->getGluedNode()) { + if (SUNode->isMachineOpcode() && + SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) { + assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); + --NumLiveRegs; + LiveRegDefs[CallResource] = NULL; + LiveRegGens[CallResource] = NULL; + } + } resetVRegCycle(SU); @@ -686,15 +794,41 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { } } + // Reclaim the special call resource dependence, if this is the beginning + // of a call. + unsigned CallResource = TRI->getNumRegs(); + for (const SDNode *SUNode = SU->getNode(); SUNode; + SUNode = SUNode->getGluedNode()) { + if (SUNode->isMachineOpcode() && + SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) { + ++NumLiveRegs; + LiveRegDefs[CallResource] = SU; + LiveRegGens[CallResource] = CallSeqEndForStart[SU]; + } + } + + // Release the special call resource dependence, if this is the end + // of a call. + if (LiveRegGens[CallResource] == SU) + for (const SDNode *SUNode = SU->getNode(); SUNode; + SUNode = SUNode->getGluedNode()) { + if (SUNode->isMachineOpcode() && + SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) { + assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); + --NumLiveRegs; + LiveRegDefs[CallResource] = NULL; + LiveRegGens[CallResource] = NULL; + } + } + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { if (I->isAssignedRegDep()) { + if (!LiveRegDefs[I->getReg()]) + ++NumLiveRegs; // This becomes the nearest def. Note that an earlier def may still be // pending if this is a two-address node. LiveRegDefs[I->getReg()] = SU; - if (!LiveRegDefs[I->getReg()]) { - ++NumLiveRegs; - } if (LiveRegGens[I->getReg()] == NULL || I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight()) LiveRegGens[I->getReg()] = I->getSUnit(); @@ -714,7 +848,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { else { AvailableQueue->push(SU); } - AvailableQueue->UnscheduledNode(SU); + AvailableQueue->unscheduledNode(SU); } /// After backtracking, the hazard checker needs to be restored to a state @@ -805,6 +939,11 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) return NULL; + // unfolding an x86 DEC64m operation results in store, dec, load which + // can't be handled here so quit + if (NewNodes.size() == 3) + return NULL; + DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n"); assert(NewNodes.size() == 2 && "Expected a load folding node!"); @@ -830,7 +969,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { LoadNode->setNodeId(LoadSU->NodeNum); InitNumRegDefsLeft(LoadSU); - ComputeLatency(LoadSU); + computeLatency(LoadSU); } SUnit *NewSU = CreateNewSUnit(N); @@ -848,7 +987,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { NewSU->isCommutable = true; InitNumRegDefsLeft(NewSU); - ComputeLatency(NewSU); + computeLatency(NewSU); // Record all the edges to and from the old SU, by category. 
SmallVector<SDep, 4> ChainPreds; @@ -1027,7 +1166,7 @@ static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); unsigned NumRes = MCID.getNumDefs(); - for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { + for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { if (Reg == *ImpDef) break; ++NumRes; @@ -1042,7 +1181,7 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, SmallSet<unsigned, 4> &RegAdded, SmallVector<unsigned, 4> &LRegs, const TargetRegisterInfo *TRI) { - for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) { + for (const uint16_t *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) { // Check if Ref is live. if (!LiveRegDefs[*AliasI]) continue; @@ -1057,6 +1196,31 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, } } +/// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered +/// by RegMask, and add them to LRegs. +static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask, + std::vector<SUnit*> &LiveRegDefs, + SmallSet<unsigned, 4> &RegAdded, + SmallVector<unsigned, 4> &LRegs) { + // Look at all live registers. Skip Reg0 and the special CallResource. + for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) { + if (!LiveRegDefs[i]) continue; + if (LiveRegDefs[i] == SU) continue; + if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue; + if (RegAdded.insert(i)) + LRegs.push_back(i); + } +} + +/// getNodeRegMask - Returns the register mask attached to an SDNode, if any. +static const uint32_t *getNodeRegMask(const SDNode *N) { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (const RegisterMaskSDNode *Op = + dyn_cast<RegisterMaskSDNode>(N->getOperand(i).getNode())) + return Op->getRegMask(); + return NULL; +} + /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay /// scheduling of the given node to satisfy live physical register dependencies. /// If the specific node is the last one that's available to schedule, do @@ -1108,10 +1272,27 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) { if (!Node->isMachineOpcode()) continue; + // If we're in the middle of scheduling a call, don't begin scheduling + // another call. Also, don't allow any physical registers to be live across + // the call. + if (Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) { + // Check the special calling-sequence resource. 
+ unsigned CallResource = TRI->getNumRegs(); + if (LiveRegDefs[CallResource]) { + SDNode *Gen = LiveRegGens[CallResource]->getNode(); + while (SDNode *Glued = Gen->getGluedNode()) + Gen = Glued; + if (!IsChainDependent(Gen, Node, 0, TII) && RegAdded.insert(CallResource)) + LRegs.push_back(CallResource); + } + } + if (const uint32_t *RegMask = getNodeRegMask(Node)) + CheckForLiveRegDefMasked(SU, RegMask, LiveRegDefs, RegAdded, LRegs); + const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); if (!MCID.ImplicitDefs) continue; - for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg) + for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); } @@ -1300,99 +1481,10 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { std::reverse(Sequence.begin(), Sequence.end()); #ifndef NDEBUG - VerifySchedule(isBottomUp); -#endif -} - -//===----------------------------------------------------------------------===// -// Top-Down Scheduling -//===----------------------------------------------------------------------===// - -/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to -/// the AvailableQueue if the count reaches zero. Also update its cycle bound. -void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) { - SUnit *SuccSU = SuccEdge->getSUnit(); - -#ifndef NDEBUG - if (SuccSU->NumPredsLeft == 0) { - dbgs() << "*** Scheduling failed! ***\n"; - SuccSU->dump(this); - dbgs() << " has been released too many times!\n"; - llvm_unreachable(0); - } -#endif - --SuccSU->NumPredsLeft; - - // If all the node's predecessors are scheduled, this node is ready - // to be scheduled. Ignore the special ExitSU node. - if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) { - SuccSU->isAvailable = true; - AvailableQueue->push(SuccSU); - } -} - -void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) { - // Top down: release successors - for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - assert(!I->isAssignedRegDep() && - "The list-tdrr scheduler doesn't yet support physreg dependencies!"); - - ReleaseSucc(SU, &*I); - } -} - -/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending -/// count of its successors. If a successor pending count is zero, add it to -/// the Available queue. -void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU) { - DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); - DEBUG(SU->dump(this)); - - assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); - SU->setDepthToAtLeast(CurCycle); - Sequence.push_back(SU); - - ReleaseSuccessors(SU); - SU->isScheduled = true; - AvailableQueue->ScheduledNode(SU); -} - -/// ListScheduleTopDown - The main loop of list scheduling for top-down -/// schedulers. -void ScheduleDAGRRList::ListScheduleTopDown() { - AvailableQueue->setCurCycle(CurCycle); - - // Release any successors of the special Entry node. - ReleaseSuccessors(&EntrySU); - - // All leaves to Available queue. - for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { - // It is available if it has no predecessors. - if (SUnits[i].Preds.empty()) { - AvailableQueue->push(&SUnits[i]); - SUnits[i].isAvailable = true; - } - } - - // While Available queue is not empty, grab the node with the highest - // priority. If it is not ready put it back. Schedule the node. 
- Sequence.reserve(SUnits.size()); - while (!AvailableQueue->empty()) { - SUnit *CurSU = AvailableQueue->pop(); - - if (CurSU) - ScheduleNodeTopDown(CurSU); - ++CurCycle; - AvailableQueue->setCurCycle(CurCycle); - } - -#ifndef NDEBUG - VerifySchedule(isBottomUp); + VerifyScheduledSequence(/*isBottomUp=*/true); #endif } - //===----------------------------------------------------------------------===// // RegReductionPriorityQueue Definition //===----------------------------------------------------------------------===// @@ -1437,21 +1529,6 @@ struct bu_ls_rr_sort : public queue_sort { bool operator()(SUnit* left, SUnit* right) const; }; -// td_ls_rr_sort - Priority function for top down register pressure reduction -// scheduler. -struct td_ls_rr_sort : public queue_sort { - enum { - IsBottomUp = false, - HasReadyFilter = false - }; - - RegReductionPQBase *SPQ; - td_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} - td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} - - bool operator()(const SUnit* left, const SUnit* right) const; -}; - // src_ls_rr_sort - Priority function for source order scheduler. struct src_ls_rr_sort : public queue_sort { enum { @@ -1510,6 +1587,7 @@ protected: std::vector<SUnit*> Queue; unsigned CurQueueId; bool TracksRegPressure; + bool SrcOrder; // SUnits - The SUnits for the current graph. std::vector<SUnit> *SUnits; @@ -1535,11 +1613,12 @@ public: RegReductionPQBase(MachineFunction &mf, bool hasReadyFilter, bool tracksrp, + bool srcorder, const TargetInstrInfo *tii, const TargetRegisterInfo *tri, const TargetLowering *tli) : SchedulingPriorityQueue(hasReadyFilter), - CurQueueId(0), TracksRegPressure(tracksrp), + CurQueueId(0), TracksRegPressure(tracksrp), SrcOrder(srcorder), MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) { if (TracksRegPressure) { unsigned NumRC = TRI->getNumRegClasses(); @@ -1610,9 +1689,9 @@ public: int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const; - void ScheduledNode(SUnit *SU); + void scheduledNode(SUnit *SU); - void UnscheduledNode(SUnit *SU); + void unscheduledNode(SUnit *SU); protected: bool canClobber(const SUnit *SU, const SUnit *Op); @@ -1654,10 +1733,12 @@ class RegReductionPriorityQueue : public RegReductionPQBase { public: RegReductionPriorityQueue(MachineFunction &mf, bool tracksrp, + bool srcorder, const TargetInstrInfo *tii, const TargetRegisterInfo *tri, const TargetLowering *tli) - : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, tii, tri, tli), + : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder, + tii, tri, tli), Picker(this) {} bool isBottomUp() const { return SF::IsBottomUp; } @@ -1680,10 +1761,7 @@ public: SF DumpPicker = Picker; while (!DumpQueue.empty()) { SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG); - if (isBottomUp()) - dbgs() << "Height " << SU->getHeight() << ": "; - else - dbgs() << "Depth " << SU->getDepth() << ": "; + dbgs() << "Height " << SU->getHeight() << ": "; SU->dump(DAG); } } @@ -1692,9 +1770,6 @@ public: typedef RegReductionPriorityQueue<bu_ls_rr_sort> BURegReductionPriorityQueue; -typedef RegReductionPriorityQueue<td_ls_rr_sort> -TDRegReductionPriorityQueue; - typedef RegReductionPriorityQueue<src_ls_rr_sort> SrcRegReductionPriorityQueue; @@ -1919,7 +1994,7 @@ int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const { return PDiff; } -void RegReductionPQBase::ScheduledNode(SUnit *SU) { +void RegReductionPQBase::scheduledNode(SUnit *SU) { if (!TracksRegPressure) return; @@ -1988,7 +2063,7 @@ void 
RegReductionPQBase::ScheduledNode(SUnit *SU) { dumpRegPressure(); } -void RegReductionPQBase::UnscheduledNode(SUnit *SU) { +void RegReductionPQBase::unscheduledNode(SUnit *SU) { if (!TracksRegPressure) return; @@ -2235,37 +2310,29 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, int LHeight = (int)left->getHeight() + LPenalty; int RHeight = (int)right->getHeight() + RPenalty; - bool LStall = (!checkPref || left->SchedulingPref == Sched::Latency) && + bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) && BUHasStall(left, LHeight, SPQ); - bool RStall = (!checkPref || right->SchedulingPref == Sched::Latency) && + bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) && BUHasStall(right, RHeight, SPQ); // If scheduling one of the node will cause a pipeline stall, delay it. // If scheduling either one of the node will cause a pipeline stall, sort // them according to their height. if (LStall) { - if (!RStall) { - DEBUG(++FactorCount[FactStall]); + if (!RStall) return 1; - } - if (LHeight != RHeight) { - DEBUG(++FactorCount[FactStall]); + if (LHeight != RHeight) return LHeight > RHeight ? 1 : -1; - } - } else if (RStall) { - DEBUG(++FactorCount[FactStall]); + } else if (RStall) return -1; - } // If either node is scheduling for latency, sort them by height/depth // and latency. - if (!checkPref || (left->SchedulingPref == Sched::Latency || - right->SchedulingPref == Sched::Latency)) { + if (!checkPref || (left->SchedulingPref == Sched::ILP || + right->SchedulingPref == Sched::ILP)) { if (DisableSchedCycles) { - if (LHeight != RHeight) { - DEBUG(++FactorCount[FactHeight]); + if (LHeight != RHeight) return LHeight > RHeight ? 1 : -1; - } } else { // If neither instruction stalls (!LStall && !RStall) then @@ -2274,17 +2341,14 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, int LDepth = left->getDepth() - LPenalty; int RDepth = right->getDepth() - RPenalty; if (LDepth != RDepth) { - DEBUG(++FactorCount[FactDepth]); DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum << ") depth " << LDepth << " vs SU (" << right->NodeNum << ") depth " << RDepth << "\n"); return LDepth < RDepth ? 1 : -1; } } - if (left->Latency != right->Latency) { - DEBUG(++FactorCount[FactOther]); + if (left->Latency != right->Latency) return left->Latency > right->Latency ? 1 : -1; - } } return 0; } @@ -2298,7 +2362,6 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { bool LHasPhysReg = left->hasPhysRegDefs; bool RHasPhysReg = right->hasPhysRegDefs; if (LHasPhysReg != RHasPhysReg) { - DEBUG(++FactorCount[FactRegUses]); #ifndef NDEBUG const char *PhysRegMsg[] = {" has no physreg", " defines a physreg"}; #endif @@ -2324,10 +2387,8 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0; } - if (LPriority != RPriority) { - DEBUG(++FactorCount[FactStatic]); + if (LPriority != RPriority) return LPriority > RPriority; - } // One or both of the nodes are calls and their sethi-ullman numbers are the // same, then keep source order. @@ -2360,18 +2421,14 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { // This creates more short live intervals. unsigned LDist = closestSucc(left); unsigned RDist = closestSucc(right); - if (LDist != RDist) { - DEBUG(++FactorCount[FactOther]); + if (LDist != RDist) return LDist < RDist; - } // How many registers becomes live when the node is scheduled. 
unsigned LScratch = calcMaxScratches(left); unsigned RScratch = calcMaxScratches(right); - if (LScratch != RScratch) { - DEBUG(++FactorCount[FactOther]); + if (LScratch != RScratch) return LScratch > RScratch; - } // Comparing latency against a call makes little sense unless the node // is register pressure-neutral. @@ -2386,20 +2443,15 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { return result > 0; } else { - if (left->getHeight() != right->getHeight()) { - DEBUG(++FactorCount[FactHeight]); + if (left->getHeight() != right->getHeight()) return left->getHeight() > right->getHeight(); - } - if (left->getDepth() != right->getDepth()) { - DEBUG(++FactorCount[FactDepth]); + if (left->getDepth() != right->getDepth()) return left->getDepth() < right->getDepth(); - } } assert(left->NodeQueueId && right->NodeQueueId && "NodeQueueId cannot be zero"); - DEBUG(++FactorCount[FactOther]); return (left->NodeQueueId > right->NodeQueueId); } @@ -2459,13 +2511,11 @@ bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { // Avoid causing spills. If register pressure is high, schedule for // register pressure reduction. if (LHigh && !RHigh) { - DEBUG(++FactorCount[FactPressureDiff]); DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU(" << right->NodeNum << ")\n"); return true; } else if (!LHigh && RHigh) { - DEBUG(++FactorCount[FactPressureDiff]); DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU(" << left->NodeNum << ")\n"); return false; @@ -2529,7 +2579,6 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { RPDiff = SPQ->RegPressureDiff(right, RLiveUses); } if (!DisableSchedRegPressure && LPDiff != RPDiff) { - DEBUG(++FactorCount[FactPressureDiff]); DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff << " != SU(" << right->NodeNum << "): " << RPDiff << "\n"); return LPDiff > RPDiff; @@ -2538,7 +2587,6 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) { bool LReduce = canEnableCoalescing(left); bool RReduce = canEnableCoalescing(right); - DEBUG(if (LReduce != RReduce) ++FactorCount[FactPressureDiff]); if (LReduce && !RReduce) return false; if (RReduce && !LReduce) return true; } @@ -2546,17 +2594,14 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) { DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n"); - DEBUG(++FactorCount[FactRegUses]); return LLiveUses < RLiveUses; } if (!DisableSchedStalls) { bool LStall = BUHasStall(left, left->getHeight(), SPQ); bool RStall = BUHasStall(right, right->getHeight(), SPQ); - if (LStall != RStall) { - DEBUG(++FactorCount[FactHeight]); + if (LStall != RStall) return left->getHeight() > right->getHeight(); - } } if (!DisableSchedCriticalPath) { @@ -2565,17 +2610,14 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): " << left->getDepth() << " != SU(" << right->NodeNum << "): " << right->getDepth() << "\n"); - DEBUG(++FactorCount[FactDepth]); return left->getDepth() < right->getDepth(); } } if (!DisableSchedHeight && left->getHeight() != right->getHeight()) { int spread = (int)left->getHeight() - (int)right->getHeight(); - if (std::abs(spread) > MaxReorderWindow) { - DEBUG(++FactorCount[FactHeight]); + if (std::abs(spread) > MaxReorderWindow) return left->getHeight() 
> right->getHeight(); - } } return BURRSort(left, right, SPQ); @@ -2584,9 +2626,10 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) { SUnits = &sunits; // Add pseudo dependency edges for two-address nodes. - AddPseudoTwoAddrDeps(); + if (!Disable2AddrHack) + AddPseudoTwoAddrDeps(); // Reroute edges to nodes with multiple uses. - if (!TracksRegPressure) + if (!TracksRegPressure && !SrcOrder) PrescheduleNodesWithMultipleUses(); // Calculate node priorities. CalculateSethiUllmanNumbers(); @@ -2628,9 +2671,10 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, ScheduleDAGRRList *scheduleDAG, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) { - const unsigned *ImpDefs + const uint16_t *ImpDefs = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs(); - if(!ImpDefs) + const uint32_t *RegMask = getNodeRegMask(SU->getNode()); + if(!ImpDefs && !RegMask) return false; for (SUnit::const_succ_iterator SI = SU->Succs.begin(), SE = SU->Succs.end(); @@ -2641,14 +2685,18 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, if (!PI->isAssignedRegDep()) continue; - for (const unsigned *ImpDef = ImpDefs; *ImpDef; ++ImpDef) { - // Return true if SU clobbers this physical register use and the - // definition of the register reaches from DepSU. IsReachable queries a - // topological forward sort of the DAG (following the successors). - if (TRI->regsOverlap(*ImpDef, PI->getReg()) && - scheduleDAG->IsReachable(DepSU, PI->getSUnit())) - return true; - } + if (RegMask && MachineOperand::clobbersPhysReg(RegMask, PI->getReg()) && + scheduleDAG->IsReachable(DepSU, PI->getSUnit())) + return true; + + if (ImpDefs) + for (const uint16_t *ImpDef = ImpDefs; *ImpDef; ++ImpDef) + // Return true if SU clobbers this physical register use and the + // definition of the register reaches from DepSU. IsReachable queries + // a topological forward sort of the DAG (following the successors). 
+ if (TRI->regsOverlap(*ImpDef, PI->getReg()) && + scheduleDAG->IsReachable(DepSU, PI->getSUnit())) + return true; } } return false; @@ -2661,16 +2709,17 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, const TargetRegisterInfo *TRI) { SDNode *N = SuccSU->getNode(); unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); - const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); + const uint16_t *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); assert(ImpDefs && "Caller should check hasPhysRegDefs"); for (const SDNode *SUNode = SU->getNode(); SUNode; SUNode = SUNode->getGluedNode()) { if (!SUNode->isMachineOpcode()) continue; - const unsigned *SUImpDefs = + const uint16_t *SUImpDefs = TII->get(SUNode->getMachineOpcode()).getImplicitDefs(); - if (!SUImpDefs) - return false; + const uint32_t *SURegMask = getNodeRegMask(SUNode); + if (!SUImpDefs && !SURegMask) + continue; for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { EVT VT = N->getValueType(i); if (VT == MVT::Glue || VT == MVT::Other) @@ -2678,6 +2727,10 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, if (!N->hasAnyUseOfValue(i)) continue; unsigned Reg = ImpDefs[i - NumDefs]; + if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg)) + return true; + if (!SUImpDefs) + continue; for (;*SUImpDefs; ++SUImpDefs) { unsigned SUReg = *SUImpDefs; if (TRI->regsOverlap(Reg, SUReg)) @@ -2887,69 +2940,6 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() { } } -/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled -/// predecessors of the successors of the SUnit SU. Stop when the provided -/// limit is exceeded. -static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU, - unsigned Limit) { - unsigned Sum = 0; - for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - const SUnit *SuccSU = I->getSUnit(); - for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(), - EE = SuccSU->Preds.end(); II != EE; ++II) { - SUnit *PredSU = II->getSUnit(); - if (!PredSU->isScheduled) - if (++Sum > Limit) - return Sum; - } - } - return Sum; -} - - -// Top down -bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { - if (int res = checkSpecialNodes(left, right)) - return res < 0; - - unsigned LPriority = SPQ->getNodePriority(left); - unsigned RPriority = SPQ->getNodePriority(right); - bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode(); - bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode(); - bool LIsFloater = LIsTarget && left->NumPreds == 0; - bool RIsFloater = RIsTarget && right->NumPreds == 0; - unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0; - unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 
2 : 0; - - if (left->NumSuccs == 0 && right->NumSuccs != 0) - return false; - else if (left->NumSuccs != 0 && right->NumSuccs == 0) - return true; - - if (LIsFloater) - LBonus -= 2; - if (RIsFloater) - RBonus -= 2; - if (left->NumSuccs == 1) - LBonus += 2; - if (right->NumSuccs == 1) - RBonus += 2; - - if (LPriority+LBonus != RPriority+RBonus) - return LPriority+LBonus < RPriority+RBonus; - - if (left->getDepth() != right->getDepth()) - return left->getDepth() < right->getDepth(); - - if (left->NumSuccsLeft != right->NumSuccsLeft) - return left->NumSuccsLeft > right->NumSuccsLeft; - - assert(left->NodeQueueId && right->NodeQueueId && - "NodeQueueId cannot be zero"); - return (left->NodeQueueId > right->NodeQueueId); -} - //===----------------------------------------------------------------------===// // Public Constructor Functions //===----------------------------------------------------------------------===// @@ -2962,21 +2952,7 @@ llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, const TargetRegisterInfo *TRI = TM.getRegisterInfo(); BURegReductionPriorityQueue *PQ = - new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); - ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); - PQ->setScheduleDAG(SD); - return SD; -} - -llvm::ScheduleDAGSDNodes * -llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, - CodeGenOpt::Level OptLevel) { - const TargetMachine &TM = IS->TM; - const TargetInstrInfo *TII = TM.getInstrInfo(); - const TargetRegisterInfo *TRI = TM.getRegisterInfo(); - - TDRegReductionPriorityQueue *PQ = - new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); + new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, 0); ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); PQ->setScheduleDAG(SD); return SD; @@ -2990,7 +2966,7 @@ llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, const TargetRegisterInfo *TRI = TM.getRegisterInfo(); SrcRegReductionPriorityQueue *PQ = - new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); + new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, 0); ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); PQ->setScheduleDAG(SD); return SD; @@ -3005,7 +2981,7 @@ llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, const TargetLowering *TLI = &IS->getTargetLowering(); HybridBURRPriorityQueue *PQ = - new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI); + new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI); ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); PQ->setScheduleDAG(SD); @@ -3021,7 +2997,7 @@ llvm::createILPListDAGScheduler(SelectionDAGISel *IS, const TargetLowering *TLI = &IS->getTargetLowering(); ILPBURRPriorityQueue *PQ = - new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI); + new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI); ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); PQ->setScheduleDAG(SD); return SD; diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 71f07d6fa47a..69dd813b24e0 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -17,6 +17,8 @@ #include "ScheduleDAGSDNodes.h" #include "InstrEmitter.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCInstrItineraries.h" #include 
"llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" @@ -44,20 +46,26 @@ static cl::opt<int> HighLatencyCycles( "instructions take for targets with no itinerary")); ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf) - : ScheduleDAG(mf), + : ScheduleDAG(mf), BB(0), DAG(0), InstrItins(mf.getTarget().getInstrItineraryData()) {} /// Run - perform scheduling. /// -void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb, - MachineBasicBlock::iterator insertPos) { +void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb) { + BB = bb; DAG = dag; - ScheduleDAG::Run(bb, insertPos); + + // Clear the scheduler's SUnit DAG. + ScheduleDAG::clearDAG(); + Sequence.clear(); + + // Invoke the target's selection of scheduler. + Schedule(); } /// NewSUnit - Creates a new SUnit and return a ptr to it. /// -SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) { +SUnit *ScheduleDAGSDNodes::newSUnit(SDNode *N) { #ifndef NDEBUG const SUnit *Addr = 0; if (!SUnits.empty()) @@ -79,7 +87,7 @@ SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) { } SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) { - SUnit *SU = NewSUnit(Old->getNode()); + SUnit *SU = newSUnit(Old->getNode()); SU->OrigNode = Old->OrigNode; SU->Latency = Old->Latency; SU->isVRegCycle = Old->isVRegCycle; @@ -302,7 +310,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { // If this node has already been processed, stop now. if (NI->getNodeId() != -1) continue; - SUnit *NodeSUnit = NewSUnit(NI); + SUnit *NodeSUnit = newSUnit(NI); // See if anything is glued to this node, if so, add them to glued // nodes. Nodes can have at most one glue input and one glue output. Glue @@ -360,7 +368,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { InitNumRegDefsLeft(NodeSUnit); // Assign the Latency field of NodeSUnit using target-provided information. - ComputeLatency(NodeSUnit); + computeLatency(NodeSUnit); } // Find all call operands. @@ -382,7 +390,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); // Check to see if the scheduler cares about latencies. - bool UnitLatencies = ForceUnitLatencies(); + bool UnitLatencies = forceUnitLatencies(); // Pass 2: add the preds, succs, etc. for (unsigned su = 0, e = SUnits.size(); su != e; ++su) { @@ -448,7 +456,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data, OpLatency, PhysReg); if (!isChain && !UnitLatencies) { - ComputeOperandLatency(OpN, N, i, const_cast<SDep &>(dep)); + computeOperandLatency(OpN, N, i, const_cast<SDep &>(dep)); ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep)); } @@ -541,7 +549,7 @@ void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) { } } -void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { +void ScheduleDAGSDNodes::computeLatency(SUnit *SU) { SDNode *N = SU->getNode(); // TokenFactor operands are considered zero latency, and some schedulers @@ -553,7 +561,7 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { } // Check to see if the scheduler cares about latencies. - if (ForceUnitLatencies()) { + if (forceUnitLatencies()) { SU->Latency = 1; return; } @@ -575,10 +583,10 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { SU->Latency += TII->getInstrLatency(InstrItins, N); } -void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use, +void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use, unsigned OpIdx, SDep& dep) const{ // Check to see if the scheduler cares about latencies. 
- if (ForceUnitLatencies()) + if (forceUnitLatencies()) return; if (dep.getKind() != SDep::Data) @@ -621,6 +629,30 @@ void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { } } +void ScheduleDAGSDNodes::dumpSchedule() const { + for (unsigned i = 0, e = Sequence.size(); i != e; i++) { + if (SUnit *SU = Sequence[i]) + SU->dump(this); + else + dbgs() << "**** NOOP ****\n"; + } +} + +#ifndef NDEBUG +/// VerifyScheduledSequence - Verify that all SUnits were scheduled and that +/// their state is consistent with the nodes listed in Sequence. +/// +void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp) { + unsigned ScheduledNodes = ScheduleDAG::VerifyScheduledDAG(isBottomUp); + unsigned Noops = 0; + for (unsigned i = 0, e = Sequence.size(); i != e; ++i) + if (!Sequence[i]) + ++Noops; + assert(Sequence.size() - Noops == ScheduledNodes && + "The number of nodes scheduled doesn't match the expected number!"); +} +#endif // NDEBUG + namespace { struct OrderSorter { bool operator()(const std::pair<unsigned, MachineInstr*> &A, @@ -686,9 +718,48 @@ static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG, ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order); } +void ScheduleDAGSDNodes:: +EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap, + MachineBasicBlock::iterator InsertPos) { + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) continue; // ignore chain preds + if (I->getSUnit()->CopyDstRC) { + // Copy to physical register. + DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->getSUnit()); + assert(VRI != VRBaseMap.end() && "Node emitted out of order - late"); + // Find the destination physical register. + unsigned Reg = 0; + for (SUnit::const_succ_iterator II = SU->Succs.begin(), + EE = SU->Succs.end(); II != EE; ++II) { + if (II->isCtrl()) continue; // ignore chain preds + if (II->getReg()) { + Reg = II->getReg(); + break; + } + } + BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg) + .addReg(VRI->second); + } else { + // Copy from physical register. + assert(I->getReg() && "Unknown physical register!"); + unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC); + bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second; + (void)isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); + BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase) + .addReg(I->getReg()); + } + break; + } +} -/// EmitSchedule - Emit the machine code in scheduled order. -MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { +/// EmitSchedule - Emit the machine code in scheduled order. Return the new +/// InsertPos and MachineBasicBlock that contains this insertion +/// point. ScheduleDAGSDNodes holds a BB pointer for convenience, but this does +/// not necessarily refer to returned BB. The emitter may split blocks. +MachineBasicBlock *ScheduleDAGSDNodes:: +EmitSchedule(MachineBasicBlock::iterator &InsertPos) { InstrEmitter Emitter(BB, InsertPos); DenseMap<SDValue, unsigned> VRBaseMap; DenseMap<SUnit*, unsigned> CopyVRBaseMap; @@ -711,7 +782,7 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { SUnit *SU = Sequence[i]; if (!SU) { // Null SUnit* is a noop. - EmitNoop(); + TII->insertNoop(*Emitter.getBlock(), InsertPos); continue; } @@ -719,7 +790,7 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { // SDNode and any glued SDNodes and append them to the block. if (!SU->getNode()) { // Emit a copy. 
- EmitPhysRegCopy(SU, CopyVRBaseMap); + EmitPhysRegCopy(SU, CopyVRBaseMap, InsertPos); continue; } @@ -784,19 +855,24 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { } // Add trailing DbgValue's before the terminator. FIXME: May want to add // some of them before one or more conditional branches? + SmallVector<MachineInstr*, 8> DbgMIs; while (DI != DE) { - MachineBasicBlock *InsertBB = Emitter.getBlock(); - MachineBasicBlock::iterator Pos= Emitter.getBlock()->getFirstTerminator(); - if (!(*DI)->isInvalidated()) { - MachineInstr *DbgMI= Emitter.EmitDbgValue(*DI, VRBaseMap); - if (DbgMI) - InsertBB->insert(Pos, DbgMI); - } + if (!(*DI)->isInvalidated()) + if (MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap)) + DbgMIs.push_back(DbgMI); ++DI; } + + MachineBasicBlock *InsertBB = Emitter.getBlock(); + MachineBasicBlock::iterator Pos = InsertBB->getFirstTerminator(); + InsertBB->insert(Pos, DbgMIs.begin(), DbgMIs.end()); } - BB = Emitter.getBlock(); InsertPos = Emitter.getInsertPos(); - return BB; + return Emitter.getBlock(); +} + +/// Return the basic block label. +std::string ScheduleDAGSDNodes::getDAGName() const { + return "sunit-dag." + BB->getFullName(); } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 9c27b2ea02ec..75940ec33ddc 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -35,17 +35,20 @@ namespace llvm { /// class ScheduleDAGSDNodes : public ScheduleDAG { public: + MachineBasicBlock *BB; SelectionDAG *DAG; // DAG of the current basic block const InstrItineraryData *InstrItins; + /// The schedule. Null SUnit*'s represent noop instructions. + std::vector<SUnit*> Sequence; + explicit ScheduleDAGSDNodes(MachineFunction &mf); virtual ~ScheduleDAGSDNodes() {} /// Run - perform scheduling. /// - void Run(SelectionDAG *dag, MachineBasicBlock *bb, - MachineBasicBlock::iterator insertPos); + void Run(SelectionDAG *dag, MachineBasicBlock *bb); /// isPassiveNode - Return true if the node is a non-scheduled leaf. /// @@ -53,6 +56,7 @@ namespace llvm { if (isa<ConstantSDNode>(Node)) return true; if (isa<ConstantFPSDNode>(Node)) return true; if (isa<RegisterSDNode>(Node)) return true; + if (isa<RegisterMaskSDNode>(Node)) return true; if (isa<GlobalAddressSDNode>(Node)) return true; if (isa<BasicBlockSDNode>(Node)) return true; if (isa<FrameIndexSDNode>(Node)) return true; @@ -67,7 +71,7 @@ namespace llvm { /// NewSUnit - Creates a new SUnit and return a ptr to it. /// - SUnit *NewSUnit(SDNode *N); + SUnit *newSUnit(SDNode *N); /// Clone - Creates a clone of the specified SUnit. It does not copy the /// predecessors / successors info nor the temporary scheduling states. @@ -78,7 +82,7 @@ namespace llvm { /// are input. This SUnit graph is similar to the SelectionDAG, but /// excludes nodes that aren't interesting to scheduling, and represents /// flagged together nodes with a single SUnit. - virtual void BuildSchedGraph(AliasAnalysis *AA); + void BuildSchedGraph(AliasAnalysis *AA); /// InitVRegCycleFlag - Set isVRegCycle if this node's single use is /// CopyToReg and its only active data operands are CopyFromReg within a @@ -90,30 +94,41 @@ namespace llvm { /// void InitNumRegDefsLeft(SUnit *SU); - /// ComputeLatency - Compute node latency. + /// computeLatency - Compute node latency. 
/// - virtual void ComputeLatency(SUnit *SU); + virtual void computeLatency(SUnit *SU); - /// ComputeOperandLatency - Override dependence edge latency using + /// computeOperandLatency - Override dependence edge latency using /// operand use/def information /// - virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use, + virtual void computeOperandLatency(SUnit *Def, SUnit *Use, SDep& dep) const { } - virtual void ComputeOperandLatency(SDNode *Def, SDNode *Use, + virtual void computeOperandLatency(SDNode *Def, SDNode *Use, unsigned OpIdx, SDep& dep) const; - virtual MachineBasicBlock *EmitSchedule(); - /// Schedule - Order nodes according to selected style, filling /// in the Sequence member. /// virtual void Schedule() = 0; + /// VerifyScheduledSequence - Verify that all SUnits are scheduled and + /// consistent with the Sequence of scheduled instructions. + void VerifyScheduledSequence(bool isBottomUp); + + /// EmitSchedule - Insert MachineInstrs into the MachineBasicBlock + /// according to the order specified in Sequence. + /// + MachineBasicBlock *EmitSchedule(MachineBasicBlock::iterator &InsertPos); + virtual void dumpNode(const SUnit *SU) const; + void dumpSchedule() const; + virtual std::string getGraphNodeLabel(const SUnit *SU) const; + virtual std::string getDAGName() const; + virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const; /// RegDefIter - In place iteration over the values defined by an @@ -159,6 +174,9 @@ namespace llvm { /// BuildSchedUnits, AddSchedEdges - Helper functions for BuildSchedGraph. void BuildSchedUnits(); void AddSchedEdges(); + + void EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap, + MachineBasicBlock::iterator InsertPos); }; } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index 430283d5eff9..c8512914c1e2 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -1,4 +1,4 @@ -//===---- ScheduleDAGList.cpp - Implement a list scheduler for isel DAG ---===// +//===- ScheduleDAGVLIW.cpp - SelectionDAG list scheduler for VLIW -*- C++ -*-=// // // The LLVM Compiler Infrastructure // @@ -31,6 +31,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/ResourcePriorityQueue.h" #include <climits> using namespace llvm; @@ -38,15 +39,15 @@ STATISTIC(NumNoops , "Number of noops inserted"); STATISTIC(NumStalls, "Number of pipeline stalls"); static RegisterScheduler - tdListDAGScheduler("list-td", "Top-down list scheduler", - createTDListDAGScheduler); + VLIWScheduler("vliw-td", "VLIW scheduler", + createVLIWDAGScheduler); namespace { //===----------------------------------------------------------------------===// -/// ScheduleDAGList - The actual list scheduler implementation. This supports -/// top-down scheduling. +/// ScheduleDAGVLIW - The actual DFA list scheduler implementation. This +/// supports / top-down scheduling. /// -class ScheduleDAGList : public ScheduleDAGSDNodes { +class ScheduleDAGVLIW : public ScheduleDAGSDNodes { private: /// AvailableQueue - The priority queue to use for the available SUnits. /// @@ -61,16 +62,20 @@ private: /// HazardRec - The hazard recognizer to use. ScheduleHazardRecognizer *HazardRec; + /// AA - AliasAnalysis for making memory reference queries. 
+ AliasAnalysis *AA; + public: - ScheduleDAGList(MachineFunction &mf, + ScheduleDAGVLIW(MachineFunction &mf, + AliasAnalysis *aa, SchedulingPriorityQueue *availqueue) - : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue) { + : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) { const TargetMachine &tm = mf.getTarget(); HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this); } - ~ScheduleDAGList() { + ~ScheduleDAGVLIW() { delete HazardRec; delete AvailableQueue; } @@ -78,23 +83,25 @@ public: void Schedule(); private: - void ReleaseSucc(SUnit *SU, const SDep &D); - void ReleaseSuccessors(SUnit *SU); - void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle); - void ListScheduleTopDown(); + void releaseSucc(SUnit *SU, const SDep &D); + void releaseSuccessors(SUnit *SU); + void scheduleNodeTopDown(SUnit *SU, unsigned CurCycle); + void listScheduleTopDown(); }; } // end anonymous namespace /// Schedule - Schedule the DAG using list scheduling. -void ScheduleDAGList::Schedule() { - DEBUG(dbgs() << "********** List Scheduling **********\n"); +void ScheduleDAGVLIW::Schedule() { + DEBUG(dbgs() + << "********** List Scheduling BB#" << BB->getNumber() + << " '" << BB->getName() << "' **********\n"); // Build the scheduling graph. - BuildSchedGraph(NULL); + BuildSchedGraph(AA); AvailableQueue->initNodes(SUnits); - ListScheduleTopDown(); + listScheduleTopDown(); AvailableQueue->releaseState(); } @@ -103,9 +110,9 @@ void ScheduleDAGList::Schedule() { // Top-Down Scheduling //===----------------------------------------------------------------------===// -/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to +/// releaseSucc - Decrement the NumPredsLeft count of a successor. Add it to /// the PendingQueue if the count reaches zero. Also update its cycle bound. -void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) { +void ScheduleDAGVLIW::releaseSucc(SUnit *SU, const SDep &D) { SUnit *SuccSU = D.getSUnit(); #ifndef NDEBUG @@ -122,25 +129,26 @@ void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) { // If all the node's predecessors are scheduled, this node is ready // to be scheduled. Ignore the special ExitSU node. - if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) + if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) { PendingQueue.push_back(SuccSU); + } } -void ScheduleDAGList::ReleaseSuccessors(SUnit *SU) { +void ScheduleDAGVLIW::releaseSuccessors(SUnit *SU) { // Top down: release successors. for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { assert(!I->isAssignedRegDep() && "The list-td scheduler doesn't yet support physreg dependencies!"); - ReleaseSucc(SU, *I); + releaseSucc(SU, *I); } } -/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending +/// scheduleNodeTopDown - Add the node to the schedule. Decrement the pending /// count of its successors. If a successor pending count is zero, add it to /// the Available queue. 
-void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { +void ScheduleDAGVLIW::scheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); DEBUG(SU->dump(this)); @@ -148,20 +156,20 @@ void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); SU->setDepthToAtLeast(CurCycle); - ReleaseSuccessors(SU); + releaseSuccessors(SU); SU->isScheduled = true; - AvailableQueue->ScheduledNode(SU); + AvailableQueue->scheduledNode(SU); } -/// ListScheduleTopDown - The main loop of list scheduling for top-down +/// listScheduleTopDown - The main loop of list scheduling for top-down /// schedulers. -void ScheduleDAGList::ListScheduleTopDown() { +void ScheduleDAGVLIW::listScheduleTopDown() { unsigned CurCycle = 0; // Release any successors of the special Entry node. - ReleaseSuccessors(&EntrySU); + releaseSuccessors(&EntrySU); - // All leaves to Available queue. + // All leaves to AvailableQueue. for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { // It is available if it has no predecessors. if (SUnits[i].Preds.empty()) { @@ -170,7 +178,7 @@ void ScheduleDAGList::ListScheduleTopDown() { } } - // While Available queue is not empty, grab the node with the highest + // While AvailableQueue is not empty, grab the node with the highest // priority. If it is not ready put it back. Schedule the node. std::vector<SUnit*> NotReady; Sequence.reserve(SUnits.size()); @@ -184,7 +192,8 @@ void ScheduleDAGList::ListScheduleTopDown() { PendingQueue[i] = PendingQueue.back(); PendingQueue.pop_back(); --i; --e; - } else { + } + else { assert(PendingQueue[i]->getDepth() > CurCycle && "Negative latency?"); } } @@ -192,6 +201,8 @@ void ScheduleDAGList::ListScheduleTopDown() { // If there are no instructions available, don't try to issue anything, and // don't advance the hazard recognizer. if (AvailableQueue->empty()) { + // Reset DFA state. + AvailableQueue->scheduledNode(0); ++CurCycle; continue; } @@ -223,7 +234,7 @@ void ScheduleDAGList::ListScheduleTopDown() { // If we found a node to schedule, do it now. if (FoundSUnit) { - ScheduleNodeTopDown(FoundSUnit, CurCycle); + scheduleNodeTopDown(FoundSUnit, CurCycle); HazardRec->EmitInstruction(FoundSUnit); // If this is a pseudo-op node, we don't want to increment the current @@ -250,7 +261,7 @@ void ScheduleDAGList::ListScheduleTopDown() { } #ifndef NDEBUG - VerifySchedule(/*isBottomUp=*/false); + VerifyScheduledSequence(/*isBottomUp=*/false); #endif } @@ -258,8 +269,8 @@ void ScheduleDAGList::ListScheduleTopDown() { // Public Constructor Functions //===----------------------------------------------------------------------===// -/// createTDListDAGScheduler - This creates a top-down list scheduler. +/// createVLIWDAGScheduler - This creates a top-down list scheduler. 
ScheduleDAGSDNodes * -llvm::createTDListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { - return new ScheduleDAGList(*IS->MF, new LatencyPriorityQueue()); +llvm::createVLIWDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { + return new ScheduleDAGVLIW(*IS->MF, IS->AA, new ResourcePriorityQueue(IS)); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 20bea8e4c9e9..92671d1678c6 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -28,7 +28,6 @@ #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLowering.h" @@ -63,6 +62,7 @@ static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) { static const fltSemantics *EVTToAPFloatSemantics(EVT VT) { switch (VT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unknown FP format"); + case MVT::f16: return &APFloat::IEEEhalf; case MVT::f32: return &APFloat::IEEEsingle; case MVT::f64: return &APFloat::IEEEdouble; case MVT::f80: return &APFloat::x87DoubleExtended; @@ -125,20 +125,29 @@ bool ISD::isBuildVectorAllOnes(const SDNode *N) { if (i == e) return false; // Do not accept build_vectors that aren't all constants or which have non-~0 - // elements. + // elements. We have to be a bit careful here, as the type of the constant + // may not be the same as the type of the vector elements due to type + // legalization (the elements are promoted to a legal type for the target and + // a vector of a type may be legal when the base element type is not). + // We only want to check enough bits to cover the vector elements, because + // we care if the resultant vector is all ones, not whether the individual + // constants are. SDValue NotZero = N->getOperand(i); + unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits(); if (isa<ConstantSDNode>(NotZero)) { - if (!cast<ConstantSDNode>(NotZero)->isAllOnesValue()) + if (cast<ConstantSDNode>(NotZero)->getAPIntValue().countTrailingOnes() < + EltSize) return false; } else if (isa<ConstantFPSDNode>(NotZero)) { - if (!cast<ConstantFPSDNode>(NotZero)->getValueAPF(). - bitcastToAPInt().isAllOnesValue()) + if (cast<ConstantFPSDNode>(NotZero)->getValueAPF() + .bitcastToAPInt().countTrailingOnes() < EltSize) return false; } else return false; // Okay, we have at least one ~0 value, check to see if the rest match or are - // undefs. + // undefs. Even with the above element type twiddling, this should be OK, as + // the same type legalization should have applied to all the elements. 
for (++i; i != e; ++i) if (N->getOperand(i) != NotZero && N->getOperand(i).getOpcode() != ISD::UNDEF) @@ -384,7 +393,9 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { case ISD::Register: ID.AddInteger(cast<RegisterSDNode>(N)->getReg()); break; - + case ISD::RegisterMask: + ID.AddPointer(cast<RegisterMaskSDNode>(N)->getRegMask()); + break; case ISD::SRCVALUE: ID.AddPointer(cast<SrcValueSDNode>(N)->getValue()); break; @@ -475,7 +486,7 @@ static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) { /// static inline unsigned encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile, - bool isNonTemporal) { + bool isNonTemporal, bool isInvariant) { assert((ConvType & 3) == ConvType && "ConvType may not require more than 2 bits!"); assert((AM & 7) == AM && @@ -483,7 +494,8 @@ encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile, return ConvType | (AM << 2) | (isVolatile << 5) | - (isNonTemporal << 6); + (isNonTemporal << 6) | + (isInvariant << 7); } //===----------------------------------------------------------------------===// @@ -564,6 +576,12 @@ void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes, void SelectionDAG::RemoveDeadNode(SDNode *N, DAGUpdateListener *UpdateListener){ SmallVector<SDNode*, 16> DeadNodes(1, N); + + // Create a dummy node that adds a reference to the root node, preventing + // it from being deleted. (This matters if the root is an operand of the + // dead node.) + HandleSDNode Dummy(getRoot()); + RemoveDeadNodes(DeadNodes, UpdateListener); } @@ -834,9 +852,9 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const { } // EntryNode could meaningfully have debug info if we can find it... -SelectionDAG::SelectionDAG(const TargetMachine &tm) +SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) : TM(tm), TLI(*tm.getTargetLowering()), TSI(*tm.getSelectionDAGInfo()), - EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)), + OptLevel(OL), EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)), Root(getEntryNode()), Ordering(0) { AllNodes.push_back(&EntryNode); Ordering = new SDNodeOrdering(); @@ -1025,16 +1043,14 @@ SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) { return getConstantFP(APFloat((float)Val), VT, isTarget); else if (EltVT==MVT::f64) return getConstantFP(APFloat(Val), VT, isTarget); - else if (EltVT==MVT::f80 || EltVT==MVT::f128) { + else if (EltVT==MVT::f80 || EltVT==MVT::f128 || EltVT==MVT::f16) { bool ignored; APFloat apf = APFloat(Val); apf.convert(*EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven, &ignored); return getConstantFP(apf, VT, isTarget); - } else { - assert(0 && "Unsupported type in getConstantFP"); - return SDValue(); - } + } else + llvm_unreachable("Unsupported type in getConstantFP"); } SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL, @@ -1369,6 +1385,20 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) { return SDValue(N, 0); } +SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::RegisterMask, getVTList(MVT::Untyped), 0, 0); + ID.AddPointer(RegMask); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) RegisterMaskSDNode(RegMask); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) { FoldingSetNodeID ID; 
SDValue Ops[] = { Root }; @@ -1598,7 +1628,7 @@ bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const { bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth) const { APInt KnownZero, KnownOne; - ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth); + ComputeMaskedBits(Op, KnownZero, KnownOne, Depth); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); return (KnownZero & Mask) == Mask; } @@ -1607,15 +1637,12 @@ bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask, /// known to be either zero or one and return them in the KnownZero/KnownOne /// bitsets. This code only analyzes bits in Mask, in order to short-circuit /// processing. -void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, - APInt &KnownZero, APInt &KnownOne, - unsigned Depth) const { - unsigned BitWidth = Mask.getBitWidth(); - assert(BitWidth == Op.getValueType().getScalarType().getSizeInBits() && - "Mask size mismatches value type size!"); +void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, + APInt &KnownOne, unsigned Depth) const { + unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything. - if (Depth == 6 || Mask == 0) + if (Depth == 6) return; // Limit search depth. APInt KnownZero2, KnownOne2; @@ -1623,14 +1650,13 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, switch (Op.getOpcode()) { case ISD::Constant: // We know all of the bits for a constant! - KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue() & Mask; - KnownZero = ~KnownOne & Mask; + KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue(); + KnownZero = ~KnownOne; return; case ISD::AND: // If either the LHS or the RHS are Zero, the result is zero. 
- ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(0), Mask & ~KnownZero, - KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -1640,9 +1666,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, KnownZero |= KnownZero2; return; case ISD::OR: - ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(0), Mask & ~KnownOne, - KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -1652,8 +1677,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, KnownOne |= KnownOne2; return; case ISD::XOR: { - ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -1665,9 +1690,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, return; } case ISD::MUL: { - APInt Mask2 = APInt::getAllOnesValue(BitWidth); - ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -1686,33 +1710,29 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, LeadZ = std::min(LeadZ, BitWidth); KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) | APInt::getHighBitsSet(BitWidth, LeadZ); - KnownZero &= Mask; return; } case ISD::UDIV: { // For the purposes of computing leading zeros we can conservatively // treat a udiv as a logical right shift by the power of 2 known to // be less than the denominator. 
- APInt AllOnes = APInt::getAllOnesValue(BitWidth); - ComputeMaskedBits(Op.getOperand(0), - AllOnes, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); unsigned LeadZ = KnownZero2.countLeadingOnes(); KnownOne2.clearAllBits(); KnownZero2.clearAllBits(); - ComputeMaskedBits(Op.getOperand(1), - AllOnes, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros(); if (RHSUnknownLeadingOnes != BitWidth) LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSUnknownLeadingOnes - 1); - KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ) & Mask; + KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ); return; } case ISD::SELECT: - ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(2), KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -1721,8 +1741,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, KnownZero &= KnownZero2; return; case ISD::SELECT_CC: - ComputeMaskedBits(Op.getOperand(3), Mask, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(3), KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(2), KnownZero2, KnownOne2, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -1754,8 +1774,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, if (ShAmt >= BitWidth) return; - ComputeMaskedBits(Op.getOperand(0), Mask.lshr(ShAmt), - KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero <<= ShAmt; KnownOne <<= ShAmt; @@ -1772,13 +1791,12 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, if (ShAmt >= BitWidth) return; - ComputeMaskedBits(Op.getOperand(0), (Mask << ShAmt), - KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero = KnownZero.lshr(ShAmt); KnownOne = KnownOne.lshr(ShAmt); - APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt) & Mask; + APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); KnownZero |= HighBits; // High bits known zero. } return; @@ -1790,15 +1808,11 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, if (ShAmt >= BitWidth) return; - APInt InDemandedMask = (Mask << ShAmt); // If any of the demanded bits are produced by the sign extension, we also // demand the input sign bit. 
- APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt) & Mask; - if (HighBits.getBoolValue()) - InDemandedMask |= APInt::getSignBit(BitWidth); + APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); - ComputeMaskedBits(Op.getOperand(0), InDemandedMask, KnownZero, KnownOne, - Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero = KnownZero.lshr(ShAmt); KnownOne = KnownOne.lshr(ShAmt); @@ -1820,10 +1834,10 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, // Sign extension. Compute the demanded bits in the result that are not // present in the input. - APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits) & Mask; + APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits); APInt InSignBit = APInt::getSignBit(EBits); - APInt InputDemandedBits = Mask & APInt::getLowBitsSet(BitWidth, EBits); + APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, EBits); // If the sign extended bits are demanded, we know that the sign // bit is demanded. @@ -1831,8 +1845,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, if (NewBits.getBoolValue()) InputDemandedBits |= InSignBit; - ComputeMaskedBits(Op.getOperand(0), InputDemandedBits, - KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + KnownOne &= InputDemandedBits; + KnownZero &= InputDemandedBits; assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); // If the sign bit of the input is known set or clear, then we know the @@ -1850,7 +1865,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, return; } case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: case ISD::CTPOP: { unsigned LowBits = Log2_32(BitWidth)+1; KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); @@ -1858,22 +1875,23 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, return; } case ISD::LOAD: { + LoadSDNode *LD = cast<LoadSDNode>(Op); if (ISD::isZEXTLoad(Op.getNode())) { - LoadSDNode *LD = cast<LoadSDNode>(Op); EVT VT = LD->getMemoryVT(); unsigned MemBits = VT.getScalarType().getSizeInBits(); - KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits) & Mask; + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); + } else if (const MDNode *Ranges = LD->getRanges()) { + computeMaskedBitsLoad(*Ranges, KnownZero); } return; } case ISD::ZERO_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarType().getSizeInBits(); - APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask; - APInt InMask = Mask.trunc(InBits); + APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits); KnownZero = KnownZero.trunc(InBits); KnownOne = KnownOne.trunc(InBits); - ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); KnownZero = KnownZero.zext(BitWidth); KnownOne = KnownOne.zext(BitWidth); KnownZero |= NewBits; @@ -1883,17 +1901,11 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarType().getSizeInBits(); APInt InSignBit = APInt::getSignBit(InBits); - APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask; - APInt InMask = Mask.trunc(InBits); - - // If any of the sign extended bits are demanded, we know 
that the sign - // bit is demanded. Temporarily set this bit in the mask for our callee. - if (NewBits.getBoolValue()) - InMask |= InSignBit; + APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits); KnownZero = KnownZero.trunc(InBits); KnownOne = KnownOne.trunc(InBits); - ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); // Note if the sign bit is known to be zero or one. bool SignBitKnownZero = KnownZero.isNegative(); @@ -1901,13 +1913,6 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, assert(!(SignBitKnownZero && SignBitKnownOne) && "Sign bit can't be known to be both zero and one!"); - // If the sign bit wasn't actually demanded by our caller, we don't - // want it set in the KnownZero and KnownOne result values. Reset the - // mask and reapply it to the result values. - InMask = Mask.trunc(InBits); - KnownZero &= InMask; - KnownOne &= InMask; - KnownZero = KnownZero.zext(BitWidth); KnownOne = KnownOne.zext(BitWidth); @@ -1921,10 +1926,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, case ISD::ANY_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarType().getSizeInBits(); - APInt InMask = Mask.trunc(InBits); KnownZero = KnownZero.trunc(InBits); KnownOne = KnownOne.trunc(InBits); - ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); KnownZero = KnownZero.zext(BitWidth); KnownOne = KnownOne.zext(BitWidth); return; @@ -1932,10 +1936,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, case ISD::TRUNCATE: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarType().getSizeInBits(); - APInt InMask = Mask.zext(InBits); KnownZero = KnownZero.zext(InBits); KnownOne = KnownOne.zext(InBits); - ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero = KnownZero.trunc(BitWidth); KnownOne = KnownOne.trunc(BitWidth); @@ -1944,9 +1947,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, case ISD::AssertZext: { EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits()); - ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero, - KnownOne, Depth+1); - KnownZero |= (~InMask) & Mask; + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + KnownZero |= (~InMask); return; } case ISD::FGETSIGN: @@ -1963,8 +1965,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros(); // NLZ can't be BitWidth with no sign bit APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); - ComputeMaskedBits(Op.getOperand(1), MaskV, KnownZero2, KnownOne2, - Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); // If all of the MaskV bits are known to be zero, then we know the // output top bits are zero, because we now know that the output is @@ -1972,7 +1973,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, if ((KnownZero2 & MaskV) == MaskV) { unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros(); // Top bits known zero. 
- KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask; + KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2); } } } @@ -1983,13 +1984,11 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, // Output known-0 bits are known if clear or set in both the low clear bits // common to both LHS & RHS. For example, 8+(X<<3) is known to have the // low 3 bits clear. - APInt Mask2 = APInt::getLowBitsSet(BitWidth, - BitWidth - Mask.countLeadingZeros()); - ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); unsigned KnownZeroOut = KnownZero2.countTrailingOnes(); - ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); KnownZeroOut = std::min(KnownZeroOut, KnownZero2.countTrailingOnes()); @@ -2013,7 +2012,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, if (RA.isPowerOf2()) { APInt LowBits = RA - 1; APInt Mask2 = LowBits | APInt::getSignBit(BitWidth); - ComputeMaskedBits(Op.getOperand(0), Mask2,KnownZero2,KnownOne2,Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero2,KnownOne2,Depth+1); // The low bits of the first operand are unchanged by the srem. KnownZero = KnownZero2 & LowBits; @@ -2028,10 +2027,6 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, // the upper bits are all one. if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0)) KnownOne |= ~LowBits; - - KnownZero &= Mask; - KnownOne &= Mask; - assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); } } @@ -2041,9 +2036,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, const APInt &RA = Rem->getAPIntValue(); if (RA.isPowerOf2()) { APInt LowBits = (RA - 1); - APInt Mask2 = LowBits & Mask; - KnownZero |= ~LowBits & Mask; - ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero, KnownOne,Depth+1); + KnownZero |= ~LowBits; + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne,Depth+1); assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); break; } @@ -2051,16 +2045,13 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, // Since the result is less than or equal to either operand, any leading // zero bits in either operand must also exist in the result. - APInt AllOnes = APInt::getAllOnesValue(BitWidth); - ComputeMaskedBits(Op.getOperand(0), AllOnes, KnownZero, KnownOne, - Depth+1); - ComputeMaskedBits(Op.getOperand(1), AllOnes, KnownZero2, KnownOne2, - Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); uint32_t Leaders = std::max(KnownZero.countLeadingOnes(), KnownZero2.countLeadingOnes()); KnownOne.clearAllBits(); - KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask; + KnownZero = APInt::getHighBitsSet(BitWidth, Leaders); return; } case ISD::FrameIndex: @@ -2080,8 +2071,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, case ISD::INTRINSIC_W_CHAIN: case ISD::INTRINSIC_VOID: // Allow the target to implement this method for its nodes. 
- TLI.computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne, *this, - Depth); + TLI.computeMaskedBitsForTargetNode(Op, KnownZero, KnownOne, *this, Depth); return; } } @@ -2205,12 +2195,11 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1))) if (CRHS->isAllOnesValue()) { APInt KnownZero, KnownOne; - APInt Mask = APInt::getAllOnesValue(VTBits); - ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. - if ((KnownZero | APInt(VTBits, 1)) == Mask) + if ((KnownZero | APInt(VTBits, 1)).isAllOnesValue()) return VTBits; // If we are subtracting one from a positive number, there is no carry @@ -2221,8 +2210,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); if (Tmp2 == 1) return 1; - return std::min(Tmp, Tmp2)-1; - break; + return std::min(Tmp, Tmp2)-1; case ISD::SUB: Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); @@ -2232,11 +2220,10 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) if (CLHS->isNullValue()) { APInt KnownZero, KnownOne; - APInt Mask = APInt::getAllOnesValue(VTBits); - ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. - if ((KnownZero | APInt(VTBits, 1)) == Mask) + if ((KnownZero | APInt(VTBits, 1)).isAllOnesValue()) return VTBits; // If the input is known to be positive (the sign bit is known clear), @@ -2251,8 +2238,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ // is, at worst, one more bit than the inputs. Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); if (Tmp == 1) return 1; // Early out. - return std::min(Tmp, Tmp2)-1; - break; + return std::min(Tmp, Tmp2)-1; case ISD::TRUNCATE: // FIXME: it's tricky to do anything useful for this, but it is an important // case for targets like X86. @@ -2286,9 +2272,9 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ // Finally, if we can prove that the top bits of the result are 0's or 1's, // use this information. APInt KnownZero, KnownOne; - APInt Mask = APInt::getAllOnesValue(VTBits); - ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth); + ComputeMaskedBits(Op, KnownZero, KnownOne, Depth); + APInt Mask; if (KnownZero.isNegative()) { // sign bit is 0 Mask = KnownZero; } else if (KnownOne.isNegative()) { // sign bit is 1; @@ -2328,7 +2314,7 @@ bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const { bool SelectionDAG::isKnownNeverNaN(SDValue Op) const { // If we're told that NaNs won't happen, assume they won't. - if (NoNaNsFPMath) + if (getTarget().Options.NoNaNsFPMath) return true; // If the value is a constant, we can obviously see if it is a NaN or not. 
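Editor's note: the hunks above drop the demanded-bits Mask parameter from SelectionDAG::ComputeMaskedBits; the analysis now always fills KnownZero/KnownOne over the full bit width, and a caller that only cares about some bits applies its own mask afterwards, as the rewritten MaskedValueIsZero does. Below is a minimal standalone sketch of that caller-side pattern using plain 64-bit integers instead of APInt; the helper names are illustrative only, not LLVM APIs.

  #include <cassert>
  #include <cstdint>

  // Toy stand-in for the new ComputeMaskedBits signature: it computes
  // full-width known-zero/known-one sets and takes no mask input.
  static void computeKnownBits(uint64_t ConstVal, uint64_t &KnownZero,
                               uint64_t &KnownOne) {
    // For a constant every bit is known, mirroring the ISD::Constant case.
    KnownOne  = ConstVal;
    KnownZero = ~ConstVal;
  }

  // The caller masks the result itself, which is exactly how
  // MaskedValueIsZero is restructured in the hunk above.
  static bool maskedValueIsZero(uint64_t ConstVal, uint64_t Mask) {
    uint64_t KnownZero, KnownOne;
    computeKnownBits(ConstVal, KnownZero, KnownOne);
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    return (KnownZero & Mask) == Mask;
  }

  int main() {
    // 0x0F00 has its low 8 bits clear, so they are all known zero.
    return maskedValueIsZero(0x0F00, 0xFF) ? 0 : 1;
  }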
@@ -2423,8 +2409,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, case ISD::CTPOP: return getConstant(Val.countPopulation(), VT); case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: return getConstant(Val.countLeadingZeros(), VT); case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: return getConstant(Val.countTrailingZeros(), VT); } } @@ -2440,7 +2428,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, case ISD::FABS: V.clearSign(); return getConstantFP(V, VT); - case ISD::FP_ROUND: case ISD::FP_EXTEND: { bool ignored; // This can return overflow, underflow, or inexact; we don't care. @@ -2561,17 +2548,18 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, "Vector element count mismatch!"); if (OpOpcode == ISD::TRUNCATE) return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0)); - else if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND || - OpOpcode == ISD::ANY_EXTEND) { + if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND || + OpOpcode == ISD::ANY_EXTEND) { // If the source is smaller than the dest, we still need an extend. if (Operand.getNode()->getOperand(0).getValueType().getScalarType() .bitsLT(VT.getScalarType())) return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); - else if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT)) + if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT)) return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0)); - else - return Operand.getNode()->getOperand(0); + return Operand.getNode()->getOperand(0); } + if (OpOpcode == ISD::UNDEF) + return getUNDEF(VT); break; case ISD::BITCAST: // Basic sanity checking. @@ -2601,7 +2589,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, break; case ISD::FNEG: // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 - if (UnsafeFPMath && OpOpcode == ISD::FSUB) + if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB) return getNode(ISD::FSUB, DL, VT, Operand.getNode()->getOperand(1), Operand.getNode()->getOperand(0)); if (OpOpcode == ISD::FNEG) // --X -> X @@ -2736,7 +2724,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, case ISD::FMUL: case ISD::FDIV: case ISD::FREM: - if (UnsafeFPMath) { + if (getTarget().Options.UnsafeFPMath) { if (Opcode == ISD::FADD) { // 0+x --> x if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) @@ -3005,6 +2993,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, default: break; } } + + if (Opcode == ISD::FP_ROUND) { + APFloat V = N1CFP->getValueAPF(); // make copy + bool ignored; + // This can return overflow, underflow, or inexact; we don't care. + // FIXME need to be more flexible about rounding mode. + (void)V.convert(*EVTToAPFloatSemantics(VT), + APFloat::rmNearestTiesToEven, &ignored); + return getConstantFP(V, VT); + } } // Canonicalize an UNDEF to the RHS, even over a constant. 
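Editor's note: the getNode() hunk above folds the new CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF opcodes exactly like their plain counterparts when the operand is a constant, since a known value yields a defined leading/trailing-zero count even for the _ZERO_UNDEF forms (they only differ for a zero input, where any result is acceptable). A small self-contained sketch of that folding rule on a 32-bit constant; the helpers are portable stand-ins for APInt::countLeadingZeros()/countTrailingZeros(), not SelectionDAG code.

  #include <cstdint>
  #include <cstdio>

  static unsigned countLeadingZeros32(uint32_t V) {
    unsigned N = 0;
    for (uint32_t Bit = 1u << 31; Bit && !(V & Bit); Bit >>= 1)
      ++N;
    return N;                     // 32 when V == 0
  }

  static unsigned countTrailingZeros32(uint32_t V) {
    if (V == 0) return 32;
    unsigned N = 0;
    while (!(V & 1)) { V >>= 1; ++N; }
    return N;
  }

  int main() {
    uint32_t C = 0xF0; // a known constant operand
    // CTLZ and CTLZ_ZERO_UNDEF fold to the same constant here (24), as do
    // CTTZ and CTTZ_ZERO_UNDEF (4).
    std::printf("ctlz=%u cttz=%u\n", countLeadingZeros32(C),
                countTrailingZeros32(C));
    return 0;
  }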
@@ -3059,7 +3057,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, case ISD::FMUL: case ISD::FDIV: case ISD::FREM: - if (UnsafeFPMath) + if (getTarget().Options.UnsafeFPMath) return N2; break; case ISD::MUL: @@ -3133,16 +3131,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, case ISD::SELECT: if (N1C) { if (N1C->getZExtValue()) - return N2; // select true, X, Y -> X - else - return N3; // select false, X, Y -> Y + return N2; // select true, X, Y -> X + return N3; // select false, X, Y -> Y } if (N2 == N3) return N2; // select C, X, X -> X break; case ISD::VECTOR_SHUFFLE: llvm_unreachable("should use getVectorShuffle constructor!"); - break; case ISD::INSERT_SUBVECTOR: { SDValue Index = N3; if (VT.isSimple() && N1.getValueType().isSimple() @@ -3275,8 +3271,7 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, /// used when a memcpy is turned into a memset when the source is a constant /// string ptr. static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG, - const TargetLowering &TLI, - std::string &Str, unsigned Offset) { + const TargetLowering &TLI, StringRef Str) { // Handle vector with all elements zero. if (Str.empty()) { if (VT.isInteger()) @@ -3294,15 +3289,18 @@ static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG, } assert(!VT.isVector() && "Can't handle vector type here!"); - unsigned NumBits = VT.getSizeInBits(); - unsigned MSB = NumBits / 8; + unsigned NumVTBytes = VT.getSizeInBits() / 8; + unsigned NumBytes = std::min(NumVTBytes, unsigned(Str.size())); + uint64_t Val = 0; - if (TLI.isLittleEndian()) - Offset = Offset + MSB - 1; - for (unsigned i = 0; i != MSB; ++i) { - Val = (Val << 8) | (unsigned char)Str[Offset]; - Offset += TLI.isLittleEndian() ? -1 : 1; + if (TLI.isLittleEndian()) { + for (unsigned i = 0; i != NumBytes; ++i) + Val |= (uint64_t)(unsigned char)Str[i] << i*8; + } else { + for (unsigned i = 0; i != NumBytes; ++i) + Val |= (uint64_t)(unsigned char)Str[i] << (NumVTBytes-i-1)*8; } + return DAG.getConstant(Val, VT); } @@ -3317,7 +3315,7 @@ static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset, /// isMemSrcFromString - Returns true if memcpy source is a string constant. /// -static bool isMemSrcFromString(SDValue Src, std::string &Str) { +static bool isMemSrcFromString(SDValue Src, StringRef &Str) { unsigned SrcDelta = 0; GlobalAddressSDNode *G = NULL; if (Src.getOpcode() == ISD::GlobalAddress) @@ -3331,11 +3329,7 @@ static bool isMemSrcFromString(SDValue Src, std::string &Str) { if (!G) return false; - const GlobalVariable *GV = dyn_cast<GlobalVariable>(G->getGlobal()); - if (GV && GetConstantStringInfo(GV, Str, SrcDelta, false)) - return true; - - return false; + return getConstantStringInfo(G->getGlobal(), Str, SrcDelta, false); } /// FindOptimalMemOpLowering - Determines the optimial series memory ops @@ -3345,7 +3339,7 @@ static bool isMemSrcFromString(SDValue Src, std::string &Str) { static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned Limit, uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool NonScalarIntSafe, + bool IsZeroVal, bool MemcpyStrSrc, SelectionDAG &DAG, const TargetLowering &TLI) { @@ -3359,7 +3353,7 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does // not need to be loaded. 
EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign, - NonScalarIntSafe, MemcpyStrSrc, + IsZeroVal, MemcpyStrSrc, DAG.getMachineFunction()); if (VT == MVT::Other) { @@ -3438,7 +3432,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, unsigned SrcAlign = DAG.InferPtrAlignment(Src); if (Align > SrcAlign) SrcAlign = Align; - std::string Str; + StringRef Str; bool CopyFromStr = isMemSrcFromString(Src, Str); bool isZeroStr = CopyFromStr && Str.empty(); unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize); @@ -3475,7 +3469,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, // We only handle zero vectors here. // FIXME: Handle other cases where store of vector immediate is done in // a single instruction. - Value = getMemsetStringVal(VT, dl, DAG, TLI, Str, SrcOff); + Value = getMemsetStringVal(VT, dl, DAG, TLI, Str.substr(SrcOff)); Store = DAG.getStore(Chain, dl, Value, getMemBasePlusOffset(Dst, DstOff, DAG), DstPtrInfo.getWithOffset(DstOff), isVol, @@ -3562,7 +3556,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, Value = DAG.getLoad(VT, dl, Chain, getMemBasePlusOffset(Src, SrcOff, DAG), SrcPtrInfo.getWithOffset(SrcOff), isVol, - false, SrcAlign); + false, false, SrcAlign); LoadValues.push_back(Value); LoadChains.push_back(Value.getValue(1)); SrcOff += VTSize; @@ -3606,11 +3600,11 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; - bool NonScalarIntSafe = + bool IsZeroVal = isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue(); if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize), Size, (DstAlignCanChange ? 
0 : Align), 0, - NonScalarIntSafe, false, DAG, TLI)) + IsZeroVal, false, DAG, TLI)) return SDValue(); if (DstAlignCanChange) { @@ -3717,8 +3711,9 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()), false, false, false, false, 0, - TLI.getLibcallCallingConv(RTLIB::MEMCPY), false, - /*isReturnValueUsed=*/false, + TLI.getLibcallCallingConv(RTLIB::MEMCPY), + /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/false, getExternalSymbol(TLI.getLibcallName(RTLIB::MEMCPY), TLI.getPointerTy()), Args, *this, dl); @@ -3769,8 +3764,9 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()), false, false, false, false, 0, - TLI.getLibcallCallingConv(RTLIB::MEMMOVE), false, - /*isReturnValueUsed=*/false, + TLI.getLibcallCallingConv(RTLIB::MEMMOVE), + /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/false, getExternalSymbol(TLI.getLibcallName(RTLIB::MEMMOVE), TLI.getPointerTy()), Args, *this, dl); @@ -3829,8 +3825,9 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()), false, false, false, false, 0, - TLI.getLibcallCallingConv(RTLIB::MEMSET), false, - /*isReturnValueUsed=*/false, + TLI.getLibcallCallingConv(RTLIB::MEMSET), + /*isTailCall=*/false, + /*doesNotReturn*/false, /*isReturnValueUsed=*/false, getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET), TLI.getPointerTy()), Args, *this, dl); @@ -4138,8 +4135,9 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, SDValue Offset, MachinePointerInfo PtrInfo, EVT MemVT, - bool isVolatile, bool isNonTemporal, - unsigned Alignment, const MDNode *TBAAInfo) { + bool isVolatile, bool isNonTemporal, bool isInvariant, + unsigned Alignment, const MDNode *TBAAInfo, + const MDNode *Ranges) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); if (Alignment == 0) // Ensure that codegen never sees alignment 0 @@ -4150,6 +4148,8 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, Flags |= MachineMemOperand::MOVolatile; if (isNonTemporal) Flags |= MachineMemOperand::MONonTemporal; + if (isInvariant) + Flags |= MachineMemOperand::MOInvariant; // If we don't have a PtrInfo, infer the trivial frame index case to simplify // clients. 
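// Illustrative call site only, not part of the patch: the extra isInvariant
// and Ranges parameters threaded through getLoad above let a caller mark a
// load as invariant (setting MachineMemOperand::MOInvariant) and attach
// !range metadata. This assumes the simpler non-indexed getLoad overload
// updated in the following hunks; DAG, dl, Chain, Ptr, SV, Off, TBAAInfo and
// RangeMD stand in for values already in scope.
SDValue L = DAG.getLoad(MVT::i32, dl, Chain, Ptr,
                        MachinePointerInfo(SV, Off),
                        /*isVolatile=*/false,
                        /*isNonTemporal=*/false,
                        /*isInvariant=*/true,   // new parameter
                        /*Alignment=*/4,
                        TBAAInfo,               // may be 0
                        RangeMD);               // new: !range metadata, may be 0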
@@ -4159,7 +4159,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment, - TBAAInfo); + TBAAInfo, Ranges); return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO); } @@ -4196,7 +4196,8 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3); ID.AddInteger(MemVT.getRawBits()); ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile(), - MMO->isNonTemporal())); + MMO->isNonTemporal(), + MMO->isInvariant())); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<LoadSDNode>(E)->refineAlignment(MMO); @@ -4213,10 +4214,13 @@ SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, - unsigned Alignment, const MDNode *TBAAInfo) { + bool isInvariant, unsigned Alignment, + const MDNode *TBAAInfo, + const MDNode *Ranges) { SDValue Undef = getUNDEF(Ptr.getValueType()); return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef, - PtrInfo, VT, isVolatile, isNonTemporal, Alignment, TBAAInfo); + PtrInfo, VT, isVolatile, isNonTemporal, isInvariant, Alignment, + TBAAInfo, Ranges); } SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT, @@ -4226,7 +4230,7 @@ SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT, unsigned Alignment, const MDNode *TBAAInfo) { SDValue Undef = getUNDEF(Ptr.getValueType()); return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, - PtrInfo, MemVT, isVolatile, isNonTemporal, Alignment, + PtrInfo, MemVT, isVolatile, isNonTemporal, false, Alignment, TBAAInfo); } @@ -4239,8 +4243,8 @@ SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base, "Load is already a indexed load!"); return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl, LD->getChain(), Base, Offset, LD->getPointerInfo(), - LD->getMemoryVT(), - LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); + LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), + false, LD->getAlignment()); } SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, @@ -4282,7 +4286,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4); ID.AddInteger(VT.getRawBits()); ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(), - MMO->isNonTemporal())); + MMO->isNonTemporal(), MMO->isInvariant())); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<StoreSDNode>(E)->refineAlignment(MMO); @@ -4349,7 +4353,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4); ID.AddInteger(SVT.getRawBits()); ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile(), - MMO->isNonTemporal())); + MMO->isNonTemporal(), MMO->isInvariant())); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<StoreSDNode>(E)->refineAlignment(MMO); @@ -4903,6 +4907,20 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, return N; } +/// UpdadeDebugLocOnMergedSDNode - If the opt level is -O0 then it throws away +/// the line number information on the merged node since it is not possible to +/// preserve the information that operation is associated with multiple lines. 
+/// This will make the debugger work better at -O0, where there is a higher
+/// probability of having other instructions associated with that line.
+///
+SDNode *SelectionDAG::UpdadeDebugLocOnMergedSDNode(SDNode *N, DebugLoc OLoc) {
+  DebugLoc NLoc = N->getDebugLoc();
+  if (!(NLoc.isUnknown()) && (OptLevel == CodeGenOpt::None) && (OLoc != NLoc)) {
+    N->setDebugLoc(DebugLoc());
+  }
+  return N;
+}
+
 /// MorphNodeTo - This *mutates* the specified node to have the specified
 /// return type, opcode, and operands.
 ///
@@ -4924,7 +4942,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
     FoldingSetNodeID ID;
     AddNodeIDNode(ID, Opc, VTs, Ops, NumOps);
     if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
-      return ON;
+      return UpdadeDebugLocOnMergedSDNode(ON, N->getDebugLoc());
   }

   if (!RemoveNodeFromCSEMaps(N))
@@ -5128,8 +5146,9 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs,
     FoldingSetNodeID ID;
     AddNodeIDNode(ID, ~Opcode, VTs, Ops, NumOps);
     IP = 0;
-    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
-      return cast<MachineSDNode>(E);
+    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+      return cast<MachineSDNode>(UpdadeDebugLocOnMergedSDNode(E, DL));
+    }
   }

   // Allocate a new MachineSDNode.
@@ -5290,6 +5309,10 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To,
     // already exists there, recursively merge the results together.
     AddModifiedNodeToCSEMaps(User, &Listener);
   }
+
+  // If we just RAUW'd the root, take note.
+  if (FromN == getRoot())
+    setRoot(To);
 }

 /// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
@@ -5335,6 +5358,10 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To,
     // already exists there, recursively merge the results together.
     AddModifiedNodeToCSEMaps(User, &Listener);
   }
+
+  // If we just RAUW'd the root, take note.
+  if (From == getRoot().getNode())
+    setRoot(SDValue(To, getRoot().getResNo()));
 }

 /// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
@@ -5373,6 +5400,10 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From,
     // already exists there, recursively merge the results together.
     AddModifiedNodeToCSEMaps(User, &Listener);
   }
+
+  // If we just RAUW'd the root, take note.
+  if (From == getRoot().getNode())
+    setRoot(SDValue(To[getRoot().getResNo()]));
 }

 /// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving
@@ -5431,6 +5462,10 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To,
     // already exists there, recursively merge the results together.
     AddModifiedNodeToCSEMaps(User, &Listener);
   }
+
+  // If we just RAUW'd the root, take note.
+ if (From == getRoot()) + setRoot(To); } namespace { @@ -5657,7 +5692,7 @@ MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT memvt, MachineMemOperand *mmo) : SDNode(Opc, dl, VTs), MemoryVT(memvt), MMO(mmo) { SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(), - MMO->isNonTemporal()); + MMO->isNonTemporal(), MMO->isInvariant()); assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!"); assert(isNonTemporal() == MMO->isNonTemporal() && "Non-temporal encoding error!"); @@ -5670,7 +5705,7 @@ MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, : SDNode(Opc, dl, VTs, Ops, NumOps), MemoryVT(memvt), MMO(mmo) { SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(), - MMO->isNonTemporal()); + MMO->isNonTemporal(), MMO->isInvariant()); assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!"); assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!"); } @@ -5846,565 +5881,6 @@ uint64_t SDNode::getConstantOperandVal(unsigned Num) const { return cast<ConstantSDNode>(OperandList[Num])->getZExtValue(); } -std::string SDNode::getOperationName(const SelectionDAG *G) const { - switch (getOpcode()) { - default: - if (getOpcode() < ISD::BUILTIN_OP_END) - return "<<Unknown DAG Node>>"; - if (isMachineOpcode()) { - if (G) - if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo()) - if (getMachineOpcode() < TII->getNumOpcodes()) - return TII->get(getMachineOpcode()).getName(); - return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>"; - } - if (G) { - const TargetLowering &TLI = G->getTargetLoweringInfo(); - const char *Name = TLI.getTargetNodeName(getOpcode()); - if (Name) return Name; - return "<<Unknown Target Node #" + utostr(getOpcode()) + ">>"; - } - return "<<Unknown Node #" + utostr(getOpcode()) + ">>"; - -#ifndef NDEBUG - case ISD::DELETED_NODE: - return "<<Deleted Node!>>"; -#endif - case ISD::PREFETCH: return "Prefetch"; - case ISD::MEMBARRIER: return "MemBarrier"; - case ISD::ATOMIC_FENCE: return "AtomicFence"; - case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap"; - case ISD::ATOMIC_SWAP: return "AtomicSwap"; - case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd"; - case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub"; - case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd"; - case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr"; - case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor"; - case ISD::ATOMIC_LOAD_NAND: return "AtomicLoadNand"; - case ISD::ATOMIC_LOAD_MIN: return "AtomicLoadMin"; - case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax"; - case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin"; - case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax"; - case ISD::ATOMIC_LOAD: return "AtomicLoad"; - case ISD::ATOMIC_STORE: return "AtomicStore"; - case ISD::PCMARKER: return "PCMarker"; - case ISD::READCYCLECOUNTER: return "ReadCycleCounter"; - case ISD::SRCVALUE: return "SrcValue"; - case ISD::MDNODE_SDNODE: return "MDNode"; - case ISD::EntryToken: return "EntryToken"; - case ISD::TokenFactor: return "TokenFactor"; - case ISD::AssertSext: return "AssertSext"; - case ISD::AssertZext: return "AssertZext"; - - case ISD::BasicBlock: return "BasicBlock"; - case ISD::VALUETYPE: return "ValueType"; - case ISD::Register: return "Register"; - - case ISD::Constant: return "Constant"; - case ISD::ConstantFP: return "ConstantFP"; - case ISD::GlobalAddress: return "GlobalAddress"; - case ISD::GlobalTLSAddress: return "GlobalTLSAddress"; - case ISD::FrameIndex: return "FrameIndex"; - case ISD::JumpTable: return 
"JumpTable"; - case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE"; - case ISD::RETURNADDR: return "RETURNADDR"; - case ISD::FRAMEADDR: return "FRAMEADDR"; - case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET"; - case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR"; - case ISD::LSDAADDR: return "LSDAADDR"; - case ISD::EHSELECTION: return "EHSELECTION"; - case ISD::EH_RETURN: return "EH_RETURN"; - case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP"; - case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP"; - case ISD::EH_SJLJ_DISPATCHSETUP: return "EH_SJLJ_DISPATCHSETUP"; - case ISD::ConstantPool: return "ConstantPool"; - case ISD::ExternalSymbol: return "ExternalSymbol"; - case ISD::BlockAddress: return "BlockAddress"; - case ISD::INTRINSIC_WO_CHAIN: - case ISD::INTRINSIC_VOID: - case ISD::INTRINSIC_W_CHAIN: { - unsigned OpNo = getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1; - unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue(); - if (IID < Intrinsic::num_intrinsics) - return Intrinsic::getName((Intrinsic::ID)IID); - else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo()) - return TII->getName(IID); - llvm_unreachable("Invalid intrinsic ID"); - } - - case ISD::BUILD_VECTOR: return "BUILD_VECTOR"; - case ISD::TargetConstant: return "TargetConstant"; - case ISD::TargetConstantFP:return "TargetConstantFP"; - case ISD::TargetGlobalAddress: return "TargetGlobalAddress"; - case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress"; - case ISD::TargetFrameIndex: return "TargetFrameIndex"; - case ISD::TargetJumpTable: return "TargetJumpTable"; - case ISD::TargetConstantPool: return "TargetConstantPool"; - case ISD::TargetExternalSymbol: return "TargetExternalSymbol"; - case ISD::TargetBlockAddress: return "TargetBlockAddress"; - - case ISD::CopyToReg: return "CopyToReg"; - case ISD::CopyFromReg: return "CopyFromReg"; - case ISD::UNDEF: return "undef"; - case ISD::MERGE_VALUES: return "merge_values"; - case ISD::INLINEASM: return "inlineasm"; - case ISD::EH_LABEL: return "eh_label"; - case ISD::HANDLENODE: return "handlenode"; - - // Unary operators - case ISD::FABS: return "fabs"; - case ISD::FNEG: return "fneg"; - case ISD::FSQRT: return "fsqrt"; - case ISD::FSIN: return "fsin"; - case ISD::FCOS: return "fcos"; - case ISD::FTRUNC: return "ftrunc"; - case ISD::FFLOOR: return "ffloor"; - case ISD::FCEIL: return "fceil"; - case ISD::FRINT: return "frint"; - case ISD::FNEARBYINT: return "fnearbyint"; - case ISD::FEXP: return "fexp"; - case ISD::FEXP2: return "fexp2"; - case ISD::FLOG: return "flog"; - case ISD::FLOG2: return "flog2"; - case ISD::FLOG10: return "flog10"; - - // Binary operators - case ISD::ADD: return "add"; - case ISD::SUB: return "sub"; - case ISD::MUL: return "mul"; - case ISD::MULHU: return "mulhu"; - case ISD::MULHS: return "mulhs"; - case ISD::SDIV: return "sdiv"; - case ISD::UDIV: return "udiv"; - case ISD::SREM: return "srem"; - case ISD::UREM: return "urem"; - case ISD::SMUL_LOHI: return "smul_lohi"; - case ISD::UMUL_LOHI: return "umul_lohi"; - case ISD::SDIVREM: return "sdivrem"; - case ISD::UDIVREM: return "udivrem"; - case ISD::AND: return "and"; - case ISD::OR: return "or"; - case ISD::XOR: return "xor"; - case ISD::SHL: return "shl"; - case ISD::SRA: return "sra"; - case ISD::SRL: return "srl"; - case ISD::ROTL: return "rotl"; - case ISD::ROTR: return "rotr"; - case ISD::FADD: return "fadd"; - case ISD::FSUB: return "fsub"; - case ISD::FMUL: return "fmul"; - case ISD::FDIV: return "fdiv"; - case ISD::FMA: return 
"fma"; - case ISD::FREM: return "frem"; - case ISD::FCOPYSIGN: return "fcopysign"; - case ISD::FGETSIGN: return "fgetsign"; - case ISD::FPOW: return "fpow"; - - case ISD::FPOWI: return "fpowi"; - case ISD::SETCC: return "setcc"; - case ISD::SELECT: return "select"; - case ISD::VSELECT: return "vselect"; - case ISD::SELECT_CC: return "select_cc"; - case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt"; - case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt"; - case ISD::CONCAT_VECTORS: return "concat_vectors"; - case ISD::INSERT_SUBVECTOR: return "insert_subvector"; - case ISD::EXTRACT_SUBVECTOR: return "extract_subvector"; - case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector"; - case ISD::VECTOR_SHUFFLE: return "vector_shuffle"; - case ISD::CARRY_FALSE: return "carry_false"; - case ISD::ADDC: return "addc"; - case ISD::ADDE: return "adde"; - case ISD::SADDO: return "saddo"; - case ISD::UADDO: return "uaddo"; - case ISD::SSUBO: return "ssubo"; - case ISD::USUBO: return "usubo"; - case ISD::SMULO: return "smulo"; - case ISD::UMULO: return "umulo"; - case ISD::SUBC: return "subc"; - case ISD::SUBE: return "sube"; - case ISD::SHL_PARTS: return "shl_parts"; - case ISD::SRA_PARTS: return "sra_parts"; - case ISD::SRL_PARTS: return "srl_parts"; - - // Conversion operators. - case ISD::SIGN_EXTEND: return "sign_extend"; - case ISD::ZERO_EXTEND: return "zero_extend"; - case ISD::ANY_EXTEND: return "any_extend"; - case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg"; - case ISD::TRUNCATE: return "truncate"; - case ISD::FP_ROUND: return "fp_round"; - case ISD::FLT_ROUNDS_: return "flt_rounds"; - case ISD::FP_ROUND_INREG: return "fp_round_inreg"; - case ISD::FP_EXTEND: return "fp_extend"; - - case ISD::SINT_TO_FP: return "sint_to_fp"; - case ISD::UINT_TO_FP: return "uint_to_fp"; - case ISD::FP_TO_SINT: return "fp_to_sint"; - case ISD::FP_TO_UINT: return "fp_to_uint"; - case ISD::BITCAST: return "bitcast"; - case ISD::FP16_TO_FP32: return "fp16_to_fp32"; - case ISD::FP32_TO_FP16: return "fp32_to_fp16"; - - case ISD::CONVERT_RNDSAT: { - switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) { - default: llvm_unreachable("Unknown cvt code!"); - case ISD::CVT_FF: return "cvt_ff"; - case ISD::CVT_FS: return "cvt_fs"; - case ISD::CVT_FU: return "cvt_fu"; - case ISD::CVT_SF: return "cvt_sf"; - case ISD::CVT_UF: return "cvt_uf"; - case ISD::CVT_SS: return "cvt_ss"; - case ISD::CVT_SU: return "cvt_su"; - case ISD::CVT_US: return "cvt_us"; - case ISD::CVT_UU: return "cvt_uu"; - } - } - - // Control flow instructions - case ISD::BR: return "br"; - case ISD::BRIND: return "brind"; - case ISD::BR_JT: return "br_jt"; - case ISD::BRCOND: return "brcond"; - case ISD::BR_CC: return "br_cc"; - case ISD::CALLSEQ_START: return "callseq_start"; - case ISD::CALLSEQ_END: return "callseq_end"; - - // Other operators - case ISD::LOAD: return "load"; - case ISD::STORE: return "store"; - case ISD::VAARG: return "vaarg"; - case ISD::VACOPY: return "vacopy"; - case ISD::VAEND: return "vaend"; - case ISD::VASTART: return "vastart"; - case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc"; - case ISD::EXTRACT_ELEMENT: return "extract_element"; - case ISD::BUILD_PAIR: return "build_pair"; - case ISD::STACKSAVE: return "stacksave"; - case ISD::STACKRESTORE: return "stackrestore"; - case ISD::TRAP: return "trap"; - - // Bit manipulation - case ISD::BSWAP: return "bswap"; - case ISD::CTPOP: return "ctpop"; - case ISD::CTTZ: return "cttz"; - case ISD::CTLZ: return "ctlz"; - - // Trampolines - case ISD::INIT_TRAMPOLINE: return 
"init_trampoline"; - case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline"; - - case ISD::CONDCODE: - switch (cast<CondCodeSDNode>(this)->get()) { - default: llvm_unreachable("Unknown setcc condition!"); - case ISD::SETOEQ: return "setoeq"; - case ISD::SETOGT: return "setogt"; - case ISD::SETOGE: return "setoge"; - case ISD::SETOLT: return "setolt"; - case ISD::SETOLE: return "setole"; - case ISD::SETONE: return "setone"; - - case ISD::SETO: return "seto"; - case ISD::SETUO: return "setuo"; - case ISD::SETUEQ: return "setue"; - case ISD::SETUGT: return "setugt"; - case ISD::SETUGE: return "setuge"; - case ISD::SETULT: return "setult"; - case ISD::SETULE: return "setule"; - case ISD::SETUNE: return "setune"; - - case ISD::SETEQ: return "seteq"; - case ISD::SETGT: return "setgt"; - case ISD::SETGE: return "setge"; - case ISD::SETLT: return "setlt"; - case ISD::SETLE: return "setle"; - case ISD::SETNE: return "setne"; - } - } -} - -const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) { - switch (AM) { - default: - return ""; - case ISD::PRE_INC: - return "<pre-inc>"; - case ISD::PRE_DEC: - return "<pre-dec>"; - case ISD::POST_INC: - return "<post-inc>"; - case ISD::POST_DEC: - return "<post-dec>"; - } -} - -std::string ISD::ArgFlagsTy::getArgFlagsString() { - std::string S = "< "; - - if (isZExt()) - S += "zext "; - if (isSExt()) - S += "sext "; - if (isInReg()) - S += "inreg "; - if (isSRet()) - S += "sret "; - if (isByVal()) - S += "byval "; - if (isNest()) - S += "nest "; - if (getByValAlign()) - S += "byval-align:" + utostr(getByValAlign()) + " "; - if (getOrigAlign()) - S += "orig-align:" + utostr(getOrigAlign()) + " "; - if (getByValSize()) - S += "byval-size:" + utostr(getByValSize()) + " "; - return S + ">"; -} - -void SDNode::dump() const { dump(0); } -void SDNode::dump(const SelectionDAG *G) const { - print(dbgs(), G); - dbgs() << '\n'; -} - -void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const { - OS << (void*)this << ": "; - - for (unsigned i = 0, e = getNumValues(); i != e; ++i) { - if (i) OS << ","; - if (getValueType(i) == MVT::Other) - OS << "ch"; - else - OS << getValueType(i).getEVTString(); - } - OS << " = " << getOperationName(G); -} - -void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { - if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) { - if (!MN->memoperands_empty()) { - OS << "<"; - OS << "Mem:"; - for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(), - e = MN->memoperands_end(); i != e; ++i) { - OS << **i; - if (llvm::next(i) != e) - OS << " "; - } - OS << ">"; - } - } else if (const ShuffleVectorSDNode *SVN = - dyn_cast<ShuffleVectorSDNode>(this)) { - OS << "<"; - for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) { - int Idx = SVN->getMaskElt(i); - if (i) OS << ","; - if (Idx < 0) - OS << "u"; - else - OS << Idx; - } - OS << ">"; - } else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) { - OS << '<' << CSDN->getAPIntValue() << '>'; - } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) { - if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle) - OS << '<' << CSDN->getValueAPF().convertToFloat() << '>'; - else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble) - OS << '<' << CSDN->getValueAPF().convertToDouble() << '>'; - else { - OS << "<APFloat("; - CSDN->getValueAPF().bitcastToAPInt().dump(); - OS << ")>"; - } - } else if (const GlobalAddressSDNode *GADN = - dyn_cast<GlobalAddressSDNode>(this)) { - int64_t 
offset = GADN->getOffset(); - OS << '<'; - WriteAsOperand(OS, GADN->getGlobal()); - OS << '>'; - if (offset > 0) - OS << " + " << offset; - else - OS << " " << offset; - if (unsigned int TF = GADN->getTargetFlags()) - OS << " [TF=" << TF << ']'; - } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) { - OS << "<" << FIDN->getIndex() << ">"; - } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) { - OS << "<" << JTDN->getIndex() << ">"; - if (unsigned int TF = JTDN->getTargetFlags()) - OS << " [TF=" << TF << ']'; - } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){ - int offset = CP->getOffset(); - if (CP->isMachineConstantPoolEntry()) - OS << "<" << *CP->getMachineCPVal() << ">"; - else - OS << "<" << *CP->getConstVal() << ">"; - if (offset > 0) - OS << " + " << offset; - else - OS << " " << offset; - if (unsigned int TF = CP->getTargetFlags()) - OS << " [TF=" << TF << ']'; - } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) { - OS << "<"; - const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock(); - if (LBB) - OS << LBB->getName() << " "; - OS << (const void*)BBDN->getBasicBlock() << ">"; - } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) { - OS << ' ' << PrintReg(R->getReg(), G ? G->getTarget().getRegisterInfo() :0); - } else if (const ExternalSymbolSDNode *ES = - dyn_cast<ExternalSymbolSDNode>(this)) { - OS << "'" << ES->getSymbol() << "'"; - if (unsigned int TF = ES->getTargetFlags()) - OS << " [TF=" << TF << ']'; - } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) { - if (M->getValue()) - OS << "<" << M->getValue() << ">"; - else - OS << "<null>"; - } else if (const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(this)) { - if (MD->getMD()) - OS << "<" << MD->getMD() << ">"; - else - OS << "<null>"; - } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) { - OS << ":" << N->getVT().getEVTString(); - } - else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) { - OS << "<" << *LD->getMemOperand(); - - bool doExt = true; - switch (LD->getExtensionType()) { - default: doExt = false; break; - case ISD::EXTLOAD: OS << ", anyext"; break; - case ISD::SEXTLOAD: OS << ", sext"; break; - case ISD::ZEXTLOAD: OS << ", zext"; break; - } - if (doExt) - OS << " from " << LD->getMemoryVT().getEVTString(); - - const char *AM = getIndexedModeName(LD->getAddressingMode()); - if (*AM) - OS << ", " << AM; - - OS << ">"; - } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) { - OS << "<" << *ST->getMemOperand(); - - if (ST->isTruncatingStore()) - OS << ", trunc to " << ST->getMemoryVT().getEVTString(); - - const char *AM = getIndexedModeName(ST->getAddressingMode()); - if (*AM) - OS << ", " << AM; - - OS << ">"; - } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) { - OS << "<" << *M->getMemOperand() << ">"; - } else if (const BlockAddressSDNode *BA = - dyn_cast<BlockAddressSDNode>(this)) { - OS << "<"; - WriteAsOperand(OS, BA->getBlockAddress()->getFunction(), false); - OS << ", "; - WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false); - OS << ">"; - if (unsigned int TF = BA->getTargetFlags()) - OS << " [TF=" << TF << ']'; - } - - if (G) - if (unsigned Order = G->GetOrdering(this)) - OS << " [ORD=" << Order << ']'; - - if (getNodeId() != -1) - OS << " [ID=" << getNodeId() << ']'; - - DebugLoc dl = getDebugLoc(); - if (G && !dl.isUnknown()) { - DIScope - 
Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext())); - OS << " dbg:"; - // Omit the directory, since it's usually long and uninteresting. - if (Scope.Verify()) - OS << Scope.getFilename(); - else - OS << "<unknown>"; - OS << ':' << dl.getLine(); - if (dl.getCol() != 0) - OS << ':' << dl.getCol(); - } -} - -void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const { - print_types(OS, G); - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { - if (i) OS << ", "; else OS << " "; - OS << (void*)getOperand(i).getNode(); - if (unsigned RN = getOperand(i).getResNo()) - OS << ":" << RN; - } - print_details(OS, G); -} - -static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N, - const SelectionDAG *G, unsigned depth, - unsigned indent) { - if (depth == 0) - return; - - OS.indent(indent); - - N->print(OS, G); - - if (depth < 1) - return; - - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - // Don't follow chain operands. - if (N->getOperand(i).getValueType() == MVT::Other) - continue; - OS << '\n'; - printrWithDepthHelper(OS, N->getOperand(i).getNode(), G, depth-1, indent+2); - } -} - -void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G, - unsigned depth) const { - printrWithDepthHelper(OS, this, G, depth, 0); -} - -void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const { - // Don't print impossibly deep things. - printrWithDepth(OS, G, 10); -} - -void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const { - printrWithDepth(dbgs(), G, depth); -} - -void SDNode::dumprFull(const SelectionDAG *G) const { - // Don't print impossibly deep things. - dumprWithDepth(G, 10); -} - -static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - if (N->getOperand(i).getNode()->hasOneUse()) - DumpNodes(N->getOperand(i).getNode(), indent+2, G); - else - dbgs() << "\n" << std::string(indent+2, ' ') - << (void*)N->getOperand(i).getNode() << ": <multiple use>"; - - - dbgs() << "\n"; - dbgs().indent(indent); - N->dump(G); -} - SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { assert(N->getNumValues() == 1 && "Can't unroll a vector with multiple results!"); @@ -6527,20 +6003,14 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { const GlobalValue *GV; int64_t GVOffset = 0; if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { - // If GV has specified alignment, then use it. Otherwise, use the preferred - // alignment. - unsigned Align = GV->getAlignment(); - if (!Align) { - if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) { - if (GVar->hasInitializer()) { - const TargetData *TD = TLI.getTargetData(); - Align = TD->getPreferredAlignment(GVar); - } - } - if (!Align) - Align = TLI.getTargetData()->getABITypeAlignment(GV->getType()); - } - return MinAlign(Align, GVOffset); + unsigned PtrWidth = TLI.getPointerTy().getSizeInBits(); + APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0); + llvm::ComputeMaskedBits(const_cast<GlobalValue*>(GV), KnownZero, KnownOne, + TLI.getTargetData()); + unsigned AlignBits = KnownZero.countTrailingOnes(); + unsigned Align = AlignBits ? 
1 << std::min(31U, AlignBits) : 0; + if (Align) + return MinAlign(Align, GVOffset); } // If this is a direct reference to a stack slot, use information about the @@ -6566,74 +6036,6 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { return 0; } -void SelectionDAG::dump() const { - dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:"; - - for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end(); - I != E; ++I) { - const SDNode *N = I; - if (!N->hasOneUse() && N != getRoot().getNode()) - DumpNodes(N, 2, this); - } - - if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this); - - dbgs() << "\n\n"; -} - -void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const { - print_types(OS, G); - print_details(OS, G); -} - -typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet; -static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, - const SelectionDAG *G, VisitedSDNodeSet &once) { - if (!once.insert(N)) // If we've been here before, return now. - return; - - // Dump the current SDNode, but don't end the line yet. - OS << std::string(indent, ' '); - N->printr(OS, G); - - // Having printed this SDNode, walk the children: - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - const SDNode *child = N->getOperand(i).getNode(); - - if (i) OS << ","; - OS << " "; - - if (child->getNumOperands() == 0) { - // This child has no grandchildren; print it inline right here. - child->printr(OS, G); - once.insert(child); - } else { // Just the address. FIXME: also print the child's opcode. - OS << (void*)child; - if (unsigned RN = N->getOperand(i).getResNo()) - OS << ":" << RN; - } - } - - OS << "\n"; - - // Dump children that have grandchildren on their own line(s). - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - const SDNode *child = N->getOperand(i).getNode(); - DumpNodesr(OS, child, indent+2, G, once); - } -} - -void SDNode::dumpr() const { - VisitedSDNodeSet once; - DumpNodesr(dbgs(), this, 0, 0, once); -} - -void SDNode::dumpr(const SelectionDAG *G) const { - VisitedSDNodeSet once; - DumpNodesr(dbgs(), this, 0, G, once); -} - - // getAddressSpace - Return the address space this GlobalAddress belongs to. unsigned GlobalAddressSDNode::getAddressSpace() const { return getGlobal()->getType()->getAddressSpace(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 095b4001696f..94cb95804f69 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -41,13 +41,13 @@ #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" @@ -197,7 +197,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, // FP_ROUND's are always exact here. 
if (ValueVT.bitsLT(Val.getValueType())) return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val, - DAG.getIntPtrConstant(1)); + DAG.getTargetConstant(1, TLI.getPointerTy())); return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val); } @@ -206,7 +206,6 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); llvm_unreachable("Unknown mismatch!"); - return SDValue(); } /// getCopyFromParts - Create a value that contains the specified legal parts @@ -353,10 +352,13 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, assert(NumParts == 1 && "Do not know what to promote to!"); Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val); } else { - assert(PartVT.isInteger() && ValueVT.isInteger() && + assert((PartVT.isInteger() || PartVT == MVT::x86mmx) && + ValueVT.isInteger() && "Unknown mismatch!"); ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Val = DAG.getNode(ExtendKind, DL, ValueVT, Val); + if (PartVT == MVT::x86mmx) + Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } } else if (PartBits == ValueVT.getSizeInBits()) { // Different types of the same size. @@ -364,10 +366,13 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } else if (NumParts * PartBits < ValueVT.getSizeInBits()) { // If the parts cover less bits than value has, truncate the value. - assert(PartVT.isInteger() && ValueVT.isInteger() && + assert((PartVT.isInteger() || PartVT == MVT::x86mmx) && + ValueVT.isInteger() && "Unknown mismatch!"); ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); + if (PartVT == MVT::x86mmx) + Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } // The value may have changed - recompute ValueVT. @@ -813,9 +818,11 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, } } -void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa) { +void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa, + const TargetLibraryInfo *li) { AA = &aa; GFI = gfi; + LibInfo = li; TD = DAG.getTarget().getTargetData(); LPadToCallSiteMap.clear(); } @@ -964,7 +971,7 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, DAG.AddDbgValue(SDV, Val.getNode(), false); } } else - DEBUG(dbgs() << "Dropping debug info for " << DI); + DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); DanglingDebugInfoMap[V] = DanglingDebugInfo(); } } @@ -1054,6 +1061,23 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { return DAG.getMergeValues(&Constants[0], Constants.size(), getCurDebugLoc()); } + + if (const ConstantDataSequential *CDS = + dyn_cast<ConstantDataSequential>(C)) { + SmallVector<SDValue, 4> Ops; + for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { + SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode(); + // Add each leaf value from the operand to the Constants list + // to form a flattened list of all the values. 
+ for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i) + Ops.push_back(SDValue(Val, i)); + } + + if (isa<ArrayType>(CDS->getType())) + return DAG.getMergeValues(&Ops[0], Ops.size(), getCurDebugLoc()); + return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), + VT, &Ops[0], Ops.size()); + } if (C->getType()->isStructTy() || C->getType()->isArrayTy()) { assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) && @@ -1088,9 +1112,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { // Now that we know the number and type of the elements, get that number of // elements into the Ops array based on what kind of constant it is. SmallVector<SDValue, 16> Ops; - if (const ConstantVector *CP = dyn_cast<ConstantVector>(C)) { + if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) { for (unsigned i = 0; i != NumElements; ++i) - Ops.push_back(getValue(CP->getOperand(i))); + Ops.push_back(getValue(CV->getOperand(i))); } else { assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!"); EVT EltVT = TLI.getValueType(VecTy->getElementType()); @@ -1126,7 +1150,6 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { } llvm_unreachable("Can't get register for value!"); - return SDValue(); } void SelectionDAGBuilder::visitRet(const ReturnInst &I) { @@ -1285,8 +1308,8 @@ bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V, } /// Return branch probability calculated by BranchProbabilityInfo for IR blocks. -uint32_t SelectionDAGBuilder::getEdgeWeight(MachineBasicBlock *Src, - MachineBasicBlock *Dst) { +uint32_t SelectionDAGBuilder::getEdgeWeight(const MachineBasicBlock *Src, + const MachineBasicBlock *Dst) const { BranchProbabilityInfo *BPI = FuncInfo.BPI; if (!BPI) return 0; @@ -1336,6 +1359,8 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, Condition = getICmpCondCode(IC->getPredicate()); } else if (const FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) { Condition = getFCmpCondCode(FC->getPredicate()); + if (TM.Options.NoNaNsFPMath) + Condition = getFCmpCodeWithoutNaN(Condition); } else { Condition = ISD::SETEQ; // silence warning. llvm_unreachable("Unknown compare instruction"); @@ -1811,8 +1836,8 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { CopyToExportRegsIfNeeded(&I); // Update successor info - InvokeMBB->addSuccessor(Return); - InvokeMBB->addSuccessor(LandingPad); + addSuccessorWithWeight(InvokeMBB, Return); + addSuccessorWithWeight(InvokeMBB, LandingPad); // Drop into normal successor. DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), @@ -1820,9 +1845,6 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { DAG.getBasicBlock(Return))); } -void SelectionDAGBuilder::visitUnwind(const UnwindInst &I) { -} - void SelectionDAGBuilder::visitResume(const ResumeInst &RI) { llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!"); } @@ -1835,6 +1857,12 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); AddLandingPadInfo(LP, MMI, MBB); + // If there aren't registers to copy the values into (e.g., during SjLj + // exceptions), then don't bother to create these DAG nodes. 
+ if (TLI.getExceptionPointerRegister() == 0 && + TLI.getExceptionSelectorRegister() == 0) + return; + SmallVector<EVT, 2> ValueVTs; ComputeValueVTs(TLI, LP.getType(), ValueVTs); @@ -2003,7 +2031,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, } static inline bool areJTsAllowed(const TargetLowering &TLI) { - return !DisableJumpTables && + return !TLI.getTargetMachine().Options.DisableJumpTables && (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other)); } @@ -2190,7 +2218,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, CaseRange LHSR(CR.Range.first, Pivot); CaseRange RHSR(Pivot, CR.Range.second); - Constant *C = Pivot->Low; + const Constant *C = Pivot->Low; MachineBasicBlock *FalseBB = 0, *TrueBB = 0; // We know that we branch to the LHS if the Value being switched on is @@ -2383,14 +2411,14 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, BranchProbabilityInfo *BPI = FuncInfo.BPI; // Start with "simple" cases - for (size_t i = 1; i < SI.getNumSuccessors(); ++i) { - BasicBlock *SuccBB = SI.getSuccessor(i); + for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end(); + i != e; ++i) { + const BasicBlock *SuccBB = i.getCaseSuccessor(); MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB]; uint32_t ExtraWeight = BPI ? BPI->getEdgeWeight(SI.getParent(), SuccBB) : 0; - Cases.push_back(Case(SI.getSuccessorValue(i), - SI.getSuccessorValue(i), + Cases.push_back(Case(i.getCaseValue(), i.getCaseValue(), SMBB, ExtraWeight)); } std::sort(Cases.begin(), Cases.end(), CaseCmp()); @@ -2457,7 +2485,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { // If there is only the default destination, branch to it if it is not the // next basic block. Otherwise, just fall through. - if (SI.getNumCases() == 1) { + if (!SI.getNumCases()) { // Update machine-CFG edges. // If this is not a fall-through branch, emit the branch. @@ -2626,6 +2654,8 @@ void SelectionDAGBuilder::visitFCmp(const User &I) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); ISD::CondCode Condition = getFCmpCondCode(predicate); + if (TM.Options.NoNaNsFPMath) + Condition = getFCmpCodeWithoutNaN(Condition); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition)); } @@ -2685,11 +2715,12 @@ void SelectionDAGBuilder::visitFPTrunc(const User &I) { SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(), - DestVT, N, DAG.getIntPtrConstant(0))); + DestVT, N, + DAG.getTargetConstant(0, TLI.getPointerTy()))); } void SelectionDAGBuilder::visitFPExt(const User &I){ - // FPTrunc is never a no-op cast, no need to check + // FPExt is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N)); @@ -2772,33 +2803,25 @@ void SelectionDAGBuilder::visitExtractElement(const User &I) { TLI.getValueType(I.getType()), InVec, InIdx)); } -// Utility for visitShuffleVector - Returns true if the mask is mask starting -// from SIndx and increasing to the element length (undefs are allowed). 
-static bool SequentialMask(SmallVectorImpl<int> &Mask, unsigned SIndx) {
-  unsigned MaskNumElts = Mask.size();
-  for (unsigned i = 0; i != MaskNumElts; ++i)
-    if ((Mask[i] >= 0) && (Mask[i] != (int)(i + SIndx)))
+// Utility for visitShuffleVector - Return true if every element in Mask,
+// beginning from position Pos and ending in Pos+Size, falls within the
+// specified sequential range [L, L+Size) or is undef.
+static bool isSequentialInRange(const SmallVectorImpl<int> &Mask,
+                                unsigned Pos, unsigned Size, int Low) {
+  for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low)
+    if (Mask[i] >= 0 && Mask[i] != Low)
       return false;
   return true;
 }

 void SelectionDAGBuilder::visitShuffleVector(const User &I) {
-  SmallVector<int, 8> Mask;
   SDValue Src1 = getValue(I.getOperand(0));
   SDValue Src2 = getValue(I.getOperand(1));

-  // Convert the ConstantVector mask operand into an array of ints, with -1
-  // representing undef values.
-  SmallVector<Constant*, 8> MaskElts;
-  cast<Constant>(I.getOperand(2))->getVectorElements(MaskElts);
-  unsigned MaskNumElts = MaskElts.size();
-  for (unsigned i = 0; i != MaskNumElts; ++i) {
-    if (isa<UndefValue>(MaskElts[i]))
-      Mask.push_back(-1);
-    else
-      Mask.push_back(cast<ConstantInt>(MaskElts[i])->getSExtValue());
-  }
-
+  SmallVector<int, 8> Mask;
+  ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask);
+  unsigned MaskNumElts = Mask.size();
+
   EVT VT = TLI.getValueType(I.getType());
   EVT SrcVT = Src1.getValueType();
   unsigned SrcNumElts = SrcVT.getVectorNumElements();
@@ -2814,11 +2837,23 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
     // Mask is longer than the source vectors and is a multiple of the source
     // vectors.  We can use concatenate vector to make the mask and vectors
     // lengths match.
-    if (SrcNumElts*2 == MaskNumElts && SequentialMask(Mask, 0)) {
-      // The shuffle is concatenating two vectors together.
-      setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
-                               VT, Src1, Src2));
-      return;
+    if (SrcNumElts*2 == MaskNumElts) {
+      // First check for Src1 in low and Src2 in high
+      if (isSequentialInRange(Mask, 0, SrcNumElts, 0) &&
+          isSequentialInRange(Mask, SrcNumElts, SrcNumElts, SrcNumElts)) {
+        // The shuffle is concatenating two vectors together.
+        setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
+                                 VT, Src1, Src2));
+        return;
+      }
+      // Then check for Src2 in low and Src1 in high
+      if (isSequentialInRange(Mask, 0, SrcNumElts, SrcNumElts) &&
+          isSequentialInRange(Mask, SrcNumElts, SrcNumElts, 0)) {
+        // The shuffle is concatenating two vectors together.
+        setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
+                                 VT, Src2, Src1));
+        return;
+      }
     }

     // Pad both vectors with undefs to make them the same length as the mask.
@@ -2843,10 +2878,9 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
     SmallVector<int, 8> MappedOps;
     for (unsigned i = 0; i != MaskNumElts; ++i) {
       int Idx = Mask[i];
-      if (Idx < (int)SrcNumElts)
-        MappedOps.push_back(Idx);
-      else
-        MappedOps.push_back(Idx + MaskNumElts - SrcNumElts);
+      if (Idx >= (int)SrcNumElts)
+        Idx -= SrcNumElts - MaskNumElts;
+      MappedOps.push_back(Idx);
     }

     setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
@@ -2858,13 +2892,13 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
     // Analyze the access pattern of the vector to see if we can extract
     // two subvectors and do the shuffle. The analysis is done by calculating
     // the range of elements the mask access on both vectors.
- int MinRange[2] = { static_cast<int>(SrcNumElts+1), - static_cast<int>(SrcNumElts+1)}; + int MinRange[2] = { static_cast<int>(SrcNumElts), + static_cast<int>(SrcNumElts)}; int MaxRange[2] = {-1, -1}; for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; - int Input = 0; + unsigned Input = 0; if (Idx < 0) continue; @@ -2880,35 +2914,31 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { // Check if the access is smaller than the vector size and can we find // a reasonable extract index. - int RangeUse[2] = { 2, 2 }; // 0 = Unused, 1 = Extract, 2 = Can not - // Extract. + int RangeUse[2] = { -1, -1 }; // 0 = Unused, 1 = Extract, -1 = Can not + // Extract. int StartIdx[2]; // StartIdx to extract from - for (int Input=0; Input < 2; ++Input) { - if (MinRange[Input] == (int)(SrcNumElts+1) && MaxRange[Input] == -1) { + for (unsigned Input = 0; Input < 2; ++Input) { + if (MinRange[Input] >= (int)SrcNumElts && MaxRange[Input] < 0) { RangeUse[Input] = 0; // Unused StartIdx[Input] = 0; - } else if (MaxRange[Input] - MinRange[Input] < (int)MaskNumElts) { - // Fits within range but we should see if we can find a good - // start index that is a multiple of the mask length. - if (MaxRange[Input] < (int)MaskNumElts) { - RangeUse[Input] = 1; // Extract from beginning of the vector - StartIdx[Input] = 0; - } else { - StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts; - if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts && - StartIdx[Input] + MaskNumElts <= SrcNumElts) - RangeUse[Input] = 1; // Extract from a multiple of the mask length. - } + continue; } + + // Find a good start index that is a multiple of the mask length. Then + // see if the rest of the elements are in range. + StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts; + if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts && + StartIdx[Input] + MaskNumElts <= SrcNumElts) + RangeUse[Input] = 1; // Extract from a multiple of the mask length. } if (RangeUse[0] == 0 && RangeUse[1] == 0) { setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used. return; } - else if (RangeUse[0] < 2 && RangeUse[1] < 2) { + if (RangeUse[0] >= 0 && RangeUse[1] >= 0) { // Extract appropriate subvector and generate a vector shuffle - for (int Input=0; Input < 2; ++Input) { + for (unsigned Input = 0; Input < 2; ++Input) { SDValue &Src = Input == 0 ? 
Src1 : Src2; if (RangeUse[Input] == 0) Src = DAG.getUNDEF(VT); @@ -2921,12 +2951,13 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { SmallVector<int, 8> MappedOps; for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; - if (Idx < 0) - MappedOps.push_back(Idx); - else if (Idx < (int)SrcNumElts) - MappedOps.push_back(Idx - StartIdx[0]); - else - MappedOps.push_back(Idx - SrcNumElts - StartIdx[1] + MaskNumElts); + if (Idx >= 0) { + if (Idx < (int)SrcNumElts) + Idx -= StartIdx[0]; + else + Idx -= SrcNumElts + StartIdx[1] - MaskNumElts; + } + MappedOps.push_back(Idx); } setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, @@ -2942,22 +2973,20 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { EVT PtrVT = TLI.getPointerTy(); SmallVector<SDValue,8> Ops; for (unsigned i = 0; i != MaskNumElts; ++i) { - if (Mask[i] < 0) { - Ops.push_back(DAG.getUNDEF(EltVT)); - } else { - int Idx = Mask[i]; - SDValue Res; + int Idx = Mask[i]; + SDValue Res; - if (Idx < (int)SrcNumElts) - Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), - EltVT, Src1, DAG.getConstant(Idx, PtrVT)); - else - Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), - EltVT, Src2, - DAG.getConstant(Idx - SrcNumElts, PtrVT)); + if (Idx < 0) { + Res = DAG.getUNDEF(EltVT); + } else { + SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2; + if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts; - Ops.push_back(Res); + Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), + EltVT, Src, DAG.getConstant(Idx, PtrVT)); } + + Ops.push_back(Res); } setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), @@ -3042,7 +3071,9 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { void SelectionDAGBuilder::visitGetElementPtr(const User &I) { SDValue N = getValue(I.getOperand(0)); - Type *Ty = I.getOperand(0)->getType(); + // Note that the pointer operand may be a vector of pointers. Take the scalar + // element which holds a pointer. 
+ Type *Ty = I.getOperand(0)->getType()->getScalarType(); for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end(); OI != E; ++OI) { @@ -3096,7 +3127,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { unsigned Amt = ElementSize.logBase2(); IdxN = DAG.getNode(ISD::SHL, getCurDebugLoc(), N.getValueType(), IdxN, - DAG.getConstant(Amt, TLI.getPointerTy())); + DAG.getConstant(Amt, IdxN.getValueType())); } else { SDValue Scale = DAG.getConstant(ElementSize, TLI.getPointerTy()); IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(), @@ -3175,8 +3206,10 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { bool isVolatile = I.isVolatile(); bool isNonTemporal = I.getMetadata("nontemporal") != 0; + bool isInvariant = I.getMetadata("invariant.load") != 0; unsigned Alignment = I.getAlignment(); const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa); + const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); SmallVector<EVT, 4> ValueVTs; SmallVector<uint64_t, 4> Offsets; @@ -3224,7 +3257,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { DAG.getConstant(Offsets[i], PtrVT)); SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root, A, MachinePointerInfo(SV, Offsets[i]), isVolatile, - isNonTemporal, Alignment, TBAAInfo); + isNonTemporal, isInvariant, Alignment, TBAAInfo, + Ranges); Values[i] = L; Chains[ChainI] = L.getValue(1); @@ -3358,7 +3392,7 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { DebugLoc dl = getCurDebugLoc(); ISD::NodeType NT; switch (I.getOperation()) { - default: llvm_unreachable("Unknown atomicrmw operation"); return; + default: llvm_unreachable("Unknown atomicrmw operation"); case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break; case AtomicRMWInst::Add: NT = ISD::ATOMIC_LOAD_ADD; break; case AtomicRMWInst::Sub: NT = ISD::ATOMIC_LOAD_SUB; break; @@ -3496,24 +3530,16 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // Add the intrinsic ID as an integer operand if it's not a target intrinsic. if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID || Info.opc == ISD::INTRINSIC_W_CHAIN) - Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy())); + Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI.getPointerTy())); // Add all operands of the call to the operand list. for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { SDValue Op = getValue(I.getArgOperand(i)); - assert(TLI.isTypeLegal(Op.getValueType()) && - "Intrinsic uses a non-legal type?"); Ops.push_back(Op); } SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(TLI, I.getType(), ValueVTs); -#ifndef NDEBUG - for (unsigned Val = 0, E = ValueVTs.size(); Val != E; ++Val) { - assert(TLI.isTypeLegal(ValueVTs[Val]) && - "Intrinsic uses a non-legal type?"); - } -#endif // NDEBUG if (HasChain) ValueVTs.push_back(MVT::Other); @@ -3556,6 +3582,12 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, } setValue(&I, Result); + } else { + // Assign order to result here. If the intrinsic does not produce a result, + // it won't be mapped to a SDNode and visit() will not assign it an order + // number. + ++SDNodeOrder; + AssignOrderingToNode(Result.getNode()); } } @@ -3597,17 +3629,6 @@ getF32Constant(SelectionDAG &DAG, unsigned Flt) { return DAG.getConstantFP(APFloat(APInt(32, Flt)), MVT::f32); } -// implVisitAluOverflow - Lower arithmetic overflow instrinsics. 
-const char * -SelectionDAGBuilder::implVisitAluOverflow(const CallInst &I, ISD::NodeType Op) { - SDValue Op1 = getValue(I.getArgOperand(0)); - SDValue Op2 = getValue(I.getArgOperand(1)); - - SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1); - setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2)); - return 0; -} - /// visitExp - Lower an exp intrinsic. Handles the special sequences for /// limited-precision mode. void @@ -4367,9 +4388,8 @@ static unsigned getTruncatedArgReg(const SDValue &N) { const SDValue &CFR = Ext.getOperand(0); if (CFR.getOpcode() == ISD::CopyFromReg) return cast<RegisterSDNode>(CFR.getOperand(1))->getReg(); - else - if (CFR.getOpcode() == ISD::TRUNCATE) - return getTruncatedArgReg(CFR); + if (CFR.getOpcode() == ISD::TRUNCATE) + return getTruncatedArgReg(CFR); } return 0; } @@ -4398,7 +4418,7 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, // Some arguments' frame index is recorded during argument lowering. Offset = FuncInfo.getArgumentFrameIndex(Arg); if (Offset) - Reg = TRI->getFrameRegister(MF); + Reg = TRI->getFrameRegister(MF); if (!Reg && N.getNode()) { if (N.getOpcode() == ISD::CopyFromReg) @@ -4473,9 +4493,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(0)))); return 0; case Intrinsic::setjmp: - return "_setjmp"+!TLI.usesUnderscoreSetJmp(); + return &"_setjmp"[!TLI.usesUnderscoreSetJmp()]; case Intrinsic::longjmp: - return "_longjmp"+!TLI.usesUnderscoreLongJmp(); + return &"_longjmp"[!TLI.usesUnderscoreLongJmp()]; case Intrinsic::memcpy: { // Assert for address < 256 since we support only user defined address // spaces. @@ -4531,8 +4551,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { const DbgDeclareInst &DI = cast<DbgDeclareInst>(I); MDNode *Variable = DI.getVariable(); const Value *Address = DI.getAddress(); - if (!Address || !DIVariable(Variable).Verify()) + if (!Address || !DIVariable(Variable).Verify()) { + DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); return 0; + } // Build an entry in DbgOrdering. Debug info input nodes get an SDNodeOrder // but do not always have a corresponding SDNode built. The SDNodeOrder @@ -4543,7 +4565,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Check if address has undef value. if (isa<UndefValue>(Address) || (Address->use_empty() && !isa<Argument>(Address))) { - DEBUG(dbgs() << "Dropping debug info for " << DI); + DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); return 0; } @@ -4553,11 +4575,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { N = UnusedArgNodeMap[Address]; SDDbgValue *SDV; if (N.getNode()) { - // Parameters are handled specially. - bool isParameter = - DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable; if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) Address = BCI->getOperand(0); + // Parameters are handled specially. + bool isParameter = + (DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable || + isa<Argument>(Address)); + const AllocaInst *AI = dyn_cast<AllocaInst>(Address); if (isParameter && !AI) { @@ -4577,7 +4601,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { 0, dl, SDNodeOrder); else { // Can't do anything with other non-AI cases yet. 
- DEBUG(dbgs() << "Dropping debug info for " << DI); + DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); + DEBUG(dbgs() << "non-AllocaInst issue for Address: \n\t"); + DEBUG(Address->dump()); return 0; } DAG.AddDbgValue(SDV, N.getNode(), isParameter); @@ -4599,7 +4625,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } } } - DEBUG(dbgs() << "Dropping debug info for " << DI); + DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); } } return 0; @@ -4645,7 +4671,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } else { // We may expand this to cover more cases. One case where we have no // data available is an unreferenced parameter. - DEBUG(dbgs() << "Dropping debug info for " << DI); + DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); } } @@ -4654,8 +4680,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { V = BCI->getOperand(0); const AllocaInst *AI = dyn_cast<AllocaInst>(V); // Don't handle byval struct arguments or VLAs, for example. - if (!AI) + if (!AI) { + DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n"); + DEBUG(dbgs() << " Last seen at:\n " << *V << "\n"); return 0; + } DenseMap<const AllocaInst*, int>::iterator SI = FuncInfo.StaticAllocaMap.find(AI); if (SI == FuncInfo.StaticAllocaMap.end()) @@ -4667,43 +4696,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc()); return 0; } - case Intrinsic::eh_exception: { - // Insert the EXCEPTIONADDR instruction. - assert(FuncInfo.MBB->isLandingPad() && - "Call to eh.exception not in landing pad!"); - SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other); - SDValue Ops[1]; - Ops[0] = DAG.getRoot(); - SDValue Op = DAG.getNode(ISD::EXCEPTIONADDR, dl, VTs, Ops, 1); - setValue(&I, Op); - DAG.setRoot(Op.getValue(1)); - return 0; - } - - case Intrinsic::eh_selector: { - MachineBasicBlock *CallMBB = FuncInfo.MBB; - MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); - if (CallMBB->isLandingPad()) - AddCatchInfo(I, &MMI, CallMBB); - else { -#ifndef NDEBUG - FuncInfo.CatchInfoLost.insert(&I); -#endif - // FIXME: Mark exception selector register as live in. Hack for PR1508. - unsigned Reg = TLI.getExceptionSelectorRegister(); - if (Reg) FuncInfo.MBB->addLiveIn(Reg); - } - - // Insert the EHSELECTION instruction. - SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other); - SDValue Ops[2]; - Ops[0] = getValue(I.getArgOperand(0)); - Ops[1] = getRoot(); - SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2); - DAG.setRoot(Op.getValue(1)); - setValue(&I, DAG.getSExtOrTrunc(Op, dl, MVT::i32)); - return 0; - } case Intrinsic::eh_typeid_for: { // Find the type id for the given typeinfo. 
@@ -4775,11 +4767,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getRoot(), getValue(I.getArgOperand(0)))); return 0; } - case Intrinsic::eh_sjlj_dispatch_setup: { - DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other, - getRoot(), getValue(I.getArgOperand(0)))); - return 0; - } case Intrinsic::x86_mmx_pslli_w: case Intrinsic::x86_mmx_pslli_d: @@ -4841,6 +4828,22 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { setValue(&I, Res); return 0; } + case Intrinsic::x86_avx_vinsertf128_pd_256: + case Intrinsic::x86_avx_vinsertf128_ps_256: + case Intrinsic::x86_avx_vinsertf128_si_256: + case Intrinsic::x86_avx2_vinserti128: { + DebugLoc dl = getCurDebugLoc(); + EVT DestVT = TLI.getValueType(I.getType()); + EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType()); + uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue() & 1) * + ElVT.getVectorNumElements(); + Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, DestVT, + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), + DAG.getConstant(Idx, MVT::i32)); + setValue(&I, Res); + return 0; + } case Intrinsic::convertff: case Intrinsic::convertfsi: case Intrinsic::convertfui: @@ -4852,6 +4855,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::convertuu: { ISD::CvtCode Code = ISD::CVT_INVALID; switch (Intrinsic) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. case Intrinsic::convertff: Code = ISD::CVT_FF; break; case Intrinsic::convertfsi: Code = ISD::CVT_FS; break; case Intrinsic::convertfui: Code = ISD::CVT_FU; break; @@ -4946,14 +4950,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return 0; case Intrinsic::cttz: { SDValue Arg = getValue(I.getArgOperand(0)); + ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1)); EVT Ty = Arg.getValueType(); - setValue(&I, DAG.getNode(ISD::CTTZ, dl, Ty, Arg)); + setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF, + dl, Ty, Arg)); return 0; } case Intrinsic::ctlz: { SDValue Arg = getValue(I.getArgOperand(0)); + ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1)); EVT Ty = Arg.getValueType(); - setValue(&I, DAG.getNode(ISD::CTLZ, dl, Ty, Arg)); + setValue(&I, DAG.getNode(CI->isZero() ? 
ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF, + dl, Ty, Arg)); return 0; } case Intrinsic::ctpop: { @@ -5052,7 +5060,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::gcread: case Intrinsic::gcwrite: llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!"); - return 0; case Intrinsic::flt_rounds: setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32)); return 0; @@ -5064,7 +5071,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::trap: { - StringRef TrapFuncName = getTrapFunctionName(); + StringRef TrapFuncName = TM.Options.getTrapFunctionName(); if (TrapFuncName.empty()) { DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot())); return 0; @@ -5073,25 +5080,36 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(getRoot(), I.getType(), false, false, false, false, 0, CallingConv::C, - /*isTailCall=*/false, /*isReturnValueUsed=*/true, + /*isTailCall=*/false, + /*doesNotRet=*/false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()), Args, DAG, getCurDebugLoc()); DAG.setRoot(Result.second); return 0; } case Intrinsic::uadd_with_overflow: - return implVisitAluOverflow(I, ISD::UADDO); case Intrinsic::sadd_with_overflow: - return implVisitAluOverflow(I, ISD::SADDO); case Intrinsic::usub_with_overflow: - return implVisitAluOverflow(I, ISD::USUBO); case Intrinsic::ssub_with_overflow: - return implVisitAluOverflow(I, ISD::SSUBO); case Intrinsic::umul_with_overflow: - return implVisitAluOverflow(I, ISD::UMULO); - case Intrinsic::smul_with_overflow: - return implVisitAluOverflow(I, ISD::SMULO); + case Intrinsic::smul_with_overflow: { + ISD::NodeType Op; + switch (Intrinsic) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. + case Intrinsic::uadd_with_overflow: Op = ISD::UADDO; break; + case Intrinsic::sadd_with_overflow: Op = ISD::SADDO; break; + case Intrinsic::usub_with_overflow: Op = ISD::USUBO; break; + case Intrinsic::ssub_with_overflow: Op = ISD::SSUBO; break; + case Intrinsic::umul_with_overflow: Op = ISD::UMULO; break; + case Intrinsic::smul_with_overflow: Op = ISD::SMULO; break; + } + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1); + setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2)); + return 0; + } case Intrinsic::prefetch: { SDValue Ops[5]; unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); @@ -5226,7 +5244,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, // If there's a possibility that fast-isel has already selected some amount // of the current basic block, don't emit a tail call. 
- if (isTailCall && EnableFastISel) + if (isTailCall && TM.Options.EnableFastISel) isTailCall = false; std::pair<SDValue,SDValue> Result = @@ -5236,6 +5254,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, CS.paramHasAttr(0, Attribute::InReg), FTy->getNumParams(), CS.getCallingConv(), isTailCall, + CS.doesNotReturn(), !CS.getInstruction()->use_empty(), Callee, Args, DAG, getCurDebugLoc()); assert((isTailCall || Result.second.getNode()) && @@ -5264,7 +5283,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, SDValue L = DAG.getLoad(Outs[i].VT, getCurDebugLoc(), Result.second, Add, MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), - false, false, 1); + false, false, false, 1); Values[i] = L; Chains[i] = L.getValue(1); } @@ -5375,7 +5394,8 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root, Ptr, MachinePointerInfo(PtrVal), false /*volatile*/, - false /*nontemporal*/, 1 /* align=1 */); + false /*nontemporal*/, + false /*isinvariant*/, 1 /* align=1 */); if (!ConstantMemory) Builder.PendingLoads.push_back(LoadVal.getValue(1)); @@ -5470,23 +5490,8 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { return; } - // See if any floating point values are being passed to this function. This is - // used to emit an undefined reference to fltused on Windows. - FunctionType *FT = - cast<FunctionType>(I.getCalledValue()->getType()->getContainedType(0)); MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); - if (FT->isVarArg() && - !MMI.callsExternalVAFunctionWithFloatingPointArguments()) { - for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { - Type* T = I.getArgOperand(i)->getType(); - for (po_iterator<Type*> i = po_begin(T), e = po_end(T); - i != e; ++i) { - if (!i->isFloatingPointTy()) continue; - MMI.setCallsExternalVAFunctionWithFloatingPointArguments(true); - break; - } - } - } + ComputeUsesVAFloatArgument(I, &MMI); const char *RenameFn = 0; if (Function *F = I.getCalledFunction()) { @@ -5509,7 +5514,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { // can't be a library call. if (!F->hasLocalLinkage() && F->hasName()) { StringRef Name = F->getName(); - if (Name == "copysign" || Name == "copysignf" || Name == "copysignl") { + if ((LibInfo->has(LibFunc::copysign) && Name == "copysign") || + (LibInfo->has(LibFunc::copysignf) && Name == "copysignf") || + (LibInfo->has(LibFunc::copysignl) && Name == "copysignl")) { if (I.getNumArgOperands() == 2 && // Basic sanity checks. I.getArgOperand(0)->getType()->isFloatingPointTy() && I.getType() == I.getArgOperand(0)->getType() && @@ -5520,7 +5527,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { LHS.getValueType(), LHS, RHS)); return; } - } else if (Name == "fabs" || Name == "fabsf" || Name == "fabsl") { + } else if ((LibInfo->has(LibFunc::fabs) && Name == "fabs") || + (LibInfo->has(LibFunc::fabsf) && Name == "fabsf") || + (LibInfo->has(LibFunc::fabsl) && Name == "fabsl")) { if (I.getNumArgOperands() == 1 && // Basic sanity checks. 
I.getArgOperand(0)->getType()->isFloatingPointTy() && I.getType() == I.getArgOperand(0)->getType()) { @@ -5529,7 +5538,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { Tmp.getValueType(), Tmp)); return; } - } else if (Name == "sin" || Name == "sinf" || Name == "sinl") { + } else if ((LibInfo->has(LibFunc::sin) && Name == "sin") || + (LibInfo->has(LibFunc::sinf) && Name == "sinf") || + (LibInfo->has(LibFunc::sinl) && Name == "sinl")) { if (I.getNumArgOperands() == 1 && // Basic sanity checks. I.getArgOperand(0)->getType()->isFloatingPointTy() && I.getType() == I.getArgOperand(0)->getType() && @@ -5539,7 +5550,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { Tmp.getValueType(), Tmp)); return; } - } else if (Name == "cos" || Name == "cosf" || Name == "cosl") { + } else if ((LibInfo->has(LibFunc::cos) && Name == "cos") || + (LibInfo->has(LibFunc::cosf) && Name == "cosf") || + (LibInfo->has(LibFunc::cosl) && Name == "cosl")) { if (I.getNumArgOperands() == 1 && // Basic sanity checks. I.getArgOperand(0)->getType()->isFloatingPointTy() && I.getType() == I.getArgOperand(0)->getType() && @@ -5549,7 +5562,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { Tmp.getValueType(), Tmp)); return; } - } else if (Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") { + } else if ((LibInfo->has(LibFunc::sqrt) && Name == "sqrt") || + (LibInfo->has(LibFunc::sqrtf) && Name == "sqrtf") || + (LibInfo->has(LibFunc::sqrtl) && Name == "sqrtl")) { if (I.getNumArgOperands() == 1 && // Basic sanity checks. I.getArgOperand(0)->getType()->isFloatingPointTy() && I.getType() == I.getArgOperand(0)->getType() && @@ -5559,6 +5574,85 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { Tmp.getValueType(), Tmp)); return; } + } else if ((LibInfo->has(LibFunc::floor) && Name == "floor") || + (LibInfo->has(LibFunc::floorf) && Name == "floorf") || + (LibInfo->has(LibFunc::floorl) && Name == "floorl")) { + if (I.getNumArgOperands() == 1 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType()) { + SDValue Tmp = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(ISD::FFLOOR, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } + } else if ((LibInfo->has(LibFunc::nearbyint) && Name == "nearbyint") || + (LibInfo->has(LibFunc::nearbyintf) && Name == "nearbyintf") || + (LibInfo->has(LibFunc::nearbyintl) && Name == "nearbyintl")) { + if (I.getNumArgOperands() == 1 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType()) { + SDValue Tmp = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(ISD::FNEARBYINT, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } + } else if ((LibInfo->has(LibFunc::ceil) && Name == "ceil") || + (LibInfo->has(LibFunc::ceilf) && Name == "ceilf") || + (LibInfo->has(LibFunc::ceill) && Name == "ceill")) { + if (I.getNumArgOperands() == 1 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType()) { + SDValue Tmp = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(ISD::FCEIL, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } + } else if ((LibInfo->has(LibFunc::rint) && Name == "rint") || + (LibInfo->has(LibFunc::rintf) && Name == "rintf") || + (LibInfo->has(LibFunc::rintl) && Name == "rintl")) { + if (I.getNumArgOperands() == 1 && // Basic sanity checks. 
+ I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType()) { + SDValue Tmp = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(ISD::FRINT, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } + } else if ((LibInfo->has(LibFunc::trunc) && Name == "trunc") || + (LibInfo->has(LibFunc::truncf) && Name == "truncf") || + (LibInfo->has(LibFunc::truncl) && Name == "truncl")) { + if (I.getNumArgOperands() == 1 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType()) { + SDValue Tmp = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(ISD::FTRUNC, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } + } else if ((LibInfo->has(LibFunc::log2) && Name == "log2") || + (LibInfo->has(LibFunc::log2f) && Name == "log2f") || + (LibInfo->has(LibFunc::log2l) && Name == "log2l")) { + if (I.getNumArgOperands() == 1 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType() && + I.onlyReadsMemory()) { + SDValue Tmp = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(ISD::FLOG2, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } + } else if ((LibInfo->has(LibFunc::exp2) && Name == "exp2") || + (LibInfo->has(LibFunc::exp2f) && Name == "exp2f") || + (LibInfo->has(LibFunc::exp2l) && Name == "exp2l")) { + if (I.getNumArgOperands() == 1 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType() && + I.onlyReadsMemory()) { + SDValue Tmp = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(ISD::FEXP2, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } } else if (Name == "memcmp") { if (visitMemCmpCall(I)) return; @@ -5596,22 +5690,6 @@ public: : TargetLowering::AsmOperandInfo(info), CallOperand(0,0) { } - /// MarkAllocatedRegs - Once AssignedRegs is set, mark the assigned registers - /// busy in OutputRegs/InputRegs. - void MarkAllocatedRegs(bool isOutReg, bool isInReg, - std::set<unsigned> &OutputRegs, - std::set<unsigned> &InputRegs, - const TargetRegisterInfo &TRI) const { - if (isOutReg) { - for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i) - MarkRegAndAliases(AssignedRegs.Regs[i], OutputRegs, TRI); - } - if (isInReg) { - for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i) - MarkRegAndAliases(AssignedRegs.Regs[i], InputRegs, TRI); - } - } - /// getCallOperandValEVT - Return the EVT of the Value* that this operand /// corresponds to. If there is no Value* for this operand, it returns /// MVT::Other. @@ -5659,18 +5737,6 @@ public: return TLI.getValueType(OpTy, true); } - -private: - /// MarkRegAndAliases - Mark the specified register and all aliases in the - /// specified set. - static void MarkRegAndAliases(unsigned Reg, std::set<unsigned> &Regs, - const TargetRegisterInfo &TRI) { - assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "Isn't a physreg"); - Regs.insert(Reg); - if (const unsigned *Aliases = TRI.getAliasSet(Reg)) - for (; *Aliases; ++Aliases) - Regs.insert(*Aliases); - } }; typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector; @@ -5684,39 +5750,13 @@ typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector; /// allocation. This produces generally horrible, but correct, code. /// /// OpInfo describes the operand. -/// Input and OutputRegs are the set of already allocated physical registers. 
/// static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI, DebugLoc DL, - SDISelAsmOperandInfo &OpInfo, - std::set<unsigned> &OutputRegs, - std::set<unsigned> &InputRegs) { + SDISelAsmOperandInfo &OpInfo) { LLVMContext &Context = *DAG.getContext(); - // Compute whether this value requires an input register, an output register, - // or both. - bool isOutReg = false; - bool isInReg = false; - switch (OpInfo.Type) { - case InlineAsm::isOutput: - isOutReg = true; - - // If there is an input constraint that matches this, we need to reserve - // the input register so no other inputs allocate to it. - isInReg = OpInfo.hasMatchingInput(); - break; - case InlineAsm::isInput: - isInReg = true; - isOutReg = false; - break; - case InlineAsm::isClobber: - isOutReg = true; - isInReg = true; - break; - } - - MachineFunction &MF = DAG.getMachineFunction(); SmallVector<unsigned, 4> Regs; @@ -5790,8 +5830,6 @@ static void GetRegistersForValue(SelectionDAG &DAG, } OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT); - const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo(); - OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI); return; } @@ -5822,8 +5860,6 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { /// ConstraintOperands - Information about all of the constraints. SDISelAsmOperandInfoVector ConstraintOperands; - std::set<unsigned> OutputRegs, InputRegs; - TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(CS); @@ -5956,7 +5992,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // constant pool entry to get its address. const Value *OpVal = OpInfo.CallOperandVal; if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) || - isa<ConstantVector>(OpVal)) { + isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) { OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal), TLI.getPointerTy()); } else { @@ -5985,8 +6021,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // If this constraint is for a specific register, allocate it before // anything else. if (OpInfo.ConstraintType == TargetLowering::C_Register) - GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo, OutputRegs, - InputRegs); + GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo); } // Second pass - Loop over all of the operands, assigning virtual or physregs @@ -5997,8 +6032,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // C_Register operands have already been allocated, Other/Memory don't need // to be. if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass) - GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo, OutputRegs, - InputRegs); + GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo); } // AsmNodeOperands - The operands for the ISD::INLINEASM node. @@ -6052,9 +6086,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Copy the output from the appropriate register. Find a register that // we can use. - if (OpInfo.AssignedRegs.Regs.empty()) - report_fatal_error("Couldn't allocate output reg for constraint '" + - Twine(OpInfo.ConstraintCode) + "'!"); + if (OpInfo.AssignedRegs.Regs.empty()) { + LLVMContext &Ctx = *DAG.getContext(); + Ctx.emitError(CS.getInstruction(), + "couldn't allocate output register for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); + break; + } // If this is an indirect operand, store through the pointer after the // asm. 
@@ -6154,9 +6192,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { std::vector<SDValue> Ops; TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode, Ops, DAG); - if (Ops.empty()) - report_fatal_error("Invalid operand for inline asm constraint '" + - Twine(OpInfo.ConstraintCode) + "'!"); + if (Ops.empty()) { + LLVMContext &Ctx = *DAG.getContext(); + Ctx.emitError(CS.getInstruction(), + "invalid operand for inline asm constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); + break; + } // Add information to the INLINEASM node to know about this input. unsigned ResOpType = @@ -6187,9 +6229,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { "Don't know how to handle indirect register inputs yet!"); // Copy the input into the appropriate registers. - if (OpInfo.AssignedRegs.Regs.empty()) - report_fatal_error("Couldn't allocate input reg for constraint '" + - Twine(OpInfo.ConstraintCode) + "'!"); + if (OpInfo.AssignedRegs.Regs.empty()) { + LLVMContext &Ctx = *DAG.getContext(); + Ctx.emitError(CS.getInstruction(), + "couldn't allocate input reg for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); + break; + } OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), Chain, &Flag); @@ -6327,7 +6373,7 @@ TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy, bool RetSExt, bool RetZExt, bool isVarArg, bool isInreg, unsigned NumFixedArgs, CallingConv::ID CallConv, bool isTailCall, - bool isReturnValueUsed, + bool doesNotRet, bool isReturnValueUsed, SDValue Callee, ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl) const { @@ -6424,7 +6470,7 @@ TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy, } SmallVector<SDValue, 4> InVals; - Chain = LowerCall(Chain, Callee, CallConv, isVarArg, isTailCall, + Chain = LowerCall(Chain, Callee, CallConv, isVarArg, doesNotRet, isTailCall, Outs, OutVals, Ins, dl, DAG, InVals); // Verify that the target's LowerCall behaved as expected. @@ -6493,7 +6539,6 @@ void TargetLowering::LowerOperationWrapper(SDNode *N, SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { llvm_unreachable("LowerOperation not implemented for this target!"); - return SDValue(); } void @@ -6515,10 +6560,10 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { /// isOnlyUsedInEntryBlock - If the specified argument is only used in the /// entry block, return true. This includes arguments used by switches, since /// the switch may expand into multiple basic blocks. -static bool isOnlyUsedInEntryBlock(const Argument *A) { +static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) { // With FastISel active, we may be splitting blocks, so force creation // of virtual registers for all non-dead arguments. - if (EnableFastISel) + if (FastISel) return A->use_empty(); const BasicBlock *Entry = A->getParent()->begin(); @@ -6708,7 +6753,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { SDB->getCurDebugLoc()); SDB->setValue(I, Res); - if (!EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) { + if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) { if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(Res.getOperand(0).getNode())) if (FrameIndexSDNode *FI = @@ -6718,7 +6763,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { // If this argument is live outside of the entry block, insert a copy from // wherever we got it to the vreg that other BB's will reference it as. 
- if (!EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) { + if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) { // If we can, though, try to skip creating an unnecessary vreg. // FIXME: This isn't very clean... it would be nice to make this more // general. It's also subtly incompatible with the hacks FastISel @@ -6729,7 +6774,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { continue; } } - if (!isOnlyUsedInEntryBlock(I)) { + if (!isOnlyUsedInEntryBlock(I, TM.Options.EnableFastISel)) { FuncInfo->InitializeRegForValue(I); SDB->CopyToExportRegsIfNeeded(I); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 0a21ca3472ca..8393b414926a 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -67,11 +67,11 @@ class SIToFPInst; class StoreInst; class SwitchInst; class TargetData; +class TargetLibraryInfo; class TargetLowering; class TruncInst; class UIToFPInst; class UnreachableInst; -class UnwindInst; class VAArgInst; class ZExtInst; @@ -129,13 +129,13 @@ private: /// Case - A struct to record the Value for a switch case, and the /// case's target basic block. struct Case { - Constant* Low; - Constant* High; + const Constant *Low; + const Constant *High; MachineBasicBlock* BB; uint32_t ExtraWeight; Case() : Low(0), High(0), BB(0), ExtraWeight(0) { } - Case(Constant* low, Constant* high, MachineBasicBlock* bb, + Case(const Constant *low, const Constant *high, MachineBasicBlock *bb, uint32_t extraweight) : Low(low), High(high), BB(bb), ExtraWeight(extraweight) { } @@ -294,6 +294,7 @@ public: SelectionDAG &DAG; const TargetData *TD; AliasAnalysis *AA; + const TargetLibraryInfo *LibInfo; /// SwitchCases - Vector of CaseBlock structures used to communicate /// SwitchInst code generation information. @@ -338,7 +339,8 @@ public: HasTailCall(false), Context(dag.getContext()) { } - void init(GCFunctionInfo *gfi, AliasAnalysis &aa); + void init(GCFunctionInfo *gfi, AliasAnalysis &aa, + const TargetLibraryInfo *li); /// clear - Clear out the current SelectionDAG and the associated /// state and prepare this SelectionDAGBuilder object to be used @@ -451,7 +453,8 @@ private: MachineBasicBlock* Default, MachineBasicBlock *SwitchBB); - uint32_t getEdgeWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst); + uint32_t getEdgeWeight(const MachineBasicBlock *Src, + const MachineBasicBlock *Dst) const; void addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst, uint32_t Weight = 0); public: @@ -471,7 +474,6 @@ private: // These all get lowered before this pass. 
void visitInvoke(const InvokeInst &I); void visitResume(const ResumeInst &I); - void visitUnwind(const UnwindInst &I); void visitBinary(const User &I, unsigned OpCode); void visitShift(const User &I, unsigned Opcode); @@ -554,8 +556,6 @@ private: void visitUserOp2(const Instruction &I) { llvm_unreachable("UserOp2 should not exist at instruction selection time!"); } - - const char *implVisitAluOverflow(const CallInst &I, ISD::NodeType Op); void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp new file mode 100644 index 000000000000..f981afb437b0 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -0,0 +1,631 @@ +//===-- SelectionDAGDumper.cpp - Implement SelectionDAG::dump() -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the SelectionDAG::dump method and friends. +// +//===----------------------------------------------------------------------===// + +#include "ScheduleDAGSDNodes.h" +#include "llvm/Function.h" +#include "llvm/Intrinsics.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/GraphWriter.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/StringExtras.h" +using namespace llvm; + +std::string SDNode::getOperationName(const SelectionDAG *G) const { + switch (getOpcode()) { + default: + if (getOpcode() < ISD::BUILTIN_OP_END) + return "<<Unknown DAG Node>>"; + if (isMachineOpcode()) { + if (G) + if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo()) + if (getMachineOpcode() < TII->getNumOpcodes()) + return TII->getName(getMachineOpcode()); + return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>"; + } + if (G) { + const TargetLowering &TLI = G->getTargetLoweringInfo(); + const char *Name = TLI.getTargetNodeName(getOpcode()); + if (Name) return Name; + return "<<Unknown Target Node #" + utostr(getOpcode()) + ">>"; + } + return "<<Unknown Node #" + utostr(getOpcode()) + ">>"; + +#ifndef NDEBUG + case ISD::DELETED_NODE: return "<<Deleted Node!>>"; +#endif + case ISD::PREFETCH: return "Prefetch"; + case ISD::MEMBARRIER: return "MemBarrier"; + case ISD::ATOMIC_FENCE: return "AtomicFence"; + case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap"; + case ISD::ATOMIC_SWAP: return "AtomicSwap"; + case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd"; + case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub"; + case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd"; + case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr"; + case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor"; + case ISD::ATOMIC_LOAD_NAND: return "AtomicLoadNand"; + case ISD::ATOMIC_LOAD_MIN: return "AtomicLoadMin"; + case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax"; + case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin"; + case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax"; + case ISD::ATOMIC_LOAD: return "AtomicLoad"; + case 
ISD::ATOMIC_STORE: return "AtomicStore"; + case ISD::PCMARKER: return "PCMarker"; + case ISD::READCYCLECOUNTER: return "ReadCycleCounter"; + case ISD::SRCVALUE: return "SrcValue"; + case ISD::MDNODE_SDNODE: return "MDNode"; + case ISD::EntryToken: return "EntryToken"; + case ISD::TokenFactor: return "TokenFactor"; + case ISD::AssertSext: return "AssertSext"; + case ISD::AssertZext: return "AssertZext"; + + case ISD::BasicBlock: return "BasicBlock"; + case ISD::VALUETYPE: return "ValueType"; + case ISD::Register: return "Register"; + case ISD::RegisterMask: return "RegisterMask"; + case ISD::Constant: return "Constant"; + case ISD::ConstantFP: return "ConstantFP"; + case ISD::GlobalAddress: return "GlobalAddress"; + case ISD::GlobalTLSAddress: return "GlobalTLSAddress"; + case ISD::FrameIndex: return "FrameIndex"; + case ISD::JumpTable: return "JumpTable"; + case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE"; + case ISD::RETURNADDR: return "RETURNADDR"; + case ISD::FRAMEADDR: return "FRAMEADDR"; + case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET"; + case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR"; + case ISD::LSDAADDR: return "LSDAADDR"; + case ISD::EHSELECTION: return "EHSELECTION"; + case ISD::EH_RETURN: return "EH_RETURN"; + case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP"; + case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP"; + case ISD::ConstantPool: return "ConstantPool"; + case ISD::ExternalSymbol: return "ExternalSymbol"; + case ISD::BlockAddress: return "BlockAddress"; + case ISD::INTRINSIC_WO_CHAIN: + case ISD::INTRINSIC_VOID: + case ISD::INTRINSIC_W_CHAIN: { + unsigned OpNo = getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1; + unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue(); + if (IID < Intrinsic::num_intrinsics) + return Intrinsic::getName((Intrinsic::ID)IID); + else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo()) + return TII->getName(IID); + llvm_unreachable("Invalid intrinsic ID"); + } + + case ISD::BUILD_VECTOR: return "BUILD_VECTOR"; + case ISD::TargetConstant: return "TargetConstant"; + case ISD::TargetConstantFP: return "TargetConstantFP"; + case ISD::TargetGlobalAddress: return "TargetGlobalAddress"; + case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress"; + case ISD::TargetFrameIndex: return "TargetFrameIndex"; + case ISD::TargetJumpTable: return "TargetJumpTable"; + case ISD::TargetConstantPool: return "TargetConstantPool"; + case ISD::TargetExternalSymbol: return "TargetExternalSymbol"; + case ISD::TargetBlockAddress: return "TargetBlockAddress"; + + case ISD::CopyToReg: return "CopyToReg"; + case ISD::CopyFromReg: return "CopyFromReg"; + case ISD::UNDEF: return "undef"; + case ISD::MERGE_VALUES: return "merge_values"; + case ISD::INLINEASM: return "inlineasm"; + case ISD::EH_LABEL: return "eh_label"; + case ISD::HANDLENODE: return "handlenode"; + + // Unary operators + case ISD::FABS: return "fabs"; + case ISD::FNEG: return "fneg"; + case ISD::FSQRT: return "fsqrt"; + case ISD::FSIN: return "fsin"; + case ISD::FCOS: return "fcos"; + case ISD::FTRUNC: return "ftrunc"; + case ISD::FFLOOR: return "ffloor"; + case ISD::FCEIL: return "fceil"; + case ISD::FRINT: return "frint"; + case ISD::FNEARBYINT: return "fnearbyint"; + case ISD::FEXP: return "fexp"; + case ISD::FEXP2: return "fexp2"; + case ISD::FLOG: return "flog"; + case ISD::FLOG2: return "flog2"; + case ISD::FLOG10: return "flog10"; + + // Binary operators + case ISD::ADD: return "add"; + case ISD::SUB: return "sub"; + case ISD::MUL: 
return "mul"; + case ISD::MULHU: return "mulhu"; + case ISD::MULHS: return "mulhs"; + case ISD::SDIV: return "sdiv"; + case ISD::UDIV: return "udiv"; + case ISD::SREM: return "srem"; + case ISD::UREM: return "urem"; + case ISD::SMUL_LOHI: return "smul_lohi"; + case ISD::UMUL_LOHI: return "umul_lohi"; + case ISD::SDIVREM: return "sdivrem"; + case ISD::UDIVREM: return "udivrem"; + case ISD::AND: return "and"; + case ISD::OR: return "or"; + case ISD::XOR: return "xor"; + case ISD::SHL: return "shl"; + case ISD::SRA: return "sra"; + case ISD::SRL: return "srl"; + case ISD::ROTL: return "rotl"; + case ISD::ROTR: return "rotr"; + case ISD::FADD: return "fadd"; + case ISD::FSUB: return "fsub"; + case ISD::FMUL: return "fmul"; + case ISD::FDIV: return "fdiv"; + case ISD::FMA: return "fma"; + case ISD::FREM: return "frem"; + case ISD::FCOPYSIGN: return "fcopysign"; + case ISD::FGETSIGN: return "fgetsign"; + case ISD::FPOW: return "fpow"; + + case ISD::FPOWI: return "fpowi"; + case ISD::SETCC: return "setcc"; + case ISD::SELECT: return "select"; + case ISD::VSELECT: return "vselect"; + case ISD::SELECT_CC: return "select_cc"; + case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt"; + case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt"; + case ISD::CONCAT_VECTORS: return "concat_vectors"; + case ISD::INSERT_SUBVECTOR: return "insert_subvector"; + case ISD::EXTRACT_SUBVECTOR: return "extract_subvector"; + case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector"; + case ISD::VECTOR_SHUFFLE: return "vector_shuffle"; + case ISD::CARRY_FALSE: return "carry_false"; + case ISD::ADDC: return "addc"; + case ISD::ADDE: return "adde"; + case ISD::SADDO: return "saddo"; + case ISD::UADDO: return "uaddo"; + case ISD::SSUBO: return "ssubo"; + case ISD::USUBO: return "usubo"; + case ISD::SMULO: return "smulo"; + case ISD::UMULO: return "umulo"; + case ISD::SUBC: return "subc"; + case ISD::SUBE: return "sube"; + case ISD::SHL_PARTS: return "shl_parts"; + case ISD::SRA_PARTS: return "sra_parts"; + case ISD::SRL_PARTS: return "srl_parts"; + + // Conversion operators. 
+ case ISD::SIGN_EXTEND: return "sign_extend"; + case ISD::ZERO_EXTEND: return "zero_extend"; + case ISD::ANY_EXTEND: return "any_extend"; + case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg"; + case ISD::TRUNCATE: return "truncate"; + case ISD::FP_ROUND: return "fp_round"; + case ISD::FLT_ROUNDS_: return "flt_rounds"; + case ISD::FP_ROUND_INREG: return "fp_round_inreg"; + case ISD::FP_EXTEND: return "fp_extend"; + + case ISD::SINT_TO_FP: return "sint_to_fp"; + case ISD::UINT_TO_FP: return "uint_to_fp"; + case ISD::FP_TO_SINT: return "fp_to_sint"; + case ISD::FP_TO_UINT: return "fp_to_uint"; + case ISD::BITCAST: return "bitcast"; + case ISD::FP16_TO_FP32: return "fp16_to_fp32"; + case ISD::FP32_TO_FP16: return "fp32_to_fp16"; + + case ISD::CONVERT_RNDSAT: { + switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) { + default: llvm_unreachable("Unknown cvt code!"); + case ISD::CVT_FF: return "cvt_ff"; + case ISD::CVT_FS: return "cvt_fs"; + case ISD::CVT_FU: return "cvt_fu"; + case ISD::CVT_SF: return "cvt_sf"; + case ISD::CVT_UF: return "cvt_uf"; + case ISD::CVT_SS: return "cvt_ss"; + case ISD::CVT_SU: return "cvt_su"; + case ISD::CVT_US: return "cvt_us"; + case ISD::CVT_UU: return "cvt_uu"; + } + } + + // Control flow instructions + case ISD::BR: return "br"; + case ISD::BRIND: return "brind"; + case ISD::BR_JT: return "br_jt"; + case ISD::BRCOND: return "brcond"; + case ISD::BR_CC: return "br_cc"; + case ISD::CALLSEQ_START: return "callseq_start"; + case ISD::CALLSEQ_END: return "callseq_end"; + + // Other operators + case ISD::LOAD: return "load"; + case ISD::STORE: return "store"; + case ISD::VAARG: return "vaarg"; + case ISD::VACOPY: return "vacopy"; + case ISD::VAEND: return "vaend"; + case ISD::VASTART: return "vastart"; + case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc"; + case ISD::EXTRACT_ELEMENT: return "extract_element"; + case ISD::BUILD_PAIR: return "build_pair"; + case ISD::STACKSAVE: return "stacksave"; + case ISD::STACKRESTORE: return "stackrestore"; + case ISD::TRAP: return "trap"; + + // Bit manipulation + case ISD::BSWAP: return "bswap"; + case ISD::CTPOP: return "ctpop"; + case ISD::CTTZ: return "cttz"; + case ISD::CTTZ_ZERO_UNDEF: return "cttz_zero_undef"; + case ISD::CTLZ: return "ctlz"; + case ISD::CTLZ_ZERO_UNDEF: return "ctlz_zero_undef"; + + // Trampolines + case ISD::INIT_TRAMPOLINE: return "init_trampoline"; + case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline"; + + case ISD::CONDCODE: + switch (cast<CondCodeSDNode>(this)->get()) { + default: llvm_unreachable("Unknown setcc condition!"); + case ISD::SETOEQ: return "setoeq"; + case ISD::SETOGT: return "setogt"; + case ISD::SETOGE: return "setoge"; + case ISD::SETOLT: return "setolt"; + case ISD::SETOLE: return "setole"; + case ISD::SETONE: return "setone"; + + case ISD::SETO: return "seto"; + case ISD::SETUO: return "setuo"; + case ISD::SETUEQ: return "setue"; + case ISD::SETUGT: return "setugt"; + case ISD::SETUGE: return "setuge"; + case ISD::SETULT: return "setult"; + case ISD::SETULE: return "setule"; + case ISD::SETUNE: return "setune"; + + case ISD::SETEQ: return "seteq"; + case ISD::SETGT: return "setgt"; + case ISD::SETGE: return "setge"; + case ISD::SETLT: return "setlt"; + case ISD::SETLE: return "setle"; + case ISD::SETNE: return "setne"; + + case ISD::SETTRUE: return "settrue"; + case ISD::SETTRUE2: return "settrue2"; + case ISD::SETFALSE: return "setfalse"; + case ISD::SETFALSE2: return "setfalse2"; + } + } +} + +const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) { + switch 
(AM) { + default: return ""; + case ISD::PRE_INC: return "<pre-inc>"; + case ISD::PRE_DEC: return "<pre-dec>"; + case ISD::POST_INC: return "<post-inc>"; + case ISD::POST_DEC: return "<post-dec>"; + } +} + +void SDNode::dump() const { dump(0); } +void SDNode::dump(const SelectionDAG *G) const { + print(dbgs(), G); + dbgs() << '\n'; +} + +void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const { + OS << (void*)this << ": "; + + for (unsigned i = 0, e = getNumValues(); i != e; ++i) { + if (i) OS << ","; + if (getValueType(i) == MVT::Other) + OS << "ch"; + else + OS << getValueType(i).getEVTString(); + } + OS << " = " << getOperationName(G); +} + +void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { + if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) { + if (!MN->memoperands_empty()) { + OS << "<"; + OS << "Mem:"; + for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(), + e = MN->memoperands_end(); i != e; ++i) { + OS << **i; + if (llvm::next(i) != e) + OS << " "; + } + OS << ">"; + } + } else if (const ShuffleVectorSDNode *SVN = + dyn_cast<ShuffleVectorSDNode>(this)) { + OS << "<"; + for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) { + int Idx = SVN->getMaskElt(i); + if (i) OS << ","; + if (Idx < 0) + OS << "u"; + else + OS << Idx; + } + OS << ">"; + } else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) { + OS << '<' << CSDN->getAPIntValue() << '>'; + } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) { + if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle) + OS << '<' << CSDN->getValueAPF().convertToFloat() << '>'; + else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble) + OS << '<' << CSDN->getValueAPF().convertToDouble() << '>'; + else { + OS << "<APFloat("; + CSDN->getValueAPF().bitcastToAPInt().dump(); + OS << ")>"; + } + } else if (const GlobalAddressSDNode *GADN = + dyn_cast<GlobalAddressSDNode>(this)) { + int64_t offset = GADN->getOffset(); + OS << '<'; + WriteAsOperand(OS, GADN->getGlobal()); + OS << '>'; + if (offset > 0) + OS << " + " << offset; + else + OS << " " << offset; + if (unsigned int TF = GADN->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) { + OS << "<" << FIDN->getIndex() << ">"; + } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) { + OS << "<" << JTDN->getIndex() << ">"; + if (unsigned int TF = JTDN->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){ + int offset = CP->getOffset(); + if (CP->isMachineConstantPoolEntry()) + OS << "<" << *CP->getMachineCPVal() << ">"; + else + OS << "<" << *CP->getConstVal() << ">"; + if (offset > 0) + OS << " + " << offset; + else + OS << " " << offset; + if (unsigned int TF = CP->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) { + OS << "<"; + const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock(); + if (LBB) + OS << LBB->getName() << " "; + OS << (const void*)BBDN->getBasicBlock() << ">"; + } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) { + OS << ' ' << PrintReg(R->getReg(), G ? 
G->getTarget().getRegisterInfo() :0); + } else if (const ExternalSymbolSDNode *ES = + dyn_cast<ExternalSymbolSDNode>(this)) { + OS << "'" << ES->getSymbol() << "'"; + if (unsigned int TF = ES->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) { + if (M->getValue()) + OS << "<" << M->getValue() << ">"; + else + OS << "<null>"; + } else if (const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(this)) { + if (MD->getMD()) + OS << "<" << MD->getMD() << ">"; + else + OS << "<null>"; + } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) { + OS << ":" << N->getVT().getEVTString(); + } + else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) { + OS << "<" << *LD->getMemOperand(); + + bool doExt = true; + switch (LD->getExtensionType()) { + default: doExt = false; break; + case ISD::EXTLOAD: OS << ", anyext"; break; + case ISD::SEXTLOAD: OS << ", sext"; break; + case ISD::ZEXTLOAD: OS << ", zext"; break; + } + if (doExt) + OS << " from " << LD->getMemoryVT().getEVTString(); + + const char *AM = getIndexedModeName(LD->getAddressingMode()); + if (*AM) + OS << ", " << AM; + + OS << ">"; + } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) { + OS << "<" << *ST->getMemOperand(); + + if (ST->isTruncatingStore()) + OS << ", trunc to " << ST->getMemoryVT().getEVTString(); + + const char *AM = getIndexedModeName(ST->getAddressingMode()); + if (*AM) + OS << ", " << AM; + + OS << ">"; + } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) { + OS << "<" << *M->getMemOperand() << ">"; + } else if (const BlockAddressSDNode *BA = + dyn_cast<BlockAddressSDNode>(this)) { + OS << "<"; + WriteAsOperand(OS, BA->getBlockAddress()->getFunction(), false); + OS << ", "; + WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false); + OS << ">"; + if (unsigned int TF = BA->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } + + if (G) + if (unsigned Order = G->GetOrdering(this)) + OS << " [ORD=" << Order << ']'; + + if (getNodeId() != -1) + OS << " [ID=" << getNodeId() << ']'; + + DebugLoc dl = getDebugLoc(); + if (G && !dl.isUnknown()) { + DIScope + Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext())); + OS << " dbg:"; + // Omit the directory, since it's usually long and uninteresting. 
+ if (Scope.Verify()) + OS << Scope.getFilename(); + else + OS << "<unknown>"; + OS << ':' << dl.getLine(); + if (dl.getCol() != 0) + OS << ':' << dl.getCol(); + } +} + +static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (N->getOperand(i).getNode()->hasOneUse()) + DumpNodes(N->getOperand(i).getNode(), indent+2, G); + else + dbgs() << "\n" << std::string(indent+2, ' ') + << (void*)N->getOperand(i).getNode() << ": <multiple use>"; + + dbgs() << '\n'; + dbgs().indent(indent); + N->dump(G); +} + +void SelectionDAG::dump() const { + dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:"; + + for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end(); + I != E; ++I) { + const SDNode *N = I; + if (!N->hasOneUse() && N != getRoot().getNode()) + DumpNodes(N, 2, this); + } + + if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this); + dbgs() << "\n\n"; +} + +void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const { + print_types(OS, G); + print_details(OS, G); +} + +typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet; +static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, + const SelectionDAG *G, VisitedSDNodeSet &once) { + if (!once.insert(N)) // If we've been here before, return now. + return; + + // Dump the current SDNode, but don't end the line yet. + OS.indent(indent); + N->printr(OS, G); + + // Having printed this SDNode, walk the children: + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + const SDNode *child = N->getOperand(i).getNode(); + + if (i) OS << ","; + OS << " "; + + if (child->getNumOperands() == 0) { + // This child has no grandchildren; print it inline right here. + child->printr(OS, G); + once.insert(child); + } else { // Just the address. FIXME: also print the child's opcode. + OS << (void*)child; + if (unsigned RN = N->getOperand(i).getResNo()) + OS << ":" << RN; + } + } + + OS << "\n"; + + // Dump children that have grandchildren on their own line(s). + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + const SDNode *child = N->getOperand(i).getNode(); + DumpNodesr(OS, child, indent+2, G, once); + } +} + +void SDNode::dumpr() const { + VisitedSDNodeSet once; + DumpNodesr(dbgs(), this, 0, 0, once); +} + +void SDNode::dumpr(const SelectionDAG *G) const { + VisitedSDNodeSet once; + DumpNodesr(dbgs(), this, 0, G, once); +} + +static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N, + const SelectionDAG *G, unsigned depth, + unsigned indent) { + if (depth == 0) + return; + + OS.indent(indent); + + N->print(OS, G); + + if (depth < 1) + return; + + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + // Don't follow chain operands. + if (N->getOperand(i).getValueType() == MVT::Other) + continue; + OS << '\n'; + printrWithDepthHelper(OS, N->getOperand(i).getNode(), G, depth-1, indent+2); + } +} + +void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G, + unsigned depth) const { + printrWithDepthHelper(OS, this, G, depth, 0); +} + +void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const { + // Don't print impossibly deep things. + printrWithDepth(OS, G, 10); +} + +void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const { + printrWithDepth(dbgs(), G, depth); +} + +void SDNode::dumprFull(const SelectionDAG *G) const { + // Don't print impossibly deep things. 
+ dumprWithDepth(G, 10); +} + +void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const { + print_types(OS, G); + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + if (i) OS << ", "; else OS << " "; + OS << (void*)getOperand(i).getNode(); + if (unsigned RN = getOperand(i).getResNo()) + OS << ":" << RN; + } + print_details(OS, G); +} diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 68b9146adfe1..605509bd227a 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -41,6 +41,7 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -61,6 +62,80 @@ STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel"); STATISTIC(NumDAGBlocks, "Number of blocks selected using DAG"); STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path"); +#ifndef NDEBUG +static cl::opt<bool> +EnableFastISelVerbose2("fast-isel-verbose2", cl::Hidden, + cl::desc("Enable extra verbose messages in the \"fast\" " + "instruction selector")); + // Terminators +STATISTIC(NumFastIselFailRet,"Fast isel fails on Ret"); +STATISTIC(NumFastIselFailBr,"Fast isel fails on Br"); +STATISTIC(NumFastIselFailSwitch,"Fast isel fails on Switch"); +STATISTIC(NumFastIselFailIndirectBr,"Fast isel fails on IndirectBr"); +STATISTIC(NumFastIselFailInvoke,"Fast isel fails on Invoke"); +STATISTIC(NumFastIselFailResume,"Fast isel fails on Resume"); +STATISTIC(NumFastIselFailUnreachable,"Fast isel fails on Unreachable"); + + // Standard binary operators... +STATISTIC(NumFastIselFailAdd,"Fast isel fails on Add"); +STATISTIC(NumFastIselFailFAdd,"Fast isel fails on FAdd"); +STATISTIC(NumFastIselFailSub,"Fast isel fails on Sub"); +STATISTIC(NumFastIselFailFSub,"Fast isel fails on FSub"); +STATISTIC(NumFastIselFailMul,"Fast isel fails on Mul"); +STATISTIC(NumFastIselFailFMul,"Fast isel fails on FMul"); +STATISTIC(NumFastIselFailUDiv,"Fast isel fails on UDiv"); +STATISTIC(NumFastIselFailSDiv,"Fast isel fails on SDiv"); +STATISTIC(NumFastIselFailFDiv,"Fast isel fails on FDiv"); +STATISTIC(NumFastIselFailURem,"Fast isel fails on URem"); +STATISTIC(NumFastIselFailSRem,"Fast isel fails on SRem"); +STATISTIC(NumFastIselFailFRem,"Fast isel fails on FRem"); + + // Logical operators... +STATISTIC(NumFastIselFailAnd,"Fast isel fails on And"); +STATISTIC(NumFastIselFailOr,"Fast isel fails on Or"); +STATISTIC(NumFastIselFailXor,"Fast isel fails on Xor"); + + // Memory instructions... +STATISTIC(NumFastIselFailAlloca,"Fast isel fails on Alloca"); +STATISTIC(NumFastIselFailLoad,"Fast isel fails on Load"); +STATISTIC(NumFastIselFailStore,"Fast isel fails on Store"); +STATISTIC(NumFastIselFailAtomicCmpXchg,"Fast isel fails on AtomicCmpXchg"); +STATISTIC(NumFastIselFailAtomicRMW,"Fast isel fails on AtomicRWM"); +STATISTIC(NumFastIselFailFence,"Fast isel fails on Frence"); +STATISTIC(NumFastIselFailGetElementPtr,"Fast isel fails on GetElementPtr"); + + // Convert instructions... 
+STATISTIC(NumFastIselFailTrunc,"Fast isel fails on Trunc"); +STATISTIC(NumFastIselFailZExt,"Fast isel fails on ZExt"); +STATISTIC(NumFastIselFailSExt,"Fast isel fails on SExt"); +STATISTIC(NumFastIselFailFPTrunc,"Fast isel fails on FPTrunc"); +STATISTIC(NumFastIselFailFPExt,"Fast isel fails on FPExt"); +STATISTIC(NumFastIselFailFPToUI,"Fast isel fails on FPToUI"); +STATISTIC(NumFastIselFailFPToSI,"Fast isel fails on FPToSI"); +STATISTIC(NumFastIselFailUIToFP,"Fast isel fails on UIToFP"); +STATISTIC(NumFastIselFailSIToFP,"Fast isel fails on SIToFP"); +STATISTIC(NumFastIselFailIntToPtr,"Fast isel fails on IntToPtr"); +STATISTIC(NumFastIselFailPtrToInt,"Fast isel fails on PtrToInt"); +STATISTIC(NumFastIselFailBitCast,"Fast isel fails on BitCast"); + + // Other instructions... +STATISTIC(NumFastIselFailICmp,"Fast isel fails on ICmp"); +STATISTIC(NumFastIselFailFCmp,"Fast isel fails on FCmp"); +STATISTIC(NumFastIselFailPHI,"Fast isel fails on PHI"); +STATISTIC(NumFastIselFailSelect,"Fast isel fails on Select"); +STATISTIC(NumFastIselFailCall,"Fast isel fails on Call"); +STATISTIC(NumFastIselFailShl,"Fast isel fails on Shl"); +STATISTIC(NumFastIselFailLShr,"Fast isel fails on LShr"); +STATISTIC(NumFastIselFailAShr,"Fast isel fails on AShr"); +STATISTIC(NumFastIselFailVAArg,"Fast isel fails on VAArg"); +STATISTIC(NumFastIselFailExtractElement,"Fast isel fails on ExtractElement"); +STATISTIC(NumFastIselFailInsertElement,"Fast isel fails on InsertElement"); +STATISTIC(NumFastIselFailShuffleVector,"Fast isel fails on ShuffleVector"); +STATISTIC(NumFastIselFailExtractValue,"Fast isel fails on ExtractValue"); +STATISTIC(NumFastIselFailInsertValue,"Fast isel fails on InsertValue"); +STATISTIC(NumFastIselFailLandingPad,"Fast isel fails on LandingPad"); +#endif + static cl::opt<bool> EnableFastISelVerbose("fast-isel-verbose", cl::Hidden, cl::desc("Enable verbose messages in the \"fast\" " @@ -142,14 +217,15 @@ namespace llvm { CodeGenOpt::Level OptLevel) { const TargetLowering &TLI = IS->getTargetLowering(); - if (OptLevel == CodeGenOpt::None) + if (OptLevel == CodeGenOpt::None || + TLI.getSchedulingPreference() == Sched::Source) return createSourceListDAGScheduler(IS, OptLevel); - if (TLI.getSchedulingPreference() == Sched::Latency) - return createTDListDAGScheduler(IS, OptLevel); if (TLI.getSchedulingPreference() == Sched::RegPressure) return createBURRListDAGScheduler(IS, OptLevel); if (TLI.getSchedulingPreference() == Sched::Hybrid) return createHybridListDAGScheduler(IS, OptLevel); + if (TLI.getSchedulingPreference() == Sched::VLIW) + return createVLIWDAGScheduler(IS, OptLevel); assert(TLI.getSchedulingPreference() == Sched::ILP && "Unknown sched type!"); return createILPListDAGScheduler(IS, OptLevel); @@ -174,12 +250,11 @@ TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, "TargetLowering::EmitInstrWithCustomInserter!"; #endif llvm_unreachable(0); - return 0; } void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, SDNode *Node) const { - assert(!MI->getDesc().hasPostISelHook() && + assert(!MI->hasPostISelHook() && "If a target marks an instruction with 'hasPostISelHook', " "it must implement TargetLowering::AdjustInstrPostInstrSelection!"); } @@ -188,11 +263,13 @@ void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, // SelectionDAGISel code //===----------------------------------------------------------------------===// +void SelectionDAGISel::ISelUpdater::anchor() { } + SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, CodeGenOpt::Level OL) : 
MachineFunctionPass(ID), TM(tm), TLI(*tm.getTargetLowering()), FuncInfo(new FunctionLoweringInfo(TLI)), - CurDAG(new SelectionDAG(tm)), + CurDAG(new SelectionDAG(tm, OL)), SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)), GFI(), OptLevel(OL), @@ -200,6 +277,7 @@ SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry()); initializeBranchProbabilityInfoPass(*PassRegistry::getPassRegistry()); + initializeTargetLibraryInfoPass(*PassRegistry::getPassRegistry()); } SelectionDAGISel::~SelectionDAGISel() { @@ -213,6 +291,7 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<AliasAnalysis>(); AU.addRequired<GCModuleInfo>(); AU.addPreserved<GCModuleInfo>(); + AU.addRequired<TargetLibraryInfo>(); if (UseMBPI && OptLevel != CodeGenOpt::None) AU.addRequired<BranchProbabilityInfo>(); MachineFunctionPass::getAnalysisUsage(AU); @@ -258,9 +337,9 @@ static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) { bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // Do some sanity-checking on the command-line options. - assert((!EnableFastISelVerbose || EnableFastISel) && + assert((!EnableFastISelVerbose || TM.Options.EnableFastISel) && "-fast-isel-verbose requires -fast-isel"); - assert((!EnableFastISelAbort || EnableFastISel) && + assert((!EnableFastISelAbort || TM.Options.EnableFastISel) && "-fast-isel-abort requires -fast-isel"); const Function &Fn = *mf.getFunction(); @@ -270,6 +349,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { MF = &mf; RegInfo = &MF->getRegInfo(); AA = &getAnalysis<AliasAnalysis>(); + LibInfo = &getAnalysis<TargetLibraryInfo>(); GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : 0; DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); @@ -284,7 +364,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { else FuncInfo->BPI = 0; - SDB->init(GFI, *AA); + SDB->init(GFI, *AA, LibInfo); SelectAllBasicBlocks(Fn); @@ -348,7 +428,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { TII.get(TargetOpcode::DBG_VALUE)) .addReg(CopyUseMI->getOperand(0).getReg(), RegState::Debug) .addImm(Offset).addMetadata(Variable); - EntryMBB->insertAfter(CopyUseMI, NewMI); + MachineBasicBlock::iterator Pos = CopyUseMI; + EntryMBB->insertAfter(Pos, NewMI); } } } @@ -374,7 +455,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { } // Determine if there is a call to setjmp in the machine function. - MF->setCallsSetJmp(Fn.callsFunctionThatReturnsTwice()); + MF->setExposesReturnsTwice(Fn.callsFunctionThatReturnsTwice()); // Replace forward-declared registers with the registers containing // the desired value. 
@@ -427,7 +508,6 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() { Worklist.push_back(CurDAG->getRoot().getNode()); - APInt Mask; APInt KnownZero; APInt KnownOne; @@ -458,8 +538,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() { continue; unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src); - Mask = APInt::getAllOnesValue(SrcVT.getSizeInBits()); - CurDAG->ComputeMaskedBits(Src, Mask, KnownZero, KnownOne); + CurDAG->ComputeMaskedBits(Src, KnownZero, KnownOne); FuncInfo->AddLiveOutRegInfo(DestReg, NumSignBits, KnownZero, KnownOne); } while (!Worklist.empty()); } @@ -478,8 +557,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { #endif { BlockNumber = FuncInfo->MBB->getNumber(); - BlockName = MF->getFunction()->getNameStr() + ":" + - FuncInfo->MBB->getBasicBlock()->getNameStr(); + BlockName = MF->getFunction()->getName().str() + ":" + + FuncInfo->MBB->getBasicBlock()->getName().str(); } DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); @@ -489,7 +568,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { // Run the DAG combiner in pre-legalize mode. { NamedRegionTimer T("DAG Combining 1", GroupName, TimePassesIsEnabled); - CurDAG->Combine(Unrestricted, *AA, OptLevel); + CurDAG->Combine(BeforeLegalizeTypes, *AA, OptLevel); } DEBUG(dbgs() << "Optimized lowered selection DAG: BB#" << BlockNumber @@ -517,7 +596,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { { NamedRegionTimer T("DAG Combining after legalize types", GroupName, TimePassesIsEnabled); - CurDAG->Combine(NoIllegalTypes, *AA, OptLevel); + CurDAG->Combine(AfterLegalizeTypes, *AA, OptLevel); } DEBUG(dbgs() << "Optimized type-legalized selection DAG: BB#" << BlockNumber @@ -542,7 +621,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { { NamedRegionTimer T("DAG Combining after legalize vectors", GroupName, TimePassesIsEnabled); - CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); + CurDAG->Combine(AfterLegalizeVectorOps, *AA, OptLevel); } DEBUG(dbgs() << "Optimized vector-legalized selection DAG: BB#" @@ -564,7 +643,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { // Run the DAG combiner in post-legalize mode. { NamedRegionTimer T("DAG Combining 2", GroupName, TimePassesIsEnabled); - CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); + CurDAG->Combine(AfterLegalizeDAG, *AA, OptLevel); } DEBUG(dbgs() << "Optimized legalized selection DAG: BB#" << BlockNumber @@ -592,7 +671,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { { NamedRegionTimer T("Instruction Scheduling", GroupName, TimePassesIsEnabled); - Scheduler->Run(CurDAG, FuncInfo->MBB, FuncInfo->InsertPt); + Scheduler->Run(CurDAG, FuncInfo->MBB); } if (ViewSUnitDAGs) Scheduler->viewGraph(); @@ -603,8 +682,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { { NamedRegionTimer T("Instruction Creation", GroupName, TimePassesIsEnabled); - LastMBB = FuncInfo->MBB = Scheduler->EmitSchedule(); - FuncInfo->InsertPt = Scheduler->InsertPos; + // FuncInfo->InsertPt is passed by reference and set to the end of the + // scheduled instructions. + LastMBB = FuncInfo->MBB = Scheduler->EmitSchedule(FuncInfo->InsertPt); } // If the block was split, make sure we update any references that are used to @@ -693,43 +773,18 @@ void SelectionDAGISel::PrepareEHLandingPad() { // Assign the call site to the landing pad's begin label. 
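The four Combine() call sites in the hunks above move from the old capability-style combiner level names to stage-based ones. A compilable sketch of the mapping these hunks assume (an illustration of the correspondence only, not the real enum definition):

// Illustration only: an enum shaped like the new combine levels used above.
enum CombineLevelSketch {
  BeforeLegalizeTypes,    // was Unrestricted: right after DAG construction
  AfterLegalizeTypes,     // was NoIllegalTypes: types legal, operations may not be
  AfterLegalizeVectorOps, // was NoIllegalOperations: after vector op legalization
  AfterLegalizeDAG        // was NoIllegalOperations: fully legalized DAG
};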
MF->getMMI().setCallSiteLandingPad(Label, SDB->LPadToCallSiteMap[MBB]); - + const MCInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL); BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II) .addSym(Label); // Mark exception register as live in. - unsigned Reg = TLI.getExceptionAddressRegister(); + unsigned Reg = TLI.getExceptionPointerRegister(); if (Reg) MBB->addLiveIn(Reg); // Mark exception selector register as live in. Reg = TLI.getExceptionSelectorRegister(); if (Reg) MBB->addLiveIn(Reg); - - // FIXME: Hack around an exception handling flaw (PR1508): the personality - // function and list of typeids logically belong to the invoke (or, if you - // like, the basic block containing the invoke), and need to be associated - // with it in the dwarf exception handling tables. Currently however the - // information is provided by an intrinsic (eh.selector) that can be moved - // to unexpected places by the optimizers: if the unwind edge is critical, - // then breaking it can result in the intrinsics being in the successor of - // the landing pad, not the landing pad itself. This results - // in exceptions not being caught because no typeids are associated with - // the invoke. This may not be the only way things can go wrong, but it - // is the only way we try to work around for the moment. - const BasicBlock *LLVMBB = MBB->getBasicBlock(); - const BranchInst *Br = dyn_cast<BranchInst>(LLVMBB->getTerminator()); - - if (Br && Br->isUnconditional()) { // Critical edge? - BasicBlock::const_iterator I, E; - for (I = LLVMBB->begin(), E = --LLVMBB->end(); I != E; ++I) - if (isa<EHSelectorInst>(I)) - break; - - if (I == E) - // No catch info found - try to extract some from the successor. - CopyCatchInfo(Br->getSuccessor(0), LLVMBB, &MF->getMMI(), *FuncInfo); - } } /// TryToFoldFastISelLoad - We're checking to see if we can fold the specified @@ -822,10 +877,90 @@ static bool isFoldedOrDeadInstruction(const Instruction *I, !FuncInfo->isExportedInst(I); // Exported instrs must be computed. } +#ifndef NDEBUG +// Collect per Instruction statistics for fast-isel misses. Only those +// instructions that cause the bail are accounted for. It does not account for +// instructions higher in the block. Thus, summing the per instructions stats +// will not add up to what is reported by NumFastIselFailures. +static void collectFailStats(const Instruction *I) { + switch (I->getOpcode()) { + default: assert (0 && "<Invalid operator> "); + + // Terminators + case Instruction::Ret: NumFastIselFailRet++; return; + case Instruction::Br: NumFastIselFailBr++; return; + case Instruction::Switch: NumFastIselFailSwitch++; return; + case Instruction::IndirectBr: NumFastIselFailIndirectBr++; return; + case Instruction::Invoke: NumFastIselFailInvoke++; return; + case Instruction::Resume: NumFastIselFailResume++; return; + case Instruction::Unreachable: NumFastIselFailUnreachable++; return; + + // Standard binary operators... 
+ case Instruction::Add: NumFastIselFailAdd++; return; + case Instruction::FAdd: NumFastIselFailFAdd++; return; + case Instruction::Sub: NumFastIselFailSub++; return; + case Instruction::FSub: NumFastIselFailFSub++; return; + case Instruction::Mul: NumFastIselFailMul++; return; + case Instruction::FMul: NumFastIselFailFMul++; return; + case Instruction::UDiv: NumFastIselFailUDiv++; return; + case Instruction::SDiv: NumFastIselFailSDiv++; return; + case Instruction::FDiv: NumFastIselFailFDiv++; return; + case Instruction::URem: NumFastIselFailURem++; return; + case Instruction::SRem: NumFastIselFailSRem++; return; + case Instruction::FRem: NumFastIselFailFRem++; return; + + // Logical operators... + case Instruction::And: NumFastIselFailAnd++; return; + case Instruction::Or: NumFastIselFailOr++; return; + case Instruction::Xor: NumFastIselFailXor++; return; + + // Memory instructions... + case Instruction::Alloca: NumFastIselFailAlloca++; return; + case Instruction::Load: NumFastIselFailLoad++; return; + case Instruction::Store: NumFastIselFailStore++; return; + case Instruction::AtomicCmpXchg: NumFastIselFailAtomicCmpXchg++; return; + case Instruction::AtomicRMW: NumFastIselFailAtomicRMW++; return; + case Instruction::Fence: NumFastIselFailFence++; return; + case Instruction::GetElementPtr: NumFastIselFailGetElementPtr++; return; + + // Convert instructions... + case Instruction::Trunc: NumFastIselFailTrunc++; return; + case Instruction::ZExt: NumFastIselFailZExt++; return; + case Instruction::SExt: NumFastIselFailSExt++; return; + case Instruction::FPTrunc: NumFastIselFailFPTrunc++; return; + case Instruction::FPExt: NumFastIselFailFPExt++; return; + case Instruction::FPToUI: NumFastIselFailFPToUI++; return; + case Instruction::FPToSI: NumFastIselFailFPToSI++; return; + case Instruction::UIToFP: NumFastIselFailUIToFP++; return; + case Instruction::SIToFP: NumFastIselFailSIToFP++; return; + case Instruction::IntToPtr: NumFastIselFailIntToPtr++; return; + case Instruction::PtrToInt: NumFastIselFailPtrToInt++; return; + case Instruction::BitCast: NumFastIselFailBitCast++; return; + + // Other instructions... + case Instruction::ICmp: NumFastIselFailICmp++; return; + case Instruction::FCmp: NumFastIselFailFCmp++; return; + case Instruction::PHI: NumFastIselFailPHI++; return; + case Instruction::Select: NumFastIselFailSelect++; return; + case Instruction::Call: NumFastIselFailCall++; return; + case Instruction::Shl: NumFastIselFailShl++; return; + case Instruction::LShr: NumFastIselFailLShr++; return; + case Instruction::AShr: NumFastIselFailAShr++; return; + case Instruction::VAArg: NumFastIselFailVAArg++; return; + case Instruction::ExtractElement: NumFastIselFailExtractElement++; return; + case Instruction::InsertElement: NumFastIselFailInsertElement++; return; + case Instruction::ShuffleVector: NumFastIselFailShuffleVector++; return; + case Instruction::ExtractValue: NumFastIselFailExtractValue++; return; + case Instruction::InsertValue: NumFastIselFailInsertValue++; return; + case Instruction::LandingPad: NumFastIselFailLandingPad++; return; + } +} +#endif + void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Initialize the Fast-ISel state, if needed. FastISel *FastIS = 0; - if (EnableFastISel) + if (TM.Options.EnableFastISel) FastIS = TLI.createFastISel(*FuncInfo); // Iterate over all basic blocks in the function. 
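The hunk below replaces the old one-failure-per-bail statistic with bookkeeping that charges every instruction handed back to SelectionDAG as a fast-isel failure. A worked example of the accounting, using hypothetical counts:

// Hypothetical block of 10 IR instructions, selected bottom-up:
//   NumFastIselRemaining = std::distance(Begin, End) = 10
//   fast-isel selects 4 instructions           -> NumFastIselRemaining = 6
//   fast-isel bails on a call; SelectBasicBlock() emits the call plus the 3
//   instructions feeding it, so std::distance(Begin, BI) is now 2:
//   RemainingNow = 2
//   NumFastIselFailures += NumFastIselRemaining - RemainingNow;   // += 4
//   NumFastIselRemaining = RemainingNow;                          // = 2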
@@ -894,13 +1029,16 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FastIS->setLastLocalValue(0); } + unsigned NumFastIselRemaining = std::distance(Begin, End); // Do FastISel on as many instructions as possible. for (; BI != Begin; --BI) { const Instruction *Inst = llvm::prior(BI); // If we no longer require this instruction, skip it. - if (isFoldedOrDeadInstruction(Inst, FuncInfo)) + if (isFoldedOrDeadInstruction(Inst, FuncInfo)) { + --NumFastIselRemaining; continue; + } // Bottom-up: reset the insert pos at the top, after any local-value // instructions. @@ -908,6 +1046,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Try to select the instruction with FastISel. if (FastIS->SelectInstruction(Inst)) { + --NumFastIselRemaining; ++NumFastIselSuccess; // If fast isel succeeded, skip over all the folded instructions, and // then see if there is a load right before the selected instructions. @@ -920,15 +1059,23 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { } if (BeforeInst != Inst && isa<LoadInst>(BeforeInst) && BeforeInst->hasOneUse() && - TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), Inst, FastIS)) + TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), Inst, FastIS)) { // If we succeeded, don't re-select the load. BI = llvm::next(BasicBlock::const_iterator(BeforeInst)); + --NumFastIselRemaining; + ++NumFastIselSuccess; + } continue; } +#ifndef NDEBUG + if (EnableFastISelVerbose2) + collectFailStats(Inst); +#endif + // Then handle certain instructions as single-LLVM-Instruction blocks. if (isa<CallInst>(Inst)) { - ++NumFastIselFailures; + if (EnableFastISelVerbose || EnableFastISelAbort) { dbgs() << "FastISel missed call: "; Inst->dump(); @@ -943,24 +1090,30 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { bool HadTailCall = false; SelectBasicBlock(Inst, BI, HadTailCall); + // Recompute NumFastIselRemaining as Selection DAG instruction + // selection may have handled the call, input args, etc. + unsigned RemainingNow = std::distance(Begin, BI); + NumFastIselFailures += NumFastIselRemaining - RemainingNow; + // If the call was emitted as a tail call, we're done with the block. if (HadTailCall) { --BI; break; } + NumFastIselRemaining = RemainingNow; continue; } if (isa<TerminatorInst>(Inst) && !isa<BranchInst>(Inst)) { // Don't abort, and use a different message for terminator misses. - ++NumFastIselFailures; + NumFastIselFailures += NumFastIselRemaining; if (EnableFastISelVerbose || EnableFastISelAbort) { dbgs() << "FastISel missed terminator: "; Inst->dump(); } } else { - ++NumFastIselFailures; + NumFastIselFailures += NumFastIselRemaining; if (EnableFastISelVerbose || EnableFastISelAbort) { dbgs() << "FastISel miss: "; Inst->dump(); @@ -1289,7 +1442,7 @@ bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS, APInt NeededMask = DesiredMask & ~ActualMask; APInt KnownZero, KnownOne; - CurDAG->ComputeMaskedBits(LHS, NeededMask, KnownZero, KnownOne); + CurDAG->ComputeMaskedBits(LHS, KnownZero, KnownOne); // If all the missing bits in the or are already known to be set, match! if ((NeededMask & KnownOne) == NeededMask) @@ -2025,6 +2178,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case ISD::EntryToken: // These nodes remain the same. 
case ISD::BasicBlock: case ISD::Register: + case ISD::RegisterMask: //case ISD::VALUETYPE: //case ISD::CONDCODE: case ISD::HANDLENODE: diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index cd1647b17b9b..6cde05aea82a 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -19,7 +19,6 @@ #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetMachine.h" @@ -28,7 +27,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Config/config.h" using namespace llvm; namespace llvm { @@ -148,7 +146,7 @@ std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node, void SelectionDAG::viewGraph(const std::string &Title) { // This code is only for debugging! #ifndef NDEBUG - ViewGraph(this, "dag." + getMachineFunction().getFunction()->getNameStr(), + ViewGraph(this, "dag." + getMachineFunction().getFunction()->getName(), false, Title); #else errs() << "SelectionDAG::viewGraph is only available in debug builds on " diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 907d8d9da1af..09a2b1f3d7a5 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -36,31 +36,9 @@ using namespace llvm; /// - the promotion of vector elements. This feature is disabled by default /// and only enabled using this flag. static cl::opt<bool> -AllowPromoteIntElem("promote-elements", cl::Hidden, +AllowPromoteIntElem("promote-elements", cl::Hidden, cl::init(true), cl::desc("Allow promotion of integer vector element types")); -namespace llvm { -TLSModel::Model getTLSModel(const GlobalValue *GV, Reloc::Model reloc) { - bool isLocal = GV->hasLocalLinkage(); - bool isDeclaration = GV->isDeclaration(); - // FIXME: what should we do for protected and internal visibility? - // For variables, is internal different from hidden? - bool isHidden = GV->hasHiddenVisibility(); - - if (reloc == Reloc::PIC_) { - if (isLocal || isHidden) - return TLSModel::LocalDynamic; - else - return TLSModel::GeneralDynamic; - } else { - if (!isDeclaration || isHidden) - return TLSModel::LocalExec; - else - return TLSModel::InitialExec; - } -} -} - /// InitLibcallNames - Set default libcall names. /// static void InitLibcallNames(const char **Names) { @@ -572,21 +550,42 @@ TargetLowering::TargetLowering(const TargetMachine &tm, // ConstantFP nodes default to expand. Targets can either change this to // Legal, in which case all fp constants are legal, or use isFPImmLegal() // to optimize expansions for certain constants. + setOperationAction(ISD::ConstantFP, MVT::f16, Expand); setOperationAction(ISD::ConstantFP, MVT::f32, Expand); setOperationAction(ISD::ConstantFP, MVT::f64, Expand); setOperationAction(ISD::ConstantFP, MVT::f80, Expand); // These library functions default to expand. 
- setOperationAction(ISD::FLOG , MVT::f64, Expand); - setOperationAction(ISD::FLOG2, MVT::f64, Expand); - setOperationAction(ISD::FLOG10,MVT::f64, Expand); - setOperationAction(ISD::FEXP , MVT::f64, Expand); - setOperationAction(ISD::FEXP2, MVT::f64, Expand); - setOperationAction(ISD::FLOG , MVT::f32, Expand); - setOperationAction(ISD::FLOG2, MVT::f32, Expand); - setOperationAction(ISD::FLOG10,MVT::f32, Expand); - setOperationAction(ISD::FEXP , MVT::f32, Expand); - setOperationAction(ISD::FEXP2, MVT::f32, Expand); + setOperationAction(ISD::FLOG , MVT::f16, Expand); + setOperationAction(ISD::FLOG2, MVT::f16, Expand); + setOperationAction(ISD::FLOG10, MVT::f16, Expand); + setOperationAction(ISD::FEXP , MVT::f16, Expand); + setOperationAction(ISD::FEXP2, MVT::f16, Expand); + setOperationAction(ISD::FFLOOR, MVT::f16, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::f16, Expand); + setOperationAction(ISD::FCEIL, MVT::f16, Expand); + setOperationAction(ISD::FRINT, MVT::f16, Expand); + setOperationAction(ISD::FTRUNC, MVT::f16, Expand); + setOperationAction(ISD::FLOG , MVT::f32, Expand); + setOperationAction(ISD::FLOG2, MVT::f32, Expand); + setOperationAction(ISD::FLOG10, MVT::f32, Expand); + setOperationAction(ISD::FEXP , MVT::f32, Expand); + setOperationAction(ISD::FEXP2, MVT::f32, Expand); + setOperationAction(ISD::FFLOOR, MVT::f32, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::f32, Expand); + setOperationAction(ISD::FCEIL, MVT::f32, Expand); + setOperationAction(ISD::FRINT, MVT::f32, Expand); + setOperationAction(ISD::FTRUNC, MVT::f32, Expand); + setOperationAction(ISD::FLOG , MVT::f64, Expand); + setOperationAction(ISD::FLOG2, MVT::f64, Expand); + setOperationAction(ISD::FLOG10, MVT::f64, Expand); + setOperationAction(ISD::FEXP , MVT::f64, Expand); + setOperationAction(ISD::FEXP2, MVT::f64, Expand); + setOperationAction(ISD::FFLOOR, MVT::f64, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand); + setOperationAction(ISD::FCEIL, MVT::f64, Expand); + setOperationAction(ISD::FRINT, MVT::f64, Expand); + setOperationAction(ISD::FTRUNC, MVT::f64, Expand); // Default ISD::TRAP to expand (which turns it into abort). setOperationAction(ISD::TRAP, MVT::Other, Expand); @@ -610,7 +609,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm, ExceptionSelectorRegister = 0; BooleanContents = UndefinedBooleanContent; BooleanVectorContents = UndefinedBooleanContent; - SchedPreferenceInfo = Sched::Latency; + SchedPreferenceInfo = Sched::ILP; JumpBufSize = 0; JumpBufAlignment = 0; MinFunctionAlignment = 0; @@ -1080,8 +1079,12 @@ unsigned TargetLowering::getJumpTableEncoding() const { SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const { // If our PIC model is GP relative, use the global offset table as the base. - if (getJumpTableEncoding() == MachineJumpTableInfo::EK_GPRel32BlockAddress) + unsigned JTEncoding = getJumpTableEncoding(); + + if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) || + (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress)) return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy()); + return Table; } @@ -1223,7 +1226,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (Depth != 0) { // If not at the root, Just compute the KnownZero/KnownOne bits to // simplify things downstream. 
- TLO.DAG.ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth); + TLO.DAG.ComputeMaskedBits(Op, KnownZero, KnownOne, Depth); return false; } // If this is the root being simplified, allow it to have multiple uses, @@ -1242,8 +1245,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, switch (Op.getOpcode()) { case ISD::Constant: // We know all of the bits for a constant! - KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue() & NewMask; - KnownZero = ~KnownOne & NewMask; + KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue(); + KnownZero = ~KnownOne; return false; // Don't fall through, will infinitely loop. case ISD::AND: // If the RHS is a constant, check to see if the LHS would be zero without @@ -1253,8 +1256,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { APInt LHSZero, LHSOne; // Do not increment Depth here; that can cause an infinite loop. - TLO.DAG.ComputeMaskedBits(Op.getOperand(0), NewMask, - LHSZero, LHSOne, Depth); + TLO.DAG.ComputeMaskedBits(Op.getOperand(0), LHSZero, LHSOne, Depth); // If the LHS already has zeros where RHSC does, this and is dead. if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask)) return TLO.CombineTo(Op, Op.getOperand(0)); @@ -1473,9 +1475,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (InOp.getNode()->getOpcode() == ISD::ANY_EXTEND) { SDValue InnerOp = InOp.getNode()->getOperand(0); EVT InnerVT = InnerOp.getValueType(); - if ((APInt::getHighBitsSet(BitWidth, - BitWidth - InnerVT.getSizeInBits()) & - DemandedMask) == 0 && + unsigned InnerBits = InnerVT.getSizeInBits(); + if (ShAmt < InnerBits && NewMask.lshr(InnerBits) == 0 && isTypeDesirableForOp(ISD::SHL, InnerVT)) { EVT ShTy = getShiftAmountTy(InnerVT); if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits())) @@ -1545,7 +1546,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // always convert this into a logical shr, even if the shift amount is // variable. The low bit of the shift cannot be an input sign bit unless // the shift amount is >= the size of the datatype, which is undefined. - if (DemandedMask == 1) + if (NewMask == 1) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op.getOperand(0), Op.getOperand(1))); @@ -1588,23 +1589,40 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } break; case ISD::SIGN_EXTEND_INREG: { - EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + + APInt MsbMask = APInt::getHighBitsSet(BitWidth, 1); + // If we only care about the highest bit, don't bother shifting right. + if (MsbMask == DemandedMask) { + unsigned ShAmt = ExVT.getScalarType().getSizeInBits(); + SDValue InOp = Op.getOperand(0); + + // Compute the correct shift amount type, which must be getShiftAmountTy + // for scalar types after legalization. + EVT ShiftAmtTy = Op.getValueType(); + if (TLO.LegalTypes() && !ShiftAmtTy.isVector()) + ShiftAmtTy = getShiftAmountTy(ShiftAmtTy); + + SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, ShiftAmtTy); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, + Op.getValueType(), InOp, ShiftAmt)); + } // Sign extension. Compute the demanded bits in the result that are not // present in the input. APInt NewBits = APInt::getHighBitsSet(BitWidth, - BitWidth - EVT.getScalarType().getSizeInBits()); + BitWidth - ExVT.getScalarType().getSizeInBits()); // If none of the extended bits are demanded, eliminate the sextinreg. 
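The new MsbMask case in the SIGN_EXTEND_INREG handling above rewrites the node as a plain left shift when only the sign bit of the result is demanded. A small standalone check of why that is sound, assuming an i8 value extended in an i32 (mirroring BitWidth = 32, ExVT = i8, shift amount 32 - 8 = 24); the program is an illustration, not part of the patch:

#include <cassert>
#include <cstdint>

int main() {
  for (int v = -128; v <= 127; ++v) {
    uint32_t x = static_cast<uint32_t>(v) & 0xFF;      // i8 payload in an i32
    // sign_extend_inreg x, i8: bit 7 is copied across bits 8..31.
    uint32_t SextInReg = static_cast<uint32_t>(static_cast<int32_t>(v));
    // The rewrite: shl x, 24 places bit 7 of x into bit 31.
    uint32_t Shifted = x << 24;
    // Only bit 31 is demanded, and it agrees for every i8 value.
    assert((SextInReg & 0x80000000u) == (Shifted & 0x80000000u));
  }
  return 0;
}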
if ((NewBits & NewMask) == 0) return TLO.CombineTo(Op, Op.getOperand(0)); APInt InSignBit = - APInt::getSignBit(EVT.getScalarType().getSizeInBits()).zext(BitWidth); + APInt::getSignBit(ExVT.getScalarType().getSizeInBits()).zext(BitWidth); APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, - EVT.getScalarType().getSizeInBits()) & + ExVT.getScalarType().getSizeInBits()) & NewMask; // Since the sign extended bits are demanded, we know that the sign @@ -1622,7 +1640,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If the input sign bit is known zero, convert this into a zero extension. if (KnownZero.intersects(InSignBit)) return TLO.CombineTo(Op, - TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,EVT)); + TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,ExVT)); if (KnownOne.intersects(InSignBit)) { // Input sign bit known set KnownOne |= NewBits; @@ -1688,11 +1706,11 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If the sign bit is known one, the top bits match. if (KnownOne.intersects(InSignBit)) { - KnownOne |= NewBits; - KnownZero &= ~NewBits; + KnownOne |= NewBits; + assert((KnownZero & NewBits) == 0); } else { // Otherwise, top bits aren't known. - KnownOne &= ~NewBits; - KnownZero &= ~NewBits; + assert((KnownOne & NewBits) == 0); + assert((KnownZero & NewBits) == 0); } break; } @@ -1783,7 +1801,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, case ISD::BITCAST: // If this is an FP->Int bitcast and if the sign bit is the only // thing demanded, turn this into a FGETSIGN. - if (!Op.getOperand(0).getValueType().isVector() && + if (!TLO.LegalOperations() && + !Op.getValueType().isVector() && + !Op.getOperand(0).getValueType().isVector() && NewMask == APInt::getSignBit(Op.getValueType().getSizeInBits()) && Op.getOperand(0).getValueType().isFloatingPoint()) { bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType()); @@ -1824,7 +1844,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // FALL THROUGH default: // Just use ComputeMaskedBits to compute output bits. - TLO.DAG.ComputeMaskedBits(Op, NewMask, KnownZero, KnownOne, Depth); + TLO.DAG.ComputeMaskedBits(Op, KnownZero, KnownOne, Depth); break; } @@ -1840,7 +1860,6 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, /// in Mask are known to be either zero or one and return them in the /// KnownZero/KnownOne bitsets. void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, - const APInt &Mask, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, @@ -1851,7 +1870,7 @@ void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, Op.getOpcode() == ISD::INTRINSIC_VOID) && "Should use MaskedValueIsZero if you don't know whether Op" " is a target node!"); - KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); + KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); } /// ComputeNumSignBitsForTargetNode - This method can be implemented by @@ -1895,9 +1914,8 @@ static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) { // Fall back to ComputeMaskedBits to catch other known cases. 
EVT OpVT = Val.getValueType(); unsigned BitWidth = OpVT.getScalarType().getSizeInBits(); - APInt Mask = APInt::getAllOnesValue(BitWidth); APInt KnownZero, KnownOne; - DAG.ComputeMaskedBits(Val, Mask, KnownZero, KnownOne); + DAG.ComputeMaskedBits(Val, KnownZero, KnownOne); return (KnownZero.countPopulation() == BitWidth - 1) && (KnownOne.countPopulation() == 1); } @@ -2060,7 +2078,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset); SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr, Lod->getPointerInfo().getWithOffset(bestOffset), - false, false, NewAlign); + false, false, false, NewAlign); return DAG.getSetCC(dl, VT, DAG.getNode(ISD::AND, dl, newVT, NewLoad, DAG.getConstant(bestMask.trunc(bestWidth), @@ -2393,8 +2411,15 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (N0 == N1) { // We can always fold X == X for integer setcc's. - if (N0.getValueType().isInteger()) - return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT); + if (N0.getValueType().isInteger()) { + switch (getBooleanContents(N0.getValueType().isVector())) { + case UndefinedBooleanContent: + case ZeroOrOneBooleanContent: + return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT); + case ZeroOrNegativeOneBooleanContent: + return DAG.getConstant(ISD::isTrueWhenEqual(Cond) ? -1 : 0, VT); + } + } unsigned UOF = ISD::getUnorderedFlavor(Cond); if (UOF == 2) // FP operators that are undefined on NaNs. return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT); @@ -2428,6 +2453,10 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } } + // If RHS is a legal immediate value for a compare instruction, we need + // to be careful about increasing register pressure needlessly. + bool LegalRHSImm = false; + if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) { if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { // Turn (X+C1) == C2 --> X == C2-C1 @@ -2462,25 +2491,33 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, Cond); } } + + // Could RHSC fold directly into a compare? + if (RHSC->getValueType(0).getSizeInBits() <= 64) + LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue()); } // Simplify (X+Z) == X --> Z == 0 - if (N0.getOperand(0) == N1) - return DAG.getSetCC(dl, VT, N0.getOperand(1), - DAG.getConstant(0, N0.getValueType()), Cond); - if (N0.getOperand(1) == N1) { - if (DAG.isCommutativeBinOp(N0.getOpcode())) - return DAG.getSetCC(dl, VT, N0.getOperand(0), - DAG.getConstant(0, N0.getValueType()), Cond); - else if (N0.getNode()->hasOneUse()) { - assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!"); - // (Z-X) == X --> Z == X<<1 - SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), - N1, + // Don't do this if X is an immediate that can fold into a cmp + // instruction and X+Z has other uses. It could be an induction variable + // chain, and the transform would increase register pressure. 
+ if (!LegalRHSImm || N0.getNode()->hasOneUse()) { + if (N0.getOperand(0) == N1) + return DAG.getSetCC(dl, VT, N0.getOperand(1), + DAG.getConstant(0, N0.getValueType()), Cond); + if (N0.getOperand(1) == N1) { + if (DAG.isCommutativeBinOp(N0.getOpcode())) + return DAG.getSetCC(dl, VT, N0.getOperand(0), + DAG.getConstant(0, N0.getValueType()), Cond); + else if (N0.getNode()->hasOneUse()) { + assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!"); + // (Z-X) == X --> Z == X<<1 + SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N1, DAG.getConstant(1, getShiftAmountTy(N1.getValueType()))); - if (!DCI.isCalledByLegalizer()) - DCI.AddToWorklist(SH.getNode()); - return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(SH.getNode()); + return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond); + } } } } @@ -2984,7 +3021,6 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( /// is. static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { switch (CT) { - default: llvm_unreachable("Unknown constraint type!"); case TargetLowering::C_Other: case TargetLowering::C_Unknown: return 0; @@ -2995,6 +3031,7 @@ static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { case TargetLowering::C_Memory: return 3; } + llvm_unreachable("Invalid constraint type"); } /// Examine constraint type and operand type and determine a weight value. @@ -3242,8 +3279,9 @@ SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, DebugLoc dl, /// return a DAG expression to select that will generate the same value by /// multiplying by a magic number. See: /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> -SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, - std::vector<SDNode*>* Created) const { +SDValue TargetLowering:: +BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, + std::vector<SDNode*>* Created) const { EVT VT = N->getValueType(0); DebugLoc dl= N->getDebugLoc(); @@ -3258,10 +3296,12 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, // Multiply the numerator (operand 0) by the magic value // FIXME: We should support doing a MUL in a wider type SDValue Q; - if (isOperationLegalOrCustom(ISD::MULHS, VT)) + if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT) : + isOperationLegalOrCustom(ISD::MULHS, VT)) Q = DAG.getNode(ISD::MULHS, dl, VT, N->getOperand(0), DAG.getConstant(magics.m, VT)); - else if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) + else if (IsAfterLegalization ? isOperationLegal(ISD::SMUL_LOHI, VT) : + isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) Q = SDValue(DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), N->getOperand(0), DAG.getConstant(magics.m, VT)).getNode(), 1); @@ -3299,8 +3339,9 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, /// return a DAG expression to select that will generate the same value by /// multiplying by a magic number. 
See: /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> -SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, - std::vector<SDNode*>* Created) const { +SDValue TargetLowering:: +BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, + std::vector<SDNode*>* Created) const { EVT VT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); @@ -3332,9 +3373,11 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, // Multiply the numerator (operand 0) by the magic value // FIXME: We should support doing a MUL in a wider type - if (isOperationLegalOrCustom(ISD::MULHU, VT)) + if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT) : + isOperationLegalOrCustom(ISD::MULHU, VT)) Q = DAG.getNode(ISD::MULHU, dl, VT, Q, DAG.getConstant(magics.m, VT)); - else if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT)) + else if (IsAfterLegalization ? isOperationLegal(ISD::UMUL_LOHI, VT) : + isOperationLegalOrCustom(ISD::UMUL_LOHI, VT)) Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), Q, DAG.getConstant(magics.m, VT)).getNode(), 1); else diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp index 2609256c8ffa..0016047a134e 100644 --- a/lib/CodeGen/ShadowStackGC.cpp +++ b/lib/CodeGen/ShadowStackGC.cpp @@ -116,8 +116,7 @@ namespace { // Branches and invokes do not escape, only unwind, resume, and return // do. TerminatorInst *TI = CurBB->getTerminator(); - if (!isa<UnwindInst>(TI) && !isa<ReturnInst>(TI) && - !isa<ResumeInst>(TI)) + if (!isa<ReturnInst>(TI) && !isa<ResumeInst>(TI)) continue; Builder.SetInsertPoint(TI->getParent(), TI); diff --git a/lib/CodeGen/ShrinkWrapping.cpp b/lib/CodeGen/ShrinkWrapping.cpp index 160f38f69236..21ae2f5e56eb 100644 --- a/lib/CodeGen/ShrinkWrapping.cpp +++ b/lib/CodeGen/ShrinkWrapping.cpp @@ -93,6 +93,7 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const { } AU.addPreserved<MachineLoopInfo>(); AU.addPreserved<MachineDominatorTree>(); + AU.addRequired<TargetPassConfig>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -124,7 +125,7 @@ MachineLoop* PEI::getTopLevelLoopParent(MachineLoop *LP) { } bool PEI::isReturnBlock(MachineBasicBlock* MBB) { - return (MBB && !MBB->empty() && MBB->back().getDesc().isReturn()); + return (MBB && !MBB->empty() && MBB->back().isReturn()); } // Initialize shrink wrapping DFA sets, called before iterations. @@ -158,7 +159,7 @@ void PEI::initShrinkWrappingInfo() { // via --shrink-wrap-func=<funcname>. 
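The BuildSDIV and BuildUDIV hunks above (in TargetLowering.cpp) only thread an IsAfterLegalization flag through the existing expansion; the expansion itself is the classic magic-number trick, where division by a constant becomes a high multiply plus shifts. A standalone check of one assumed instance, unsigned 32-bit division by 3 via MULHU with magic value 0xAAAAAAAB and a post-shift of 1 (constants chosen for the example, not taken from the patch):

#include <cassert>
#include <cstdint>
#include <initializer_list>

int main() {
  const uint64_t Magic = 0xAAAAAAABull;                    // ceil(2^33 / 3)
  for (uint32_t x : {0u, 1u, 2u, 3u, 7u, 100u, 0xFFFFFFFFu}) {
    uint32_t MulHi = static_cast<uint32_t>((uint64_t(x) * Magic) >> 32); // MULHU
    uint32_t Q = MulHi >> 1;                               // post-shift
    assert(Q == x / 3);                                    // matches a real udiv
  }
  return 0;
}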
#ifndef NDEBUG if (ShrinkWrapFunc != "") { - std::string MFName = MF->getFunction()->getNameStr(); + std::string MFName = MF->getFunction()->getName().str(); ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc); } #endif @@ -1045,7 +1046,7 @@ std::string PEI::getBasicBlockName(const MachineBasicBlock* MBB) { return ""; if (MBB->getBasicBlock()) - return MBB->getBasicBlock()->getNameStr(); + return MBB->getBasicBlock()->getName().str(); std::ostringstream name; name << "_MBB_" << MBB->getNumber(); diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index ded2459d4278..9a86f32d8f96 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -1,4 +1,4 @@ -//===- SjLjEHPass.cpp - Eliminate Invoke & Unwind instructions -----------===// +//===- SjLjEHPrepare.cpp - Eliminate Invoke & Unwind instructions ---------===// // // The LLVM Compiler Infrastructure // @@ -29,21 +29,20 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/IRBuilder.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include <set> using namespace llvm; -static cl::opt<bool> DisableOldSjLjEH("disable-old-sjlj-eh", cl::Hidden, - cl::desc("Disable the old SjLj EH preparation pass")); - STATISTIC(NumInvokes, "Number of invokes replaced"); -STATISTIC(NumUnwinds, "Number of unwinds replaced"); STATISTIC(NumSpilled, "Number of registers live across unwind edges"); namespace { - class SjLjEHPass : public FunctionPass { + class SjLjEHPrepare : public FunctionPass { const TargetLowering *TLI; Type *FunctionContextTy; Constant *RegisterFn; @@ -54,16 +53,12 @@ namespace { Constant *StackRestoreFn; Constant *LSDAAddrFn; Value *PersonalityFn; - Constant *SelectorFn; - Constant *ExceptionFn; Constant *CallSiteFn; - Constant *DispatchSetupFn; Constant *FuncCtxFn; - Value *CallSite; - DenseMap<InvokeInst*, BasicBlock*> LPadSuccMap; + AllocaInst *FuncCtx; public: static char ID; // Pass identification, replacement for typeid - explicit SjLjEHPass(const TargetLowering *tli = NULL) + explicit SjLjEHPrepare(const TargetLowering *tli = NULL) : FunctionPass(ID), TLI(tli) { } bool doInitialization(Module &M); bool runOnFunction(Function &F); @@ -75,28 +70,24 @@ namespace { private: bool setupEntryBlockAndCallSites(Function &F); + void substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, + Value *SelVal); Value *setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads); void lowerIncomingArguments(Function &F); void lowerAcrossUnwindEdges(Function &F, ArrayRef<InvokeInst*> Invokes); - - void insertCallSiteStore(Instruction *I, int Number, Value *CallSite); - void markInvokeCallSite(InvokeInst *II, int InvokeNo, Value *CallSite, - SwitchInst *CatchSwitch); - void splitLiveRangesAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes); - void splitLandingPad(InvokeInst *II); - bool insertSjLjEHSupport(Function &F); + void insertCallSiteStore(Instruction *I, int Number); }; } // end anonymous namespace -char SjLjEHPass::ID = 0; +char SjLjEHPrepare::ID = 0; -// Public Interface To the SjLjEHPass pass. -FunctionPass *llvm::createSjLjEHPass(const TargetLowering *TLI) { - return new SjLjEHPass(TLI); +// Public Interface To the SjLjEHPrepare pass. 
+FunctionPass *llvm::createSjLjEHPreparePass(const TargetLowering *TLI) { + return new SjLjEHPrepare(TLI); } // doInitialization - Set up decalarations and types needed to process // exceptions. -bool SjLjEHPass::doInitialization(Module &M) { +bool SjLjEHPrepare::doInitialization(Module &M) { // Build the function context structure. // builtin_setjmp uses a five word jbuf Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext()); @@ -123,11 +114,7 @@ bool SjLjEHPass::doInitialization(Module &M) { StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore); BuiltinSetjmpFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setjmp); LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda); - SelectorFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_selector); - ExceptionFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_exception); CallSiteFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_callsite); - DispatchSetupFn - = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_dispatch_setup); FuncCtxFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_functioncontext); PersonalityFn = 0; @@ -136,583 +123,67 @@ bool SjLjEHPass::doInitialization(Module &M) { /// insertCallSiteStore - Insert a store of the call-site value to the /// function context -void SjLjEHPass::insertCallSiteStore(Instruction *I, int Number, - Value *CallSite) { - ConstantInt *CallSiteNoC = ConstantInt::get(Type::getInt32Ty(I->getContext()), - Number); - // Insert a store of the call-site number - new StoreInst(CallSiteNoC, CallSite, true, I); // volatile -} - -/// splitLandingPad - Split a landing pad. This takes considerable care because -/// of PHIs and other nasties. The problem is that the jump table needs to jump -/// to the landing pad block. However, the landing pad block can be jumped to -/// only by an invoke instruction. So we clone the landingpad instruction into -/// its own basic block, have the invoke jump to there. The landingpad -/// instruction's basic block's successor is now the target for the jump table. -/// -/// But because of PHI nodes, we need to create another basic block for the jump -/// table to jump to. This is definitely a hack, because the values for the PHI -/// nodes may not be defined on the edge from the jump table. But that's okay, -/// because the jump table is simply a construct to mimic what is happening in -/// the CFG. So the values are mysteriously there, even though there is no value -/// for the PHI from the jump table's edge (hence calling this a hack). -void SjLjEHPass::splitLandingPad(InvokeInst *II) { - SmallVector<BasicBlock*, 2> NewBBs; - SplitLandingPadPredecessors(II->getUnwindDest(), II->getParent(), - ".1", ".2", this, NewBBs); - - // Create an empty block so that the jump table has something to jump to - // which doesn't have any PHI nodes. 
- BasicBlock *LPad = NewBBs[0]; - BasicBlock *Succ = *succ_begin(LPad); - BasicBlock *JumpTo = BasicBlock::Create(II->getContext(), "jt.land", - LPad->getParent(), Succ); - LPad->getTerminator()->eraseFromParent(); - BranchInst::Create(JumpTo, LPad); - BranchInst::Create(Succ, JumpTo); - LPadSuccMap[II] = JumpTo; - - for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) { - PHINode *PN = cast<PHINode>(I); - Value *Val = PN->removeIncomingValue(LPad, false); - PN->addIncoming(Val, JumpTo); - } -} - -/// markInvokeCallSite - Insert code to mark the call_site for this invoke -void SjLjEHPass::markInvokeCallSite(InvokeInst *II, int InvokeNo, - Value *CallSite, - SwitchInst *CatchSwitch) { - ConstantInt *CallSiteNoC= ConstantInt::get(Type::getInt32Ty(II->getContext()), - InvokeNo); - // The runtime comes back to the dispatcher with the call_site - 1 in - // the context. Odd, but there it is. - ConstantInt *SwitchValC = ConstantInt::get(Type::getInt32Ty(II->getContext()), - InvokeNo - 1); - - // If the unwind edge has phi nodes, split the edge. - if (isa<PHINode>(II->getUnwindDest()->begin())) { - // FIXME: New EH - This if-condition will be always true in the new scheme. - if (II->getUnwindDest()->isLandingPad()) - splitLandingPad(II); - else - SplitCriticalEdge(II, 1, this); - - // If there are any phi nodes left, they must have a single predecessor. - while (PHINode *PN = dyn_cast<PHINode>(II->getUnwindDest()->begin())) { - PN->replaceAllUsesWith(PN->getIncomingValue(0)); - PN->eraseFromParent(); - } - } +void SjLjEHPrepare::insertCallSiteStore(Instruction *I, int Number) { + IRBuilder<> Builder(I); - // Insert the store of the call site value - insertCallSiteStore(II, InvokeNo, CallSite); - - // Record the call site value for the back end so it stays associated with - // the invoke. - CallInst::Create(CallSiteFn, CallSiteNoC, "", II); - - // Add a switch case to our unwind block. - if (BasicBlock *SuccBB = LPadSuccMap[II]) { - CatchSwitch->addCase(SwitchValC, SuccBB); - } else { - CatchSwitch->addCase(SwitchValC, II->getUnwindDest()); - } + // Get a reference to the call_site field. + Type *Int32Ty = Type::getInt32Ty(I->getContext()); + Value *Zero = ConstantInt::get(Int32Ty, 0); + Value *One = ConstantInt::get(Int32Ty, 1); + Value *Idxs[2] = { Zero, One }; + Value *CallSite = Builder.CreateGEP(FuncCtx, Idxs, "call_site"); - // We still want this to look like an invoke so we emit the LSDA properly, - // so we don't transform the invoke into a call here. + // Insert a store of the call-site number + ConstantInt *CallSiteNoC = ConstantInt::get(Type::getInt32Ty(I->getContext()), + Number); + Builder.CreateStore(CallSiteNoC, CallSite, true/*volatile*/); } /// MarkBlocksLiveIn - Insert BB and all of its predescessors into LiveBBs until /// we reach blocks we've already seen. -static void MarkBlocksLiveIn(BasicBlock *BB, std::set<BasicBlock*> &LiveBBs) { - if (!LiveBBs.insert(BB).second) return; // already been here. +static void MarkBlocksLiveIn(BasicBlock *BB, + SmallPtrSet<BasicBlock*, 64> &LiveBBs) { + if (!LiveBBs.insert(BB)) return; // already been here. for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) MarkBlocksLiveIn(*PI, LiveBBs); } -/// splitLiveRangesAcrossInvokes - Each value that is live across an unwind edge -/// we spill into a stack location, guaranteeing that there is nothing live -/// across the unwind edge. This process also splits all critical edges -/// coming out of invoke's. 
-/// FIXME: Move this function to a common utility file (Local.cpp?) so -/// both SjLj and LowerInvoke can use it. -void SjLjEHPass:: -splitLiveRangesAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes) { - // First step, split all critical edges from invoke instructions. - for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { - InvokeInst *II = Invokes[i]; - SplitCriticalEdge(II, 0, this); - - // FIXME: New EH - This if-condition will be always true in the new scheme. - if (II->getUnwindDest()->isLandingPad()) - splitLandingPad(II); - else - SplitCriticalEdge(II, 1, this); - - assert(!isa<PHINode>(II->getNormalDest()) && - !isa<PHINode>(II->getUnwindDest()) && - "Critical edge splitting left single entry phi nodes?"); - } - - Function *F = Invokes.back()->getParent()->getParent(); - - // To avoid having to handle incoming arguments specially, we lower each arg - // to a copy instruction in the entry block. This ensures that the argument - // value itself cannot be live across the entry block. - BasicBlock::iterator AfterAllocaInsertPt = F->begin()->begin(); - while (isa<AllocaInst>(AfterAllocaInsertPt) && - isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsertPt)->getArraySize())) - ++AfterAllocaInsertPt; - for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); - AI != E; ++AI) { - Type *Ty = AI->getType(); - // Aggregate types can't be cast, but are legal argument types, so we have - // to handle them differently. We use an extract/insert pair as a - // lightweight method to achieve the same goal. - if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) { - Instruction *EI = ExtractValueInst::Create(AI, 0, "",AfterAllocaInsertPt); - Instruction *NI = InsertValueInst::Create(AI, EI, 0); - NI->insertAfter(EI); - AI->replaceAllUsesWith(NI); - // Set the operand of the instructions back to the AllocaInst. - EI->setOperand(0, AI); - NI->setOperand(0, AI); - } else { - // This is always a no-op cast because we're casting AI to AI->getType() - // so src and destination types are identical. BitCast is the only - // possibility. - CastInst *NC = new BitCastInst( - AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt); - AI->replaceAllUsesWith(NC); - // Set the operand of the cast instruction back to the AllocaInst. - // Normally it's forbidden to replace a CastInst's operand because it - // could cause the opcode to reflect an illegal conversion. However, - // we're replacing it here with the same value it was constructed with. - // We do this because the above replaceAllUsesWith() clobbered the - // operand, but we want this one to remain. - NC->setOperand(0, AI); - } - } - - // Finally, scan the code looking for instructions with bad live ranges. - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) - for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { - // Ignore obvious cases we don't have to handle. In particular, most - // instructions either have no uses or only have a single use inside the - // current block. Ignore them quickly. - Instruction *Inst = II; - if (Inst->use_empty()) continue; - if (Inst->hasOneUse() && - cast<Instruction>(Inst->use_back())->getParent() == BB && - !isa<PHINode>(Inst->use_back())) continue; - - // If this is an alloca in the entry block, it's not a real register - // value. - if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst)) - if (isa<ConstantInt>(AI->getArraySize()) && BB == F->begin()) - continue; - - // Avoid iterator invalidation by copying users to a temporary vector. 
- SmallVector<Instruction*,16> Users; - for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end(); - UI != E; ++UI) { - Instruction *User = cast<Instruction>(*UI); - if (User->getParent() != BB || isa<PHINode>(User)) - Users.push_back(User); - } - - // Find all of the blocks that this value is live in. - std::set<BasicBlock*> LiveBBs; - LiveBBs.insert(Inst->getParent()); - while (!Users.empty()) { - Instruction *U = Users.back(); - Users.pop_back(); - - if (!isa<PHINode>(U)) { - MarkBlocksLiveIn(U->getParent(), LiveBBs); - } else { - // Uses for a PHI node occur in their predecessor block. - PHINode *PN = cast<PHINode>(U); - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (PN->getIncomingValue(i) == Inst) - MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs); - } - } - - // Now that we know all of the blocks that this thing is live in, see if - // it includes any of the unwind locations. - bool NeedsSpill = false; - for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { - BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest(); - if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) - NeedsSpill = true; - } - - // If we decided we need a spill, do it. - // FIXME: Spilling this way is overkill, as it forces all uses of - // the value to be reloaded from the stack slot, even those that aren't - // in the unwind blocks. We should be more selective. - if (NeedsSpill) { - ++NumSpilled; - DemoteRegToStack(*Inst, true); - } - } -} - -/// CreateLandingPadLoad - Load the exception handling values and insert them -/// into a structure. -static Instruction *CreateLandingPadLoad(Function &F, Value *ExnAddr, - Value *SelAddr, - BasicBlock::iterator InsertPt) { - Value *Exn = new LoadInst(ExnAddr, "exn", false, - InsertPt); - Type *Ty = Type::getInt8PtrTy(F.getContext()); - Exn = CastInst::Create(Instruction::IntToPtr, Exn, Ty, "", InsertPt); - Value *Sel = new LoadInst(SelAddr, "sel", false, InsertPt); - - Ty = StructType::get(Exn->getType(), Sel->getType(), NULL); - InsertValueInst *LPadVal = InsertValueInst::Create(llvm::UndefValue::get(Ty), - Exn, 0, - "lpad.val", InsertPt); - return InsertValueInst::Create(LPadVal, Sel, 1, "lpad.val", InsertPt); -} - -/// ReplaceLandingPadVal - Replace the landingpad instruction's value with a -/// load from the stored values (via CreateLandingPadLoad). This looks through -/// PHI nodes, and removes them if they are dead. -static void ReplaceLandingPadVal(Function &F, Instruction *Inst, Value *ExnAddr, - Value *SelAddr) { - if (Inst->use_empty()) return; - - while (!Inst->use_empty()) { - Instruction *I = cast<Instruction>(Inst->use_back()); - - if (PHINode *PN = dyn_cast<PHINode>(I)) { - ReplaceLandingPadVal(F, PN, ExnAddr, SelAddr); - if (PN->use_empty()) PN->eraseFromParent(); - continue; - } - - I->replaceUsesOfWith(Inst, CreateLandingPadLoad(F, ExnAddr, SelAddr, I)); - } -} - -bool SjLjEHPass::insertSjLjEHSupport(Function &F) { - SmallVector<ReturnInst*,16> Returns; - SmallVector<UnwindInst*,16> Unwinds; - SmallVector<InvokeInst*,16> Invokes; - - // Look through the terminators of the basic blocks to find invokes, returns - // and unwinds. - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { - // Remember all return instructions in case we insert an invoke into this - // function. 
- Returns.push_back(RI); - } else if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) { - Invokes.push_back(II); - } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) { - Unwinds.push_back(UI); - } - } - - NumInvokes += Invokes.size(); - NumUnwinds += Unwinds.size(); - - // If we don't have any invokes, there's nothing to do. - if (Invokes.empty()) return false; - - // Find the eh.selector.*, eh.exception and alloca calls. - // - // Remember any allocas() that aren't in the entry block, as the - // jmpbuf saved SP will need to be updated for them. - // - // We'll use the first eh.selector to determine the right personality - // function to use. For SJLJ, we always use the same personality for the - // whole function, not on a per-selector basis. - // FIXME: That's a bit ugly. Better way? - SmallVector<CallInst*,16> EH_Selectors; - SmallVector<CallInst*,16> EH_Exceptions; - SmallVector<Instruction*,16> JmpbufUpdatePoints; - - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - // Note: Skip the entry block since there's nothing there that interests - // us. eh.selector and eh.exception shouldn't ever be there, and we - // want to disregard any allocas that are there. - // - // FIXME: This is awkward. The new EH scheme won't need to skip the entry - // block. - if (BB == F.begin()) { - if (InvokeInst *II = dyn_cast<InvokeInst>(F.begin()->getTerminator())) { - // FIXME: This will be always non-NULL in the new EH. - if (LandingPadInst *LPI = II->getUnwindDest()->getLandingPadInst()) - if (!PersonalityFn) PersonalityFn = LPI->getPersonalityFn(); - } - - continue; - } - - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - if (CallInst *CI = dyn_cast<CallInst>(I)) { - if (CI->getCalledFunction() == SelectorFn) { - if (!PersonalityFn) PersonalityFn = CI->getArgOperand(1); - EH_Selectors.push_back(CI); - } else if (CI->getCalledFunction() == ExceptionFn) { - EH_Exceptions.push_back(CI); - } else if (CI->getCalledFunction() == StackRestoreFn) { - JmpbufUpdatePoints.push_back(CI); - } - } else if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) { - JmpbufUpdatePoints.push_back(AI); - } else if (InvokeInst *II = dyn_cast<InvokeInst>(I)) { - // FIXME: This will be always non-NULL in the new EH. - if (LandingPadInst *LPI = II->getUnwindDest()->getLandingPadInst()) - if (!PersonalityFn) PersonalityFn = LPI->getPersonalityFn(); - } - } - } - - // If we don't have any eh.selector calls, we can't determine the personality - // function. Without a personality function, we can't process exceptions. - if (!PersonalityFn) return false; - - // We have invokes, so we need to add register/unregister calls to get this - // function onto the global unwind stack. - // - // First thing we need to do is scan the whole function for values that are - // live across unwind edges. Each value that is live across an unwind edge we - // spill into a stack location, guaranteeing that there is nothing live across - // the unwind edge. This process also splits all critical edges coming out of - // invoke's. - splitLiveRangesAcrossInvokes(Invokes); - - - SmallVector<LandingPadInst*, 16> LandingPads; - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) - // FIXME: This will be always non-NULL in the new EH. 
- if (LandingPadInst *LPI = II->getUnwindDest()->getLandingPadInst()) - LandingPads.push_back(LPI); +/// substituteLPadValues - Substitute the values returned by the landingpad +/// instruction with those returned by the personality function. +void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, + Value *SelVal) { + SmallVector<Value*, 8> UseWorkList(LPI->use_begin(), LPI->use_end()); + while (!UseWorkList.empty()) { + Value *Val = UseWorkList.pop_back_val(); + ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Val); + if (!EVI) continue; + if (EVI->getNumIndices() != 1) continue; + if (*EVI->idx_begin() == 0) + EVI->replaceAllUsesWith(ExnVal); + else if (*EVI->idx_begin() == 1) + EVI->replaceAllUsesWith(SelVal); + if (EVI->getNumUses() == 0) + EVI->eraseFromParent(); } + if (LPI->getNumUses() == 0) return; - BasicBlock *EntryBB = F.begin(); - // Create an alloca for the incoming jump buffer ptr and the new jump buffer - // that needs to be restored on all exits from the function. This is an - // alloca because the value needs to be added to the global context list. - unsigned Align = 4; // FIXME: Should be a TLI check? - AllocaInst *FunctionContext = - new AllocaInst(FunctionContextTy, 0, Align, - "fcn_context", F.begin()->begin()); - - Value *Idxs[2]; - Type *Int32Ty = Type::getInt32Ty(F.getContext()); - Value *Zero = ConstantInt::get(Int32Ty, 0); - // We need to also keep around a reference to the call_site field - Idxs[0] = Zero; - Idxs[1] = ConstantInt::get(Int32Ty, 1); - CallSite = GetElementPtrInst::Create(FunctionContext, Idxs, "call_site", - EntryBB->getTerminator()); - - // The exception selector comes back in context->data[1] - Idxs[1] = ConstantInt::get(Int32Ty, 2); - Value *FCData = GetElementPtrInst::Create(FunctionContext, Idxs, "fc_data", - EntryBB->getTerminator()); - Idxs[1] = ConstantInt::get(Int32Ty, 1); - Value *SelectorAddr = GetElementPtrInst::Create(FCData, Idxs, - "exc_selector_gep", - EntryBB->getTerminator()); - // The exception value comes back in context->data[0] - Idxs[1] = Zero; - Value *ExceptionAddr = GetElementPtrInst::Create(FCData, Idxs, - "exception_gep", - EntryBB->getTerminator()); - - // The result of the eh.selector call will be replaced with a a reference to - // the selector value returned in the function context. We leave the selector - // itself so the EH analysis later can use it. - for (int i = 0, e = EH_Selectors.size(); i < e; ++i) { - CallInst *I = EH_Selectors[i]; - Value *SelectorVal = new LoadInst(SelectorAddr, "select_val", true, I); - I->replaceAllUsesWith(SelectorVal); - } - - // eh.exception calls are replaced with references to the proper location in - // the context. Unlike eh.selector, the eh.exception calls are removed - // entirely. - for (int i = 0, e = EH_Exceptions.size(); i < e; ++i) { - CallInst *I = EH_Exceptions[i]; - // Possible for there to be duplicates, so check to make sure the - // instruction hasn't already been removed. - if (!I->getParent()) continue; - Value *Val = new LoadInst(ExceptionAddr, "exception", true, I); - Type *Ty = Type::getInt8PtrTy(F.getContext()); - Val = CastInst::Create(Instruction::IntToPtr, Val, Ty, "", I); - - I->replaceAllUsesWith(Val); - I->eraseFromParent(); - } - - for (unsigned i = 0, e = LandingPads.size(); i != e; ++i) - ReplaceLandingPadVal(F, LandingPads[i], ExceptionAddr, SelectorAddr); - - // The entry block changes to have the eh.sjlj.setjmp, with a conditional - // branch to a dispatch block for non-zero returns. 
If we return normally, - // we're not handling an exception and just register the function context and - // continue. - - // Create the dispatch block. The dispatch block is basically a big switch - // statement that goes to all of the invoke landing pads. - BasicBlock *DispatchBlock = - BasicBlock::Create(F.getContext(), "eh.sjlj.setjmp.catch", &F); - - // Insert a load of the callsite in the dispatch block, and a switch on its - // value. By default, we issue a trap statement. - BasicBlock *TrapBlock = - BasicBlock::Create(F.getContext(), "trapbb", &F); - CallInst::Create(Intrinsic::getDeclaration(F.getParent(), Intrinsic::trap), - "", TrapBlock); - new UnreachableInst(F.getContext(), TrapBlock); - - Value *DispatchLoad = new LoadInst(CallSite, "invoke.num", true, - DispatchBlock); - SwitchInst *DispatchSwitch = - SwitchInst::Create(DispatchLoad, TrapBlock, Invokes.size(), - DispatchBlock); - // Split the entry block to insert the conditional branch for the setjmp. - BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(), - "eh.sjlj.setjmp.cont"); - - // Populate the Function Context - // 1. LSDA address - // 2. Personality function address - // 3. jmpbuf (save SP, FP and call eh.sjlj.setjmp) - - // LSDA address - Idxs[0] = Zero; - Idxs[1] = ConstantInt::get(Int32Ty, 4); - Value *LSDAFieldPtr = - GetElementPtrInst::Create(FunctionContext, Idxs, "lsda_gep", - EntryBB->getTerminator()); - Value *LSDA = CallInst::Create(LSDAAddrFn, "lsda_addr", - EntryBB->getTerminator()); - new StoreInst(LSDA, LSDAFieldPtr, true, EntryBB->getTerminator()); - - Idxs[1] = ConstantInt::get(Int32Ty, 3); - Value *PersonalityFieldPtr = - GetElementPtrInst::Create(FunctionContext, Idxs, "lsda_gep", - EntryBB->getTerminator()); - new StoreInst(PersonalityFn, PersonalityFieldPtr, true, - EntryBB->getTerminator()); - - // Save the frame pointer. - Idxs[1] = ConstantInt::get(Int32Ty, 5); - Value *JBufPtr - = GetElementPtrInst::Create(FunctionContext, Idxs, "jbuf_gep", - EntryBB->getTerminator()); - Idxs[1] = ConstantInt::get(Int32Ty, 0); - Value *FramePtr = - GetElementPtrInst::Create(JBufPtr, Idxs, "jbuf_fp_gep", - EntryBB->getTerminator()); - - Value *Val = CallInst::Create(FrameAddrFn, - ConstantInt::get(Int32Ty, 0), - "fp", - EntryBB->getTerminator()); - new StoreInst(Val, FramePtr, true, EntryBB->getTerminator()); - - // Save the stack pointer. - Idxs[1] = ConstantInt::get(Int32Ty, 2); - Value *StackPtr = - GetElementPtrInst::Create(JBufPtr, Idxs, "jbuf_sp_gep", - EntryBB->getTerminator()); - - Val = CallInst::Create(StackAddrFn, "sp", EntryBB->getTerminator()); - new StoreInst(Val, StackPtr, true, EntryBB->getTerminator()); - - // Call the setjmp instrinsic. It fills in the rest of the jmpbuf. - Value *SetjmpArg = - CastInst::Create(Instruction::BitCast, JBufPtr, - Type::getInt8PtrTy(F.getContext()), "", - EntryBB->getTerminator()); - Value *DispatchVal = CallInst::Create(BuiltinSetjmpFn, SetjmpArg, - "", - EntryBB->getTerminator()); - - // Add a call to dispatch_setup after the setjmp call. This is expanded to any - // target-specific setup that needs to be done. - CallInst::Create(DispatchSetupFn, DispatchVal, "", EntryBB->getTerminator()); + // There are still some uses of LPI. Construct an aggregate with the exception + // values and replace the LPI with that aggregate. 
+ Type *LPadType = LPI->getType(); + Value *LPadVal = UndefValue::get(LPadType); + IRBuilder<> + Builder(llvm::next(BasicBlock::iterator(cast<Instruction>(SelVal)))); + LPadVal = Builder.CreateInsertValue(LPadVal, ExnVal, 0, "lpad.val"); + LPadVal = Builder.CreateInsertValue(LPadVal, SelVal, 1, "lpad.val"); - // check the return value of the setjmp. non-zero goes to dispatcher. - Value *IsNormal = new ICmpInst(EntryBB->getTerminator(), - ICmpInst::ICMP_EQ, DispatchVal, Zero, - "notunwind"); - // Nuke the uncond branch. - EntryBB->getTerminator()->eraseFromParent(); - - // Put in a new condbranch in its place. - BranchInst::Create(ContBlock, DispatchBlock, IsNormal, EntryBB); - - // Register the function context and make sure it's known to not throw - CallInst *Register = - CallInst::Create(RegisterFn, FunctionContext, "", - ContBlock->getTerminator()); - Register->setDoesNotThrow(); - - // At this point, we are all set up, update the invoke instructions to mark - // their call_site values, and fill in the dispatch switch accordingly. - for (unsigned i = 0, e = Invokes.size(); i != e; ++i) - markInvokeCallSite(Invokes[i], i+1, CallSite, DispatchSwitch); - - // Mark call instructions that aren't nounwind as no-action (call_site == - // -1). Skip the entry block, as prior to then, no function context has been - // created for this function and any unexpected exceptions thrown will go - // directly to the caller's context, which is what we want anyway, so no need - // to do anything here. - for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;) { - for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I) - if (CallInst *CI = dyn_cast<CallInst>(I)) { - // Ignore calls to the EH builtins (eh.selector, eh.exception) - Constant *Callee = CI->getCalledFunction(); - if (Callee != SelectorFn && Callee != ExceptionFn - && !CI->doesNotThrow()) - insertCallSiteStore(CI, -1, CallSite); - } else if (ResumeInst *RI = dyn_cast<ResumeInst>(I)) { - insertCallSiteStore(RI, -1, CallSite); - } - } - - // Replace all unwinds with a branch to the unwind handler. - // ??? Should this ever happen with sjlj exceptions? - for (unsigned i = 0, e = Unwinds.size(); i != e; ++i) { - BranchInst::Create(TrapBlock, Unwinds[i]); - Unwinds[i]->eraseFromParent(); - } - - // Following any allocas not in the entry block, update the saved SP in the - // jmpbuf to the new value. - for (unsigned i = 0, e = JmpbufUpdatePoints.size(); i != e; ++i) { - Instruction *AI = JmpbufUpdatePoints[i]; - Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp"); - StackAddr->insertAfter(AI); - Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true); - StoreStackAddr->insertAfter(StackAddr); - } - - // Finally, for any returns from this function, if this function contains an - // invoke, add a call to unregister the function context. - for (unsigned i = 0, e = Returns.size(); i != e; ++i) - CallInst::Create(UnregisterFn, FunctionContext, "", Returns[i]); - - return true; + LPI->replaceAllUsesWith(LPadVal); } /// setupFunctionContext - Allocate the function context on the stack and fill /// it with all of the data that we know at this point. -Value *SjLjEHPass:: +Value *SjLjEHPrepare:: setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) { BasicBlock *EntryBB = F.begin(); @@ -721,56 +192,42 @@ setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) { // because the value needs to be added to the global context list. 
unsigned Align = TLI->getTargetData()->getPrefTypeAlignment(FunctionContextTy); - AllocaInst *FuncCtx = + FuncCtx = new AllocaInst(FunctionContextTy, 0, Align, "fn_context", EntryBB->begin()); // Fill in the function context structure. - Value *Idxs[2]; Type *Int32Ty = Type::getInt32Ty(F.getContext()); Value *Zero = ConstantInt::get(Int32Ty, 0); Value *One = ConstantInt::get(Int32Ty, 1); + Value *Two = ConstantInt::get(Int32Ty, 2); + Value *Three = ConstantInt::get(Int32Ty, 3); + Value *Four = ConstantInt::get(Int32Ty, 4); - // Keep around a reference to the call_site field. - Idxs[0] = Zero; - Idxs[1] = One; - CallSite = GetElementPtrInst::Create(FuncCtx, Idxs, "call_site", - EntryBB->getTerminator()); - - // Reference the __data field. - Idxs[1] = ConstantInt::get(Int32Ty, 2); - Value *FCData = GetElementPtrInst::Create(FuncCtx, Idxs, "__data", - EntryBB->getTerminator()); - - // The exception value comes back in context->__data[0]. - Idxs[1] = Zero; - Value *ExceptionAddr = GetElementPtrInst::Create(FCData, Idxs, - "exception_gep", - EntryBB->getTerminator()); - - // The exception selector comes back in context->__data[1]. - Idxs[1] = One; - Value *SelectorAddr = GetElementPtrInst::Create(FCData, Idxs, - "exn_selector_gep", - EntryBB->getTerminator()); + Value *Idxs[2] = { Zero, 0 }; for (unsigned I = 0, E = LPads.size(); I != E; ++I) { LandingPadInst *LPI = LPads[I]; IRBuilder<> Builder(LPI->getParent()->getFirstInsertionPt()); + // Reference the __data field. + Idxs[1] = Two; + Value *FCData = Builder.CreateGEP(FuncCtx, Idxs, "__data"); + + // The exception values come back in context->__data[0]. + Idxs[1] = Zero; + Value *ExceptionAddr = Builder.CreateGEP(FCData, Idxs, "exception_gep"); Value *ExnVal = Builder.CreateLoad(ExceptionAddr, true, "exn_val"); ExnVal = Builder.CreateIntToPtr(ExnVal, Type::getInt8PtrTy(F.getContext())); - Value *SelVal = Builder.CreateLoad(SelectorAddr, true, "exn_selector_val"); - Type *LPadType = LPI->getType(); - Value *LPadVal = UndefValue::get(LPadType); - LPadVal = Builder.CreateInsertValue(LPadVal, ExnVal, 0, "lpad.val"); - LPadVal = Builder.CreateInsertValue(LPadVal, SelVal, 1, "lpad.val"); + Idxs[1] = One; + Value *SelectorAddr = Builder.CreateGEP(FCData, Idxs, "exn_selector_gep"); + Value *SelVal = Builder.CreateLoad(SelectorAddr, true, "exn_selector_val"); - LPI->replaceAllUsesWith(LPadVal); + substituteLPadValues(LPI, ExnVal, SelVal); } // Personality function - Idxs[1] = ConstantInt::get(Int32Ty, 3); + Idxs[1] = Three; if (!PersonalityFn) PersonalityFn = LPads[0]->getPersonalityFn(); Value *PersonalityFieldPtr = @@ -780,11 +237,11 @@ setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) { EntryBB->getTerminator()); // LSDA address - Idxs[1] = ConstantInt::get(Int32Ty, 4); - Value *LSDAFieldPtr = GetElementPtrInst::Create(FuncCtx, Idxs, "lsda_gep", - EntryBB->getTerminator()); Value *LSDA = CallInst::Create(LSDAAddrFn, "lsda_addr", EntryBB->getTerminator()); + Idxs[1] = Four; + Value *LSDAFieldPtr = GetElementPtrInst::Create(FuncCtx, Idxs, "lsda_gep", + EntryBB->getTerminator()); new StoreInst(LSDA, LSDAFieldPtr, true, EntryBB->getTerminator()); return FuncCtx; @@ -794,7 +251,7 @@ setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) { /// specially, we lower each arg to a copy instruction in the entry block. This /// ensures that the argument value itself cannot be live out of the entry /// block. 
-void SjLjEHPass::lowerIncomingArguments(Function &F) { +void SjLjEHPrepare::lowerIncomingArguments(Function &F) { BasicBlock::iterator AfterAllocaInsPt = F.begin()->begin(); while (isa<AllocaInst>(AfterAllocaInsPt) && isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsPt)->getArraySize())) @@ -838,8 +295,8 @@ void SjLjEHPass::lowerIncomingArguments(Function &F) { /// lowerAcrossUnwindEdges - Find all variables which are alive across an unwind /// edge and spill them. -void SjLjEHPass::lowerAcrossUnwindEdges(Function &F, - ArrayRef<InvokeInst*> Invokes) { +void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, + ArrayRef<InvokeInst*> Invokes) { // Finally, scan the code looking for instructions with bad live ranges. for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) { @@ -870,7 +327,7 @@ void SjLjEHPass::lowerAcrossUnwindEdges(Function &F, } // Find all of the blocks that this value is live in. - std::set<BasicBlock*> LiveBBs; + SmallPtrSet<BasicBlock*, 64> LiveBBs; LiveBBs.insert(Inst->getParent()); while (!Users.empty()) { Instruction *U = Users.back(); @@ -893,7 +350,10 @@ void SjLjEHPass::lowerAcrossUnwindEdges(Function &F, for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest(); if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) { + DEBUG(dbgs() << "SJLJ Spill: " << *Inst << " around " + << UnwindBlock->getName() << "\n"); NeedsSpill = true; + break; } } @@ -902,36 +362,60 @@ void SjLjEHPass::lowerAcrossUnwindEdges(Function &F, // the value to be reloaded from the stack slot, even those that aren't // in the unwind blocks. We should be more selective. if (NeedsSpill) { - ++NumSpilled; DemoteRegToStack(*Inst, true); + ++NumSpilled; } } } + + // Go through the landing pads and remove any PHIs there. + for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { + BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest(); + LandingPadInst *LPI = UnwindBlock->getLandingPadInst(); + + // Place PHIs into a set to avoid invalidating the iterator. + SmallPtrSet<PHINode*, 8> PHIsToDemote; + for (BasicBlock::iterator + PN = UnwindBlock->begin(); isa<PHINode>(PN); ++PN) + PHIsToDemote.insert(cast<PHINode>(PN)); + if (PHIsToDemote.empty()) continue; + + // Demote the PHIs to the stack. + for (SmallPtrSet<PHINode*, 8>::iterator + I = PHIsToDemote.begin(), E = PHIsToDemote.end(); I != E; ++I) + DemotePHIToStack(*I); + + // Move the landingpad instruction back to the top of the landing pad block. + LPI->moveBefore(UnwindBlock->begin()); + } } /// setupEntryBlockAndCallSites - Setup the entry block by creating and filling /// the function context and marking the call sites with the appropriate /// values. These values are used by the DWARF EH emitter. -bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) { +bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { SmallVector<ReturnInst*, 16> Returns; SmallVector<InvokeInst*, 16> Invokes; - SmallVector<LandingPadInst*, 16> LPads; + SmallSetVector<LandingPadInst*, 16> LPads; // Look through the terminators of the basic blocks to find invokes. 
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) { Invokes.push_back(II); - LPads.push_back(II->getUnwindDest()->getLandingPadInst()); + LPads.insert(II->getUnwindDest()->getLandingPadInst()); } else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { Returns.push_back(RI); } if (Invokes.empty()) return false; + NumInvokes += Invokes.size(); + lowerIncomingArguments(F); lowerAcrossUnwindEdges(F, Invokes); - Value *FuncCtx = setupFunctionContext(F, LPads); + Value *FuncCtx = + setupFunctionContext(F, makeArrayRef(LPads.begin(), LPads.end())); BasicBlock *EntryBB = F.begin(); Type *Int32Ty = Type::getInt32Ty(F.getContext()); @@ -979,7 +463,7 @@ bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) { // At this point, we are all set up, update the invoke instructions to mark // their call_site values. for (unsigned I = 0, E = Invokes.size(); I != E; ++I) { - insertCallSiteStore(Invokes[I], I + 1, CallSite); + insertCallSiteStore(Invokes[I], I + 1); ConstantInt *CallSiteNum = ConstantInt::get(Type::getInt32Ty(F.getContext()), I + 1); @@ -998,9 +482,9 @@ bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) { for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I) if (CallInst *CI = dyn_cast<CallInst>(I)) { if (!CI->doesNotThrow()) - insertCallSiteStore(CI, -1, CallSite); + insertCallSiteStore(CI, -1); } else if (ResumeInst *RI = dyn_cast<ResumeInst>(I)) { - insertCallSiteStore(RI, -1, CallSite); + insertCallSiteStore(RI, -1); } // Register the function context and make sure it's known to not throw @@ -1008,6 +492,25 @@ bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) { EntryBB->getTerminator()); Register->setDoesNotThrow(); + // Following any allocas not in the entry block, update the saved SP in the + // jmpbuf to the new value. + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + if (BB == F.begin()) + continue; + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + if (CallInst *CI = dyn_cast<CallInst>(I)) { + if (CI->getCalledFunction() != StackRestoreFn) + continue; + } else if (!isa<AllocaInst>(I)) { + continue; + } + Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp"); + StackAddr->insertAfter(I); + Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true); + StoreStackAddr->insertAfter(StackAddr); + } + } + // Finally, for any returns from this function, if this function contains an // invoke, add a call to unregister the function context. for (unsigned I = 0, E = Returns.size(); I != E; ++I) @@ -1016,11 +519,7 @@ bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) { return true; } -bool SjLjEHPass::runOnFunction(Function &F) { - bool Res = false; - if (!DisableOldSjLjEH) - Res = insertSjLjEHSupport(F); - else - Res = setupEntryBlockAndCallSites(F); +bool SjLjEHPrepare::runOnFunction(Function &F) { + bool Res = setupEntryBlockAndCallSites(F); return Res; } diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp index ca79cafcf4be..c5bd3a3cae63 100644 --- a/lib/CodeGen/SlotIndexes.cpp +++ b/lib/CodeGen/SlotIndexes.cpp @@ -76,7 +76,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { MachineBasicBlock *mbb = &*mbbItr; // Insert an index for the MBB start. 
- SlotIndex blockStartIndex(back(), SlotIndex::LOAD); + SlotIndex blockStartIndex(back(), SlotIndex::Slot_Block); for (MachineBasicBlock::iterator miItr = mbb->begin(), miEnd = mbb->end(); miItr != miEnd; ++miItr) { @@ -88,7 +88,8 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { push_back(createEntry(mi, index += SlotIndex::InstrDist)); // Save this base index in the maps. - mi2iMap.insert(std::make_pair(mi, SlotIndex(back(), SlotIndex::LOAD))); + mi2iMap.insert(std::make_pair(mi, SlotIndex(back(), + SlotIndex::Slot_Block))); ++functionSize; } @@ -97,14 +98,15 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { push_back(createEntry(0, index += SlotIndex::InstrDist)); MBBRanges[mbb->getNumber()].first = blockStartIndex; - MBBRanges[mbb->getNumber()].second = SlotIndex(back(), SlotIndex::LOAD); + MBBRanges[mbb->getNumber()].second = SlotIndex(back(), + SlotIndex::Slot_Block); idx2MBBMap.push_back(IdxMBBPair(blockStartIndex, mbb)); } // Sort the Idx2MBBMap std::sort(idx2MBBMap.begin(), idx2MBBMap.end(), Idx2MBBCompare()); - DEBUG(dump()); + DEBUG(mf->print(dbgs(), this)); // And we're done! return false; @@ -166,7 +168,7 @@ void SlotIndexes::dump() const { // Print a SlotIndex to a raw_ostream. void SlotIndex::print(raw_ostream &os) const { if (isValid()) - os << entry().getIndex() << "LudS"[getSlot()]; + os << entry().getIndex() << "Berd"[getSlot()]; else os << "invalid"; } diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp index b6bbcd7176dd..4cd22eb60f55 100644 --- a/lib/CodeGen/Spiller.cpp +++ b/lib/CodeGen/Spiller.cpp @@ -11,8 +11,8 @@ #include "Spiller.h" #include "VirtRegMap.h" -#include "LiveRangeEdit.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -29,7 +29,7 @@ using namespace llvm; namespace { - enum SpillerName { trivial, standard, inline_ }; + enum SpillerName { trivial, inline_ }; } static cl::opt<SpillerName> @@ -37,10 +37,9 @@ spillerOpt("spiller", cl::desc("Spiller to use: (default: standard)"), cl::Prefix, cl::values(clEnumVal(trivial, "trivial spiller"), - clEnumVal(standard, "default spiller"), clEnumValN(inline_, "inline", "inline spiller"), clEnumValEnd), - cl::init(standard)); + cl::init(trivial)); // Spiller virtual destructor implementation. Spiller::~Spiller() {} @@ -73,8 +72,9 @@ protected: /// Add spill ranges for every use/def of the live interval, inserting loads /// immediately before each use, and stores after each def. No folding or /// remat is attempted. - void trivialSpillEverywhere(LiveInterval *li, - SmallVectorImpl<LiveInterval*> &newIntervals) { + void trivialSpillEverywhere(LiveRangeEdit& LRE) { + LiveInterval* li = &LRE.getParent(); + DEBUG(dbgs() << "Spilling everywhere " << *li << "\n"); assert(li->weight != HUGE_VALF && @@ -116,17 +116,14 @@ protected: } // Create a new vreg & interval for this instr. - unsigned newVReg = mri->createVirtualRegister(trc); - vrm->grow(); - vrm->assignVirt2StackSlot(newVReg, ss); - LiveInterval *newLI = &lis->getOrCreateInterval(newVReg); + LiveInterval *newLI = &LRE.create(); newLI->weight = HUGE_VALF; // Update the reg operands & kill flags. 
for (unsigned i = 0; i < indices.size(); ++i) { unsigned mopIdx = indices[i]; MachineOperand &mop = mi->getOperand(mopIdx); - mop.setReg(newVReg); + mop.setReg(newLI->reg); if (mop.isUse() && !mi->isRegTiedToDefOperand(mopIdx)) { mop.setIsKill(true); } @@ -136,33 +133,29 @@ protected: // Insert reload if necessary. MachineBasicBlock::iterator miItr(mi); if (hasUse) { - tii->loadRegFromStackSlot(*mi->getParent(), miItr, newVReg, ss, trc, + tii->loadRegFromStackSlot(*mi->getParent(), miItr, newLI->reg, ss, trc, tri); MachineInstr *loadInstr(prior(miItr)); SlotIndex loadIndex = - lis->InsertMachineInstrInMaps(loadInstr).getDefIndex(); - vrm->addSpillSlotUse(ss, loadInstr); + lis->InsertMachineInstrInMaps(loadInstr).getRegSlot(); SlotIndex endIndex = loadIndex.getNextIndex(); VNInfo *loadVNI = - newLI->getNextValue(loadIndex, 0, lis->getVNInfoAllocator()); + newLI->getNextValue(loadIndex, lis->getVNInfoAllocator()); newLI->addRange(LiveRange(loadIndex, endIndex, loadVNI)); } // Insert store if necessary. if (hasDef) { - tii->storeRegToStackSlot(*mi->getParent(), llvm::next(miItr), newVReg, + tii->storeRegToStackSlot(*mi->getParent(), llvm::next(miItr),newLI->reg, true, ss, trc, tri); MachineInstr *storeInstr(llvm::next(miItr)); SlotIndex storeIndex = - lis->InsertMachineInstrInMaps(storeInstr).getDefIndex(); - vrm->addSpillSlotUse(ss, storeInstr); + lis->InsertMachineInstrInMaps(storeInstr).getRegSlot(); SlotIndex beginIndex = storeIndex.getPrevIndex(); VNInfo *storeVNI = - newLI->getNextValue(beginIndex, 0, lis->getVNInfoAllocator()); + newLI->getNextValue(beginIndex, lis->getVNInfoAllocator()); newLI->addRange(LiveRange(beginIndex, storeIndex, storeVNI)); } - - newIntervals.push_back(newLI); } } }; @@ -182,60 +175,20 @@ public: void spill(LiveRangeEdit &LRE) { // Ignore spillIs - we don't use it. - trivialSpillEverywhere(&LRE.getParent(), *LRE.getNewVRegs()); + trivialSpillEverywhere(LRE); } }; } // end anonymous namespace -namespace { - -/// Falls back on LiveIntervals::addIntervalsForSpills. -class StandardSpiller : public Spiller { -protected: - MachineFunction *mf; - LiveIntervals *lis; - LiveStacks *lss; - MachineLoopInfo *loopInfo; - VirtRegMap *vrm; -public: - StandardSpiller(MachineFunctionPass &pass, MachineFunction &mf, - VirtRegMap &vrm) - : mf(&mf), - lis(&pass.getAnalysis<LiveIntervals>()), - lss(&pass.getAnalysis<LiveStacks>()), - loopInfo(pass.getAnalysisIfAvailable<MachineLoopInfo>()), - vrm(&vrm) {} - - /// Falls back on LiveIntervals::addIntervalsForSpills. - void spill(LiveRangeEdit &LRE) { - std::vector<LiveInterval*> added = - lis->addIntervalsForSpills(LRE.getParent(), LRE.getUselessVRegs(), - loopInfo, *vrm); - LRE.getNewVRegs()->insert(LRE.getNewVRegs()->end(), - added.begin(), added.end()); - - // Update LiveStacks. 
- int SS = vrm->getStackSlot(LRE.getReg()); - if (SS == VirtRegMap::NO_STACK_SLOT) - return; - const TargetRegisterClass *RC = mf->getRegInfo().getRegClass(LRE.getReg()); - LiveInterval &SI = lss->getOrCreateInterval(SS, RC); - if (!SI.hasAtLeastOneValue()) - SI.getNextValue(SlotIndex(), 0, lss->getVNInfoAllocator()); - SI.MergeRangesInAsValue(LRE.getParent(), SI.getValNumInfo(0)); - } -}; - -} // end anonymous namespace +void Spiller::anchor() { } llvm::Spiller* llvm::createSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm) { switch (spillerOpt) { - default: assert(0 && "unknown spiller"); case trivial: return new TrivialSpiller(pass, mf, vrm); - case standard: return new StandardSpiller(pass, mf, vrm); case inline_: return createInlineSpiller(pass, mf, vrm); } + llvm_unreachable("Invalid spiller optimization"); } diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h index 41f1727da439..b7d5beaab1b2 100644 --- a/lib/CodeGen/Spiller.h +++ b/lib/CodeGen/Spiller.h @@ -22,6 +22,7 @@ namespace llvm { /// Implementations are utility classes which insert spill or remat code on /// demand. class Spiller { + virtual void anchor(); public: virtual ~Spiller() = 0; diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index 63627800af69..9959f74d5f27 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -14,10 +14,10 @@ #define DEBUG_TYPE "regalloc" #include "SplitKit.h" -#include "LiveRangeEdit.h" #include "VirtRegMap.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" @@ -62,13 +62,14 @@ SlotIndex SplitAnalysis::computeLastSplitPoint(unsigned Num) { const MachineBasicBlock *MBB = MF.getBlockNumbered(Num); const MachineBasicBlock *LPad = MBB->getLandingPadSuccessor(); std::pair<SlotIndex, SlotIndex> &LSP = LastSplitPoint[Num]; + SlotIndex MBBEnd = LIS.getMBBEndIdx(MBB); // Compute split points on the first call. The pair is independent of the // current live interval. if (!LSP.first.isValid()) { MachineBasicBlock::const_iterator FirstTerm = MBB->getFirstTerminator(); if (FirstTerm == MBB->end()) - LSP.first = LIS.getMBBEndIdx(MBB); + LSP.first = MBBEnd; else LSP.first = LIS.getInstructionIndex(FirstTerm); @@ -80,7 +81,7 @@ SlotIndex SplitAnalysis::computeLastSplitPoint(unsigned Num) { for (MachineBasicBlock::const_iterator I = MBB->end(), E = MBB->begin(); I != E;) { --I; - if (I->getDesc().isCall()) { + if (I->isCall()) { LSP.second = LIS.getInstructionIndex(I); break; } @@ -89,10 +90,32 @@ SlotIndex SplitAnalysis::computeLastSplitPoint(unsigned Num) { // If CurLI is live into a landing pad successor, move the last split point // back to the call that may throw. - if (LPad && LSP.second.isValid() && LIS.isLiveInToMBB(*CurLI, LPad)) - return LSP.second; - else + if (!LPad || !LSP.second || !LIS.isLiveInToMBB(*CurLI, LPad)) + return LSP.first; + + // Find the value leaving MBB. + const VNInfo *VNI = CurLI->getVNInfoBefore(MBBEnd); + if (!VNI) + return LSP.first; + + // If the value leaving MBB was defined after the call in MBB, it can't + // really be live-in to the landing pad. This can happen if the landing pad + // has a PHI, and this register is undef on the exceptional edge. 
+ // <rdar://problem/10664933> + if (!SlotIndex::isEarlierInstr(VNI->def, LSP.second) && VNI->def < MBBEnd) return LSP.first; + + // Value is properly live-in to the landing pad. + // Only allow splits before the call. + return LSP.second; +} + +MachineBasicBlock::iterator +SplitAnalysis::getLastSplitPointIter(MachineBasicBlock *MBB) { + SlotIndex LSP = getLastSplitPoint(MBB->getNumber()); + if (LSP == LIS.getMBBEndIdx(MBB)) + return MBB->end(); + return LIS.getInstructionFromIndex(LSP); } /// analyzeUses - Count instructions, basic blocks, and loops using CurLI. @@ -112,7 +135,7 @@ void SplitAnalysis::analyzeUses() { I = MRI.use_nodbg_begin(CurLI->reg), E = MRI.use_nodbg_end(); I != E; ++I) if (!I.getOperand().isUndef()) - UseSlots.push_back(LIS.getInstructionIndex(&*I).getDefIndex()); + UseSlots.push_back(LIS.getInstructionIndex(&*I).getRegSlot()); array_pod_sort(UseSlots.begin(), UseSlots.end()); @@ -328,7 +351,7 @@ void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) { // We don't need an AliasAnalysis since we will only be performing // cheap-as-a-copy remats anyway. - Edit->anyRematerializable(LIS, TII, 0); + Edit->anyRematerializable(0); } void SplitEditor::dump() const { @@ -351,7 +374,7 @@ VNInfo *SplitEditor::defValue(unsigned RegIdx, LiveInterval *LI = Edit->get(RegIdx); // Create a new value. - VNInfo *VNI = LI->getNextValue(Idx, 0, LIS.getVNInfoAllocator()); + VNInfo *VNI = LI->getNextValue(Idx, LIS.getVNInfoAllocator()); // Use insert for lookup, so we can add missing values with a second lookup. std::pair<ValueMap::iterator, bool> InsP = @@ -366,14 +389,14 @@ VNInfo *SplitEditor::defValue(unsigned RegIdx, // If the previous value was a simple mapping, add liveness for it now. if (VNInfo *OldVNI = InsP.first->second.getPointer()) { SlotIndex Def = OldVNI->def; - LI->addRange(LiveRange(Def, Def.getNextSlot(), OldVNI)); + LI->addRange(LiveRange(Def, Def.getDeadSlot(), OldVNI)); // No longer a simple mapping. Switch to a complex, non-forced mapping. InsP.first->second = ValueForcePair(); } // This is a complex mapping, add liveness for VNI SlotIndex Def = VNI->def; - LI->addRange(LiveRange(Def, Def.getNextSlot(), VNI)); + LI->addRange(LiveRange(Def, Def.getDeadSlot(), VNI)); return VNI; } @@ -393,7 +416,7 @@ void SplitEditor::forceRecompute(unsigned RegIdx, const VNInfo *ParentVNI) { // This was previously a single mapping. Make sure the old def is represented // by a trivial live range. SlotIndex Def = VNI->def; - Edit->get(RegIdx)->addRange(LiveRange(Def, Def.getNextSlot(), VNI)); + Edit->get(RegIdx)->addRange(LiveRange(Def, Def.getDeadSlot(), VNI)); // Mark as complex mapped, forced. VFP = ValueForcePair(0, true); } @@ -413,33 +436,31 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx, // Attempt cheap-as-a-copy rematerialization. LiveRangeEdit::Remat RM(ParentVNI); - if (Edit->canRematerializeAt(RM, UseIdx, true, LIS)) { - Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, LIS, TII, TRI, Late); + if (Edit->canRematerializeAt(RM, UseIdx, true)) { + Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, TRI, Late); ++NumRemats; } else { // Can't remat, just insert a copy from parent. CopyMI = BuildMI(MBB, I, DebugLoc(), TII.get(TargetOpcode::COPY), LI->reg) .addReg(Edit->getReg()); Def = LIS.getSlotIndexes()->insertMachineInstrInMaps(CopyMI, Late) - .getDefIndex(); + .getRegSlot(); ++NumCopies; } // Define the value in Reg. 
- VNInfo *VNI = defValue(RegIdx, ParentVNI, Def); - VNI->setCopy(CopyMI); - return VNI; + return defValue(RegIdx, ParentVNI, Def); } /// Create a new virtual register and live interval. unsigned SplitEditor::openIntv() { // Create the complement as index 0. if (Edit->empty()) - Edit->create(LIS, VRM); + Edit->create(); // Create the open interval. OpenIdx = Edit->size(); - Edit->create(LIS, VRM); + Edit->create(); return OpenIdx; } @@ -497,7 +518,7 @@ SlotIndex SplitEditor::enterIntvAtEnd(MachineBasicBlock &MBB) { } DEBUG(dbgs() << ": valno " << ParentVNI->id); VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Last, MBB, - LIS.getLastSplitPoint(Edit->getParent(), &MBB)); + SA.getLastSplitPointIter(&MBB)); RegAssign.insert(VNI->def, End, OpenIdx); DEBUG(dump()); return VNI->def; @@ -586,7 +607,7 @@ SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) { void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) { assert(OpenIdx && "openIntv not called before overlapIntv"); const VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Start); - assert(ParentVNI == Edit->getParent().getVNInfoAt(End.getPrevSlot()) && + assert(ParentVNI == Edit->getParent().getVNInfoBefore(End) && "Parent changes value in extended range"); assert(LIS.getMBBFromIndex(Start) == LIS.getMBBFromIndex(End) && "Range cannot span basic blocks"); @@ -640,7 +661,7 @@ void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) { DEBUG(dbgs() << " cannot find simple kill of RegIdx " << RegIdx << '\n'); forceRecompute(RegIdx, Edit->getParent().getVNInfoAt(Def)); } else { - SlotIndex Kill = LIS.getInstructionIndex(MBBI).getDefIndex(); + SlotIndex Kill = LIS.getInstructionIndex(MBBI).getRegSlot(); DEBUG(dbgs() << " move kill to " << Kill << '\t' << *MBBI); AssignI.setStop(Kill); } @@ -780,7 +801,7 @@ void SplitEditor::hoistCopiesForSize() { SlotIndex Last = LIS.getMBBEndIdx(Dom.first).getPrevSlot(); Dom.second = defFromParent(0, ParentVNI, Last, *Dom.first, - LIS.getLastSplitPoint(Edit->getParent(), Dom.first))->def; + SA.getLastSplitPointIter(Dom.first))->def; } // Remove redundant back-copies that are now known to be dominated by another @@ -958,7 +979,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) { // use the same register as the def, so just do that always. SlotIndex Idx = LIS.getInstructionIndex(MI); if (MO.isDef() || MO.isUndef()) - Idx = MO.isEarlyClobber() ? Idx.getUseIndex() : Idx.getDefIndex(); + Idx = Idx.getRegSlot(MO.isEarlyClobber()); // Rewrite to the mapped register at Idx. unsigned RegIdx = RegAssign.lookup(Idx); @@ -981,7 +1002,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) { if (!Edit->getParent().liveAt(Idx)) continue; } else - Idx = Idx.getUseIndex(); + Idx = Idx.getRegSlot(true); getLRCalc(RegIdx).extend(LI, Idx.getNextSlot(), LIS.getSlotIndexes(), &MDT, &LIS.getVNInfoAllocator()); @@ -994,8 +1015,8 @@ void SplitEditor::deleteRematVictims() { LiveInterval *LI = *I; for (LiveInterval::const_iterator LII = LI->begin(), LIE = LI->end(); LII != LIE; ++LII) { - // Dead defs end at the store slot. - if (LII->end != LII->valno->def.getNextSlot()) + // Dead defs end at the dead slot. 
+ if (LII->end != LII->valno->def.getDeadSlot()) continue; MachineInstr *MI = LIS.getInstructionFromIndex(LII->valno->def); assert(MI && "Missing instruction for dead def"); @@ -1012,7 +1033,7 @@ void SplitEditor::deleteRematVictims() { if (Dead.empty()) return; - Edit->eliminateDeadDefs(Dead, LIS, VRM, TII); + Edit->eliminateDeadDefs(Dead); } void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { @@ -1030,7 +1051,6 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { unsigned RegIdx = RegAssign.lookup(ParentVNI->def); VNInfo *VNI = defValue(RegIdx, ParentVNI, ParentVNI->def); VNI->setIsPHIDef(ParentVNI->isPHIDef()); - VNI->setCopy(ParentVNI->getCopy()); // Force rematted values to be recomputed everywhere. // The new live ranges may be truncated. @@ -1049,7 +1069,6 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { break; case SM_Speed: llvm_unreachable("Spill mode 'speed' not implemented yet"); - break; } // Transfer the simply mapped values, check if any are skipped. @@ -1089,7 +1108,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { SmallVector<LiveInterval*, 8> dups; dups.push_back(li); for (unsigned j = 1; j != NumComp; ++j) - dups.push_back(&Edit->create(LIS, VRM)); + dups.push_back(&Edit->create()); ConEQ.Distribute(&dups[0], MRI); // The new intervals all map back to i. if (LRMap) @@ -1097,7 +1116,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { } // Calculate spill weight and allocation hints for new intervals. - Edit->calculateRegClassAndHint(VRM.getMachineFunction(), LIS, SA.Loops); + Edit->calculateRegClassAndHint(VRM.getMachineFunction(), SA.Loops); assert(!LRMap || LRMap->size() == Edit->size()); } diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h index d8fc2122a3c7..4005a3d5cbbf 100644 --- a/lib/CodeGen/SplitKit.h +++ b/lib/CodeGen/SplitKit.h @@ -46,9 +46,6 @@ public: const MachineLoopInfo &Loops; const TargetInstrInfo &TII; - // Sorted slot indexes of using instructions. - SmallVector<SlotIndex, 8> UseSlots; - /// Additional information about basic blocks where the current variable is /// live. Such a block will look like one of these templates: /// @@ -85,6 +82,9 @@ private: // Current live interval. const LiveInterval *CurLI; + // Sorted slot indexes of using instructions. + SmallVector<SlotIndex, 8> UseSlots; + /// LastSplitPoint - Last legal split point in each basic block in the current /// function. The first entry is the first terminator, the second entry is the /// last valid split point for a variable that is live in to a landing pad @@ -135,7 +135,7 @@ public: /// getParent - Return the last analyzed interval. const LiveInterval &getParent() const { return *CurLI; } - /// getLastSplitPoint - Return that base index of the last valid split point + /// getLastSplitPoint - Return the base index of the last valid split point /// in the basic block numbered Num. SlotIndex getLastSplitPoint(unsigned Num) { // Inline the common simple case. @@ -145,6 +145,9 @@ public: return computeLastSplitPoint(Num); } + /// getLastSplitPointIter - Returns the last split point as an iterator. + MachineBasicBlock::iterator getLastSplitPointIter(MachineBasicBlock*); + /// isOriginalEndpoint - Return true if the original live range was killed or /// (re-)defined at Idx. Idx should be the 'def' slot for a normal kill/def, /// and 'use' for an early-clobber def. @@ -152,6 +155,10 @@ public: /// splitting. 
bool isOriginalEndpoint(SlotIndex Idx) const; + /// getUseSlots - Return an array of SlotIndexes of instructions using CurLI. + /// This include both use and def operands, at most one entry per instruction. + ArrayRef<SlotIndex> getUseSlots() const { return UseSlots; } + /// getUseBlocks - Return an array of BlockInfo objects for the basic blocks /// where CurLI has uses. ArrayRef<BlockInfo> getUseBlocks() const { return UseBlocks; } diff --git a/lib/CodeGen/Splitter.cpp b/lib/CodeGen/Splitter.cpp deleted file mode 100644 index 77973b72bbc8..000000000000 --- a/lib/CodeGen/Splitter.cpp +++ /dev/null @@ -1,827 +0,0 @@ -//===-- llvm/CodeGen/Splitter.cpp - Splitter -----------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "loopsplitter" - -#include "Splitter.h" - -#include "llvm/Module.h" -#include "llvm/CodeGen/CalcSpillWeights.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/LiveStackAnalysis.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/SlotIndexes.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetInstrInfo.h" - -using namespace llvm; - -char LoopSplitter::ID = 0; -INITIALIZE_PASS_BEGIN(LoopSplitter, "loop-splitting", - "Split virtual regists across loop boundaries.", false, false) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_DEPENDENCY(SlotIndexes) -INITIALIZE_PASS_DEPENDENCY(LiveIntervals) -INITIALIZE_PASS_END(LoopSplitter, "loop-splitting", - "Split virtual regists across loop boundaries.", false, false) - -namespace llvm { - - class StartSlotComparator { - public: - StartSlotComparator(LiveIntervals &lis) : lis(lis) {} - bool operator()(const MachineBasicBlock *mbb1, - const MachineBasicBlock *mbb2) const { - return lis.getMBBStartIdx(mbb1) < lis.getMBBStartIdx(mbb2); - } - private: - LiveIntervals &lis; - }; - - class LoopSplit { - public: - LoopSplit(LoopSplitter &ls, LiveInterval &li, MachineLoop &loop) - : ls(ls), li(li), loop(loop), valid(true), inSplit(false), newLI(0) { - assert(TargetRegisterInfo::isVirtualRegister(li.reg) && - "Cannot split physical registers."); - } - - LiveInterval& getLI() const { return li; } - - MachineLoop& getLoop() const { return loop; } - - bool isValid() const { return valid; } - - bool isWorthwhile() const { return valid && (inSplit || !outSplits.empty()); } - - void invalidate() { valid = false; } - - void splitIncoming() { inSplit = true; } - - void splitOutgoing(MachineLoop::Edge &edge) { outSplits.insert(edge); } - - void addLoopInstr(MachineInstr *i) { loopInstrs.push_back(i); } - - void apply() { - assert(valid && "Attempt to apply invalid split."); - applyIncoming(); - applyOutgoing(); - copyRanges(); - renameInside(); - } - - private: - LoopSplitter &ls; - LiveInterval &li; - MachineLoop &loop; - bool valid, inSplit; - std::set<MachineLoop::Edge> outSplits; - std::vector<MachineInstr*> loopInstrs; - - LiveInterval *newLI; - std::map<VNInfo*, VNInfo*> vniMap; - - LiveInterval* getNewLI() { - if (newLI == 0) { - const 
TargetRegisterClass *trc = ls.mri->getRegClass(li.reg); - unsigned vreg = ls.mri->createVirtualRegister(trc); - newLI = &ls.lis->getOrCreateInterval(vreg); - } - return newLI; - } - - VNInfo* getNewVNI(VNInfo *oldVNI) { - VNInfo *newVNI = vniMap[oldVNI]; - - if (newVNI == 0) { - newVNI = getNewLI()->createValueCopy(oldVNI, - ls.lis->getVNInfoAllocator()); - vniMap[oldVNI] = newVNI; - } - - return newVNI; - } - - void applyIncoming() { - if (!inSplit) { - return; - } - - MachineBasicBlock *preHeader = loop.getLoopPreheader(); - if (preHeader == 0) { - assert(ls.canInsertPreHeader(loop) && - "Can't insert required preheader."); - preHeader = &ls.insertPreHeader(loop); - } - - LiveRange *preHeaderRange = - ls.lis->findExitingRange(li, preHeader); - assert(preHeaderRange != 0 && "Range not live into preheader."); - - // Insert the new copy. - MachineInstr *copy = BuildMI(*preHeader, - preHeader->getFirstTerminator(), - DebugLoc(), - ls.tii->get(TargetOpcode::COPY)) - .addReg(getNewLI()->reg, RegState::Define) - .addReg(li.reg, RegState::Kill); - - ls.lis->InsertMachineInstrInMaps(copy); - - SlotIndex copyDefIdx = ls.lis->getInstructionIndex(copy).getDefIndex(); - - VNInfo *newVal = getNewVNI(preHeaderRange->valno); - newVal->def = copyDefIdx; - newVal->setCopy(copy); - li.removeRange(copyDefIdx, ls.lis->getMBBEndIdx(preHeader), true); - - getNewLI()->addRange(LiveRange(copyDefIdx, - ls.lis->getMBBEndIdx(preHeader), - newVal)); - } - - void applyOutgoing() { - - for (std::set<MachineLoop::Edge>::iterator osItr = outSplits.begin(), - osEnd = outSplits.end(); - osItr != osEnd; ++osItr) { - MachineLoop::Edge edge = *osItr; - MachineBasicBlock *outBlock = edge.second; - if (ls.isCriticalEdge(edge)) { - assert(ls.canSplitEdge(edge) && "Unsplitable critical edge."); - outBlock = &ls.splitEdge(edge, loop); - } - LiveRange *outRange = ls.lis->findEnteringRange(li, outBlock); - assert(outRange != 0 && "No exiting range?"); - - MachineInstr *copy = BuildMI(*outBlock, outBlock->begin(), - DebugLoc(), - ls.tii->get(TargetOpcode::COPY)) - .addReg(li.reg, RegState::Define) - .addReg(getNewLI()->reg, RegState::Kill); - - ls.lis->InsertMachineInstrInMaps(copy); - - SlotIndex copyDefIdx = ls.lis->getInstructionIndex(copy).getDefIndex(); - - // Blow away output range definition. - outRange->valno->def = ls.lis->getInvalidIndex(); - li.removeRange(ls.lis->getMBBStartIdx(outBlock), copyDefIdx); - - SlotIndex newDefIdx = ls.lis->getMBBStartIdx(outBlock); - assert(ls.lis->getInstructionFromIndex(newDefIdx) == 0 && - "PHI def index points at actual instruction."); - VNInfo *newVal = - getNewLI()->getNextValue(newDefIdx, 0, ls.lis->getVNInfoAllocator()); - - getNewLI()->addRange(LiveRange(ls.lis->getMBBStartIdx(outBlock), - copyDefIdx, newVal)); - - } - } - - void copyRange(LiveRange &lr) { - std::pair<bool, LoopSplitter::SlotPair> lsr = - ls.getLoopSubRange(lr, loop); - - if (!lsr.first) - return; - - LiveRange loopRange(lsr.second.first, lsr.second.second, - getNewVNI(lr.valno)); - - li.removeRange(loopRange.start, loopRange.end, true); - - getNewLI()->addRange(loopRange); - } - - void copyRanges() { - for (std::vector<MachineInstr*>::iterator iItr = loopInstrs.begin(), - iEnd = loopInstrs.end(); - iItr != iEnd; ++iItr) { - MachineInstr &instr = **iItr; - SlotIndex instrIdx = ls.lis->getInstructionIndex(&instr); - if (instr.modifiesRegister(li.reg, 0)) { - LiveRange *defRange = - li.getLiveRangeContaining(instrIdx.getDefIndex()); - if (defRange != 0) // May have caught this already. 
- copyRange(*defRange); - } - if (instr.readsRegister(li.reg, 0)) { - LiveRange *useRange = - li.getLiveRangeContaining(instrIdx.getUseIndex()); - if (useRange != 0) { // May have caught this already. - copyRange(*useRange); - } - } - } - - for (MachineLoop::block_iterator bbItr = loop.block_begin(), - bbEnd = loop.block_end(); - bbItr != bbEnd; ++bbItr) { - MachineBasicBlock &loopBlock = **bbItr; - LiveRange *enteringRange = - ls.lis->findEnteringRange(li, &loopBlock); - if (enteringRange != 0) { - copyRange(*enteringRange); - } - } - } - - void renameInside() { - for (std::vector<MachineInstr*>::iterator iItr = loopInstrs.begin(), - iEnd = loopInstrs.end(); - iItr != iEnd; ++iItr) { - MachineInstr &instr = **iItr; - for (unsigned i = 0; i < instr.getNumOperands(); ++i) { - MachineOperand &mop = instr.getOperand(i); - if (mop.isReg() && mop.getReg() == li.reg) { - mop.setReg(getNewLI()->reg); - } - } - } - } - - }; - - void LoopSplitter::getAnalysisUsage(AnalysisUsage &au) const { - au.addRequired<MachineDominatorTree>(); - au.addPreserved<MachineDominatorTree>(); - au.addRequired<MachineLoopInfo>(); - au.addPreserved<MachineLoopInfo>(); - au.addPreservedID(RegisterCoalescerPassID); - au.addPreserved<CalculateSpillWeights>(); - au.addPreserved<LiveStacks>(); - au.addRequired<SlotIndexes>(); - au.addPreserved<SlotIndexes>(); - au.addRequired<LiveIntervals>(); - au.addPreserved<LiveIntervals>(); - MachineFunctionPass::getAnalysisUsage(au); - } - - bool LoopSplitter::runOnMachineFunction(MachineFunction &fn) { - - mf = &fn; - mri = &mf->getRegInfo(); - tii = mf->getTarget().getInstrInfo(); - tri = mf->getTarget().getRegisterInfo(); - sis = &getAnalysis<SlotIndexes>(); - lis = &getAnalysis<LiveIntervals>(); - mli = &getAnalysis<MachineLoopInfo>(); - mdt = &getAnalysis<MachineDominatorTree>(); - - fqn = mf->getFunction()->getParent()->getModuleIdentifier() + "." + - mf->getFunction()->getName().str(); - - dbgs() << "Splitting " << mf->getFunction()->getName() << "."; - - dumpOddTerminators(); - -// dbgs() << "----------------------------------------\n"; -// lis->dump(); -// dbgs() << "----------------------------------------\n"; - -// std::deque<MachineLoop*> loops; -// std::copy(mli->begin(), mli->end(), std::back_inserter(loops)); -// dbgs() << "Loops:\n"; -// while (!loops.empty()) { -// MachineLoop &loop = *loops.front(); -// loops.pop_front(); -// std::copy(loop.begin(), loop.end(), std::back_inserter(loops)); - -// dumpLoopInfo(loop); -// } - - //lis->dump(); - //exit(0); - - // Setup initial intervals. 
- for (LiveIntervals::iterator liItr = lis->begin(), liEnd = lis->end(); - liItr != liEnd; ++liItr) { - LiveInterval *li = liItr->second; - - if (TargetRegisterInfo::isVirtualRegister(li->reg) && - !lis->intervalIsInOneMBB(*li)) { - intervals.push_back(li); - } - } - - processIntervals(); - - intervals.clear(); - -// dbgs() << "----------------------------------------\n"; -// lis->dump(); -// dbgs() << "----------------------------------------\n"; - - dumpOddTerminators(); - - //exit(1); - - return false; - } - - void LoopSplitter::releaseMemory() { - fqn.clear(); - intervals.clear(); - loopRangeMap.clear(); - } - - void LoopSplitter::dumpOddTerminators() { - for (MachineFunction::iterator bbItr = mf->begin(), bbEnd = mf->end(); - bbItr != bbEnd; ++bbItr) { - MachineBasicBlock *mbb = &*bbItr; - MachineBasicBlock *a = 0, *b = 0; - SmallVector<MachineOperand, 4> c; - if (tii->AnalyzeBranch(*mbb, a, b, c)) { - dbgs() << "MBB#" << mbb->getNumber() << " has multiway terminator.\n"; - dbgs() << " Terminators:\n"; - for (MachineBasicBlock::iterator iItr = mbb->begin(), iEnd = mbb->end(); - iItr != iEnd; ++iItr) { - MachineInstr *instr= &*iItr; - dbgs() << " " << *instr << ""; - } - dbgs() << "\n Listed successors: [ "; - for (MachineBasicBlock::succ_iterator sItr = mbb->succ_begin(), sEnd = mbb->succ_end(); - sItr != sEnd; ++sItr) { - MachineBasicBlock *succMBB = *sItr; - dbgs() << succMBB->getNumber() << " "; - } - dbgs() << "]\n\n"; - } - } - } - - void LoopSplitter::dumpLoopInfo(MachineLoop &loop) { - MachineBasicBlock &headerBlock = *loop.getHeader(); - typedef SmallVector<MachineLoop::Edge, 8> ExitEdgesList; - ExitEdgesList exitEdges; - loop.getExitEdges(exitEdges); - - dbgs() << " Header: BB#" << headerBlock.getNumber() << ", Contains: [ "; - for (std::vector<MachineBasicBlock*>::const_iterator - subBlockItr = loop.getBlocks().begin(), - subBlockEnd = loop.getBlocks().end(); - subBlockItr != subBlockEnd; ++subBlockItr) { - MachineBasicBlock &subBlock = **subBlockItr; - dbgs() << "BB#" << subBlock.getNumber() << " "; - } - dbgs() << "], Exit edges: [ "; - for (ExitEdgesList::iterator exitEdgeItr = exitEdges.begin(), - exitEdgeEnd = exitEdges.end(); - exitEdgeItr != exitEdgeEnd; ++exitEdgeItr) { - MachineLoop::Edge &exitEdge = *exitEdgeItr; - dbgs() << "(MBB#" << exitEdge.first->getNumber() - << ", MBB#" << exitEdge.second->getNumber() << ") "; - } - dbgs() << "], Sub-Loop Headers: [ "; - for (MachineLoop::iterator subLoopItr = loop.begin(), - subLoopEnd = loop.end(); - subLoopItr != subLoopEnd; ++subLoopItr) { - MachineLoop &subLoop = **subLoopItr; - MachineBasicBlock &subLoopBlock = *subLoop.getHeader(); - dbgs() << "BB#" << subLoopBlock.getNumber() << " "; - } - dbgs() << "]\n"; - } - - void LoopSplitter::updateTerminators(MachineBasicBlock &mbb) { - mbb.updateTerminator(); - - for (MachineBasicBlock::iterator miItr = mbb.begin(), miEnd = mbb.end(); - miItr != miEnd; ++miItr) { - if (lis->isNotInMIMap(miItr)) { - lis->InsertMachineInstrInMaps(miItr); - } - } - } - - bool LoopSplitter::canInsertPreHeader(MachineLoop &loop) { - MachineBasicBlock *header = loop.getHeader(); - MachineBasicBlock *a = 0, *b = 0; - SmallVector<MachineOperand, 4> c; - - for (MachineBasicBlock::pred_iterator pbItr = header->pred_begin(), - pbEnd = header->pred_end(); - pbItr != pbEnd; ++pbItr) { - MachineBasicBlock *predBlock = *pbItr; - if (!!tii->AnalyzeBranch(*predBlock, a, b, c)) { - return false; - } - } - - MachineFunction::iterator headerItr(header); - if (headerItr == mf->begin()) - return true; - 
MachineBasicBlock *headerLayoutPred = llvm::prior(headerItr); - assert(headerLayoutPred != 0 && "Header should have layout pred."); - - return (!tii->AnalyzeBranch(*headerLayoutPred, a, b, c)); - } - - MachineBasicBlock& LoopSplitter::insertPreHeader(MachineLoop &loop) { - assert(loop.getLoopPreheader() == 0 && "Loop already has preheader."); - - MachineBasicBlock &header = *loop.getHeader(); - - // Save the preds - we'll need to update them once we insert the preheader. - typedef std::set<MachineBasicBlock*> HeaderPreds; - HeaderPreds headerPreds; - - for (MachineBasicBlock::pred_iterator predItr = header.pred_begin(), - predEnd = header.pred_end(); - predItr != predEnd; ++predItr) { - if (!loop.contains(*predItr)) - headerPreds.insert(*predItr); - } - - assert(!headerPreds.empty() && "No predecessors for header?"); - - //dbgs() << fqn << " MBB#" << header.getNumber() << " inserting preheader..."; - - MachineBasicBlock *preHeader = - mf->CreateMachineBasicBlock(header.getBasicBlock()); - - assert(preHeader != 0 && "Failed to create pre-header."); - - mf->insert(header, preHeader); - - for (HeaderPreds::iterator hpItr = headerPreds.begin(), - hpEnd = headerPreds.end(); - hpItr != hpEnd; ++hpItr) { - assert(*hpItr != 0 && "How'd a null predecessor get into this set?"); - MachineBasicBlock &hp = **hpItr; - hp.ReplaceUsesOfBlockWith(&header, preHeader); - } - preHeader->addSuccessor(&header); - - MachineBasicBlock *oldLayoutPred = - llvm::prior(MachineFunction::iterator(preHeader)); - if (oldLayoutPred != 0) { - updateTerminators(*oldLayoutPred); - } - - lis->InsertMBBInMaps(preHeader); - - if (MachineLoop *parentLoop = loop.getParentLoop()) { - assert(parentLoop->getHeader() != loop.getHeader() && - "Parent loop has same header?"); - parentLoop->addBasicBlockToLoop(preHeader, mli->getBase()); - - // Invalidate all parent loop ranges. - while (parentLoop != 0) { - loopRangeMap.erase(parentLoop); - parentLoop = parentLoop->getParentLoop(); - } - } - - for (LiveIntervals::iterator liItr = lis->begin(), - liEnd = lis->end(); - liItr != liEnd; ++liItr) { - LiveInterval &li = *liItr->second; - - // Is this safe for physregs? - // TargetRegisterInfo::isPhysicalRegister(li.reg) || - if (!lis->isLiveInToMBB(li, &header)) - continue; - - if (lis->isLiveInToMBB(li, preHeader)) { - assert(lis->isLiveOutOfMBB(li, preHeader) && - "Range terminates in newly added preheader?"); - continue; - } - - bool insertRange = false; - - for (MachineBasicBlock::pred_iterator predItr = preHeader->pred_begin(), - predEnd = preHeader->pred_end(); - predItr != predEnd; ++predItr) { - MachineBasicBlock *predMBB = *predItr; - if (lis->isLiveOutOfMBB(li, predMBB)) { - insertRange = true; - break; - } - } - - if (!insertRange) - continue; - - SlotIndex newDefIdx = lis->getMBBStartIdx(preHeader); - assert(lis->getInstructionFromIndex(newDefIdx) == 0 && - "PHI def index points at actual instruction."); - VNInfo *newVal = li.getNextValue(newDefIdx, 0, lis->getVNInfoAllocator()); - li.addRange(LiveRange(lis->getMBBStartIdx(preHeader), - lis->getMBBEndIdx(preHeader), - newVal)); - } - - - //dbgs() << "Dumping SlotIndexes:\n"; - //sis->dump(); - - //dbgs() << "done. 
(Added MBB#" << preHeader->getNumber() << ")\n"; - - return *preHeader; - } - - bool LoopSplitter::isCriticalEdge(MachineLoop::Edge &edge) { - assert(edge.first->succ_size() > 1 && "Non-sensical edge."); - if (edge.second->pred_size() > 1) - return true; - return false; - } - - bool LoopSplitter::canSplitEdge(MachineLoop::Edge &edge) { - MachineFunction::iterator outBlockItr(edge.second); - if (outBlockItr == mf->begin()) - return true; - MachineBasicBlock *outBlockLayoutPred = llvm::prior(outBlockItr); - assert(outBlockLayoutPred != 0 && "Should have a layout pred if out!=begin."); - MachineBasicBlock *a = 0, *b = 0; - SmallVector<MachineOperand, 4> c; - return (!tii->AnalyzeBranch(*outBlockLayoutPred, a, b, c) && - !tii->AnalyzeBranch(*edge.first, a, b, c)); - } - - MachineBasicBlock& LoopSplitter::splitEdge(MachineLoop::Edge &edge, - MachineLoop &loop) { - - MachineBasicBlock &inBlock = *edge.first; - MachineBasicBlock &outBlock = *edge.second; - - assert((inBlock.succ_size() > 1) && (outBlock.pred_size() > 1) && - "Splitting non-critical edge?"); - - //dbgs() << fqn << " Splitting edge (MBB#" << inBlock.getNumber() - // << " -> MBB#" << outBlock.getNumber() << ")..."; - - MachineBasicBlock *splitBlock = - mf->CreateMachineBasicBlock(); - - assert(splitBlock != 0 && "Failed to create split block."); - - mf->insert(&outBlock, splitBlock); - - inBlock.ReplaceUsesOfBlockWith(&outBlock, splitBlock); - splitBlock->addSuccessor(&outBlock); - - MachineBasicBlock *oldLayoutPred = - llvm::prior(MachineFunction::iterator(splitBlock)); - if (oldLayoutPred != 0) { - updateTerminators(*oldLayoutPred); - } - - lis->InsertMBBInMaps(splitBlock); - - loopRangeMap.erase(&loop); - - MachineLoop *splitParentLoop = loop.getParentLoop(); - while (splitParentLoop != 0 && - !splitParentLoop->contains(&outBlock)) { - splitParentLoop = splitParentLoop->getParentLoop(); - } - - if (splitParentLoop != 0) { - assert(splitParentLoop->contains(&loop) && - "Split-block parent doesn't contain original loop?"); - splitParentLoop->addBasicBlockToLoop(splitBlock, mli->getBase()); - - // Invalidate all parent loop ranges. - while (splitParentLoop != 0) { - loopRangeMap.erase(splitParentLoop); - splitParentLoop = splitParentLoop->getParentLoop(); - } - } - - - for (LiveIntervals::iterator liItr = lis->begin(), - liEnd = lis->end(); - liItr != liEnd; ++liItr) { - LiveInterval &li = *liItr->second; - bool intersects = lis->isLiveOutOfMBB(li, &inBlock) && - lis->isLiveInToMBB(li, &outBlock); - if (lis->isLiveInToMBB(li, splitBlock)) { - if (!intersects) { - li.removeRange(lis->getMBBStartIdx(splitBlock), - lis->getMBBEndIdx(splitBlock), true); - } - } else if (intersects) { - SlotIndex newDefIdx = lis->getMBBStartIdx(splitBlock); - assert(lis->getInstructionFromIndex(newDefIdx) == 0 && - "PHI def index points at actual instruction."); - VNInfo *newVal = li.getNextValue(newDefIdx, 0, - lis->getVNInfoAllocator()); - li.addRange(LiveRange(lis->getMBBStartIdx(splitBlock), - lis->getMBBEndIdx(splitBlock), - newVal)); - } - } - - //dbgs() << "done. 
(Added MBB#" << splitBlock->getNumber() << ")\n"; - - return *splitBlock; - } - - LoopSplitter::LoopRanges& LoopSplitter::getLoopRanges(MachineLoop &loop) { - typedef std::set<MachineBasicBlock*, StartSlotComparator> LoopMBBSet; - LoopRangeMap::iterator lrItr = loopRangeMap.find(&loop); - if (lrItr == loopRangeMap.end()) { - LoopMBBSet loopMBBs((StartSlotComparator(*lis))); - std::copy(loop.block_begin(), loop.block_end(), - std::inserter(loopMBBs, loopMBBs.begin())); - - assert(!loopMBBs.empty() && "No blocks in loop?"); - - LoopRanges &loopRanges = loopRangeMap[&loop]; - assert(loopRanges.empty() && "Loop encountered but not processed?"); - SlotIndex oldEnd = lis->getMBBEndIdx(*loopMBBs.begin()); - loopRanges.push_back( - std::make_pair(lis->getMBBStartIdx(*loopMBBs.begin()), - lis->getInvalidIndex())); - for (LoopMBBSet::iterator curBlockItr = llvm::next(loopMBBs.begin()), - curBlockEnd = loopMBBs.end(); - curBlockItr != curBlockEnd; ++curBlockItr) { - SlotIndex newStart = lis->getMBBStartIdx(*curBlockItr); - if (newStart != oldEnd) { - loopRanges.back().second = oldEnd; - loopRanges.push_back(std::make_pair(newStart, - lis->getInvalidIndex())); - } - oldEnd = lis->getMBBEndIdx(*curBlockItr); - } - - loopRanges.back().second = - lis->getMBBEndIdx(*llvm::prior(loopMBBs.end())); - - return loopRanges; - } - return lrItr->second; - } - - std::pair<bool, LoopSplitter::SlotPair> LoopSplitter::getLoopSubRange( - const LiveRange &lr, - MachineLoop &loop) { - LoopRanges &loopRanges = getLoopRanges(loop); - LoopRanges::iterator lrItr = loopRanges.begin(), - lrEnd = loopRanges.end(); - while (lrItr != lrEnd && lr.start >= lrItr->second) { - ++lrItr; - } - - if (lrItr == lrEnd) { - SlotIndex invalid = lis->getInvalidIndex(); - return std::make_pair(false, SlotPair(invalid, invalid)); - } - - SlotIndex srStart(lr.start < lrItr->first ? lrItr->first : lr.start); - SlotIndex srEnd(lr.end > lrItr->second ? lrItr->second : lr.end); - - return std::make_pair(true, SlotPair(srStart, srEnd)); - } - - void LoopSplitter::dumpLoopRanges(MachineLoop &loop) { - LoopRanges &loopRanges = getLoopRanges(loop); - dbgs() << "For loop MBB#" << loop.getHeader()->getNumber() << ", subranges are: [ "; - for (LoopRanges::iterator lrItr = loopRanges.begin(), lrEnd = loopRanges.end(); - lrItr != lrEnd; ++lrItr) { - dbgs() << "[" << lrItr->first << ", " << lrItr->second << ") "; - } - dbgs() << "]\n"; - } - - void LoopSplitter::processHeader(LoopSplit &split) { - MachineBasicBlock &header = *split.getLoop().getHeader(); - //dbgs() << " Processing loop header BB#" << header.getNumber() << "\n"; - - if (!lis->isLiveInToMBB(split.getLI(), &header)) - return; // Not live in, but nothing wrong so far. - - MachineBasicBlock *preHeader = split.getLoop().getLoopPreheader(); - if (!preHeader) { - - if (!canInsertPreHeader(split.getLoop())) { - split.invalidate(); - return; // Couldn't insert a pre-header. Bail on this interval. 
- } - - for (MachineBasicBlock::pred_iterator predItr = header.pred_begin(), - predEnd = header.pred_end(); - predItr != predEnd; ++predItr) { - if (lis->isLiveOutOfMBB(split.getLI(), *predItr)) { - split.splitIncoming(); - break; - } - } - } else if (lis->isLiveOutOfMBB(split.getLI(), preHeader)) { - split.splitIncoming(); - } - } - - void LoopSplitter::processLoopExits(LoopSplit &split) { - typedef SmallVector<MachineLoop::Edge, 8> ExitEdgesList; - ExitEdgesList exitEdges; - split.getLoop().getExitEdges(exitEdges); - - //dbgs() << " Processing loop exits:\n"; - - for (ExitEdgesList::iterator exitEdgeItr = exitEdges.begin(), - exitEdgeEnd = exitEdges.end(); - exitEdgeItr != exitEdgeEnd; ++exitEdgeItr) { - MachineLoop::Edge exitEdge = *exitEdgeItr; - - LiveRange *outRange = - split.getLI().getLiveRangeContaining(lis->getMBBStartIdx(exitEdge.second)); - - if (outRange != 0) { - if (isCriticalEdge(exitEdge) && !canSplitEdge(exitEdge)) { - split.invalidate(); - return; - } - - split.splitOutgoing(exitEdge); - } - } - } - - void LoopSplitter::processLoopUses(LoopSplit &split) { - std::set<MachineInstr*> processed; - - for (MachineRegisterInfo::reg_iterator - rItr = mri->reg_begin(split.getLI().reg), - rEnd = mri->reg_end(); - rItr != rEnd; ++rItr) { - MachineInstr &instr = *rItr; - if (split.getLoop().contains(&instr) && processed.count(&instr) == 0) { - split.addLoopInstr(&instr); - processed.insert(&instr); - } - } - - //dbgs() << " Rewriting reg" << li.reg << " to reg" << newLI->reg - // << " in blocks [ "; - //dbgs() << "]\n"; - } - - bool LoopSplitter::splitOverLoop(LiveInterval &li, MachineLoop &loop) { - assert(TargetRegisterInfo::isVirtualRegister(li.reg) && - "Attempt to split physical register."); - - LoopSplit split(*this, li, loop); - processHeader(split); - if (split.isValid()) - processLoopExits(split); - if (split.isValid()) - processLoopUses(split); - if (split.isValid() /* && split.isWorthwhile() */) { - split.apply(); - DEBUG(dbgs() << "Success.\n"); - return true; - } - DEBUG(dbgs() << "Failed.\n"); - return false; - } - - void LoopSplitter::processInterval(LiveInterval &li) { - std::deque<MachineLoop*> loops; - std::copy(mli->begin(), mli->end(), std::back_inserter(loops)); - - while (!loops.empty()) { - MachineLoop &loop = *loops.front(); - loops.pop_front(); - DEBUG( - dbgs() << fqn << " reg" << li.reg << " " << li.weight << " BB#" - << loop.getHeader()->getNumber() << " "; - ); - if (!splitOverLoop(li, loop)) { - // Couldn't split over outer loop, schedule sub-loops to be checked. - std::copy(loop.begin(), loop.end(), std::back_inserter(loops)); - } - } - } - - void LoopSplitter::processIntervals() { - while (!intervals.empty()) { - LiveInterval &li = *intervals.front(); - intervals.pop_front(); - - assert(!lis->intervalIsInOneMBB(li) && - "Single interval in process worklist."); - - processInterval(li); - } - } - -} diff --git a/lib/CodeGen/Splitter.h b/lib/CodeGen/Splitter.h deleted file mode 100644 index 9fb1b8b30139..000000000000 --- a/lib/CodeGen/Splitter.h +++ /dev/null @@ -1,101 +0,0 @@ -//===-- llvm/CodeGen/Splitter.h - Splitter -*- C++ -*----------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_SPLITTER_H -#define LLVM_CODEGEN_SPLITTER_H - -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/SlotIndexes.h" - -#include <deque> -#include <map> -#include <string> -#include <vector> - -namespace llvm { - - class LiveInterval; - class LiveIntervals; - struct LiveRange; - class LoopSplit; - class MachineDominatorTree; - class MachineRegisterInfo; - class SlotIndexes; - class TargetInstrInfo; - class VNInfo; - - class LoopSplitter : public MachineFunctionPass { - friend class LoopSplit; - public: - static char ID; - - LoopSplitter() : MachineFunctionPass(ID) { - initializeLoopSplitterPass(*PassRegistry::getPassRegistry()); - } - - virtual void getAnalysisUsage(AnalysisUsage &au) const; - - virtual bool runOnMachineFunction(MachineFunction &fn); - - virtual void releaseMemory(); - - - private: - - MachineFunction *mf; - LiveIntervals *lis; - MachineLoopInfo *mli; - MachineRegisterInfo *mri; - MachineDominatorTree *mdt; - SlotIndexes *sis; - const TargetInstrInfo *tii; - const TargetRegisterInfo *tri; - - std::string fqn; - std::deque<LiveInterval*> intervals; - - typedef std::pair<SlotIndex, SlotIndex> SlotPair; - typedef std::vector<SlotPair> LoopRanges; - typedef std::map<MachineLoop*, LoopRanges> LoopRangeMap; - LoopRangeMap loopRangeMap; - - void dumpLoopInfo(MachineLoop &loop); - - void dumpOddTerminators(); - - void updateTerminators(MachineBasicBlock &mbb); - - bool canInsertPreHeader(MachineLoop &loop); - MachineBasicBlock& insertPreHeader(MachineLoop &loop); - - bool isCriticalEdge(MachineLoop::Edge &edge); - bool canSplitEdge(MachineLoop::Edge &edge); - MachineBasicBlock& splitEdge(MachineLoop::Edge &edge, MachineLoop &loop); - - LoopRanges& getLoopRanges(MachineLoop &loop); - std::pair<bool, SlotPair> getLoopSubRange(const LiveRange &lr, - MachineLoop &loop); - - void dumpLoopRanges(MachineLoop &loop); - - void processHeader(LoopSplit &split); - void processLoopExits(LoopSplit &split); - void processLoopUses(LoopSplit &split); - - bool splitOverLoop(LiveInterval &li, MachineLoop &loop); - - void processInterval(LiveInterval &li); - - void processIntervals(); - }; - -} - -#endif diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index 1f0e5a2711ae..43a6ad8c97a4 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -123,16 +123,11 @@ bool StackProtector::RequiresStackProtector() const { // protectors. return true; - if (ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType())) { - // We apparently only care about character arrays. - if (!AT->getElementType()->isIntegerTy(8)) - continue; - + if (ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType())) // If an array has more than SSPBufferSize bytes of allocated space, // then we emit stack protectors. 
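The StackProtector.cpp hunk above removes the old restriction to i8 (character) arrays, so any alloca whose allocated type is an array of at least SSPBufferSize bytes now requests a protector. A minimal restatement of the resulting check, with a toy AllocaInfo standing in for the IR queries (the names are illustrative, not LLVM API):

// Toy summary of what RequiresStackProtector() asks about one alloca.
struct AllocaInfo {
  bool IsArrayType;                // allocated type is an ArrayType
  unsigned long long SizeInBytes;  // TD->getTypeAllocSize(AT)
};

// After this change the element type no longer matters; only the total
// allocated size of the array is compared against SSPBufferSize.
bool needsProtector(const AllocaInfo &AI, unsigned SSPBufferSize) {
  return AI.IsArrayType && AI.SizeInBytes >= SSPBufferSize;
}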
if (SSPBufferSize <= TD->getTypeAllocSize(AT)) return true; - } } } diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index 57cbe1ba5960..1e940b1d0711 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "stackcoloring" -#include "VirtRegMap.h" #include "llvm/Function.h" #include "llvm/Module.h" #include "llvm/CodeGen/Passes.h" @@ -40,29 +39,17 @@ DisableSharing("no-stack-slot-sharing", cl::init(false), cl::Hidden, cl::desc("Suppress slot sharing during stack coloring")); -static cl::opt<bool> -ColorWithRegsOpt("color-ss-with-regs", - cl::init(false), cl::Hidden, - cl::desc("Color stack slots with free registers")); - - static cl::opt<int> DCELimit("ssc-dce-limit", cl::init(-1), cl::Hidden); STATISTIC(NumEliminated, "Number of stack slots eliminated due to coloring"); -STATISTIC(NumRegRepl, "Number of stack slot refs replaced with reg refs"); -STATISTIC(NumLoadElim, "Number of loads eliminated"); -STATISTIC(NumStoreElim, "Number of stores eliminated"); STATISTIC(NumDead, "Number of trivially dead stack accesses eliminated"); namespace { class StackSlotColoring : public MachineFunctionPass { bool ColorWithRegs; LiveStacks* LS; - VirtRegMap* VRM; MachineFrameInfo *MFI; - MachineRegisterInfo *MRI; const TargetInstrInfo *TII; - const TargetRegisterInfo *TRI; const MachineLoopInfo *loopInfo; // SSIntervals - Spill slot intervals. @@ -98,18 +85,12 @@ namespace { MachineFunctionPass(ID), ColorWithRegs(false), NextColor(-1) { initializeStackSlotColoringPass(*PassRegistry::getPassRegistry()); } - StackSlotColoring(bool RegColor) : - MachineFunctionPass(ID), ColorWithRegs(RegColor), NextColor(-1) { - initializeStackSlotColoringPass(*PassRegistry::getPassRegistry()); - } - + virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired<SlotIndexes>(); AU.addPreserved<SlotIndexes>(); AU.addRequired<LiveStacks>(); - AU.addRequired<VirtRegMap>(); - AU.addPreserved<VirtRegMap>(); AU.addRequired<MachineLoopInfo>(); AU.addPreserved<MachineLoopInfo>(); AU.addPreservedID(MachineDominatorsID); @@ -117,9 +98,6 @@ namespace { } virtual bool runOnMachineFunction(MachineFunction &MF); - virtual const char* getPassName() const { - return "Stack Slot Coloring"; - } private: void InitializeSlots(); @@ -127,41 +105,23 @@ namespace { bool OverlapWithAssignments(LiveInterval *li, int Color) const; int ColorSlot(LiveInterval *li); bool ColorSlots(MachineFunction &MF); - bool ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping, - SmallVector<SmallVector<int, 4>, 16> &RevMap, - BitVector &SlotIsReg); void RewriteInstruction(MachineInstr *MI, int OldFI, int NewFI, MachineFunction &MF); - bool PropagateBackward(MachineBasicBlock::iterator MII, - MachineBasicBlock *MBB, - unsigned OldReg, unsigned NewReg); - bool PropagateForward(MachineBasicBlock::iterator MII, - MachineBasicBlock *MBB, - unsigned OldReg, unsigned NewReg); - void UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI, - unsigned Reg, const TargetRegisterClass *RC, - SmallSet<unsigned, 4> &Defs, - MachineFunction &MF); - bool AllMemRefsCanBeUnfolded(int SS); bool RemoveDeadStores(MachineBasicBlock* MBB); }; } // end anonymous namespace char StackSlotColoring::ID = 0; +char &llvm::StackSlotColoringID = StackSlotColoring::ID; INITIALIZE_PASS_BEGIN(StackSlotColoring, "stack-slot-coloring", "Stack Slot Coloring", false, false) 
INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_DEPENDENCY(LiveStacks) -INITIALIZE_PASS_DEPENDENCY(VirtRegMap) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_END(StackSlotColoring, "stack-slot-coloring", "Stack Slot Coloring", false, false) -FunctionPass *llvm::createStackSlotColoringPass(bool RegColor) { - return new StackSlotColoring(RegColor); -} - namespace { // IntervalSorter - Comparison predicate that sort live intervals by // their weight. @@ -248,79 +208,6 @@ StackSlotColoring::OverlapWithAssignments(LiveInterval *li, int Color) const { return false; } -/// ColorSlotsWithFreeRegs - If there are any free registers available, try -/// replacing spill slots references with registers instead. -bool -StackSlotColoring::ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping, - SmallVector<SmallVector<int, 4>, 16> &RevMap, - BitVector &SlotIsReg) { - if (!(ColorWithRegs || ColorWithRegsOpt) || !VRM->HasUnusedRegisters()) - return false; - - bool Changed = false; - DEBUG(dbgs() << "Assigning unused registers to spill slots:\n"); - for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) { - LiveInterval *li = SSIntervals[i]; - int SS = TargetRegisterInfo::stackSlot2Index(li->reg); - if (!UsedColors[SS] || li->weight < 20) - // If the weight is < 20, i.e. two references in a loop with depth 1, - // don't bother with it. - continue; - - // These slots allow to share the same registers. - bool AllColored = true; - SmallVector<unsigned, 4> ColoredRegs; - for (unsigned j = 0, ee = RevMap[SS].size(); j != ee; ++j) { - int RSS = RevMap[SS][j]; - const TargetRegisterClass *RC = LS->getIntervalRegClass(RSS); - // If it's not colored to another stack slot, try coloring it - // to a "free" register. - if (!RC) { - AllColored = false; - continue; - } - unsigned Reg = VRM->getFirstUnusedRegister(RC); - if (!Reg) { - AllColored = false; - continue; - } - if (!AllMemRefsCanBeUnfolded(RSS)) { - AllColored = false; - continue; - } else { - DEBUG(dbgs() << "Assigning fi#" << RSS << " to " - << TRI->getName(Reg) << '\n'); - ColoredRegs.push_back(Reg); - SlotMapping[RSS] = Reg; - SlotIsReg.set(RSS); - Changed = true; - } - } - - // Register and its sub-registers are no longer free. - while (!ColoredRegs.empty()) { - unsigned Reg = ColoredRegs.back(); - ColoredRegs.pop_back(); - VRM->setRegisterUsed(Reg); - // If reg is a callee-saved register, it will have to be spilled in - // the prologue. - MRI->setPhysRegUsed(Reg); - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) { - VRM->setRegisterUsed(*AS); - MRI->setPhysRegUsed(*AS); - } - } - // This spill slot is dead after the rewrites - if (AllColored) { - MFI->RemoveStackObject(SS); - ++NumEliminated; - } - } - DEBUG(dbgs() << '\n'); - - return Changed; -} - /// ColorSlot - Assign a "color" (stack slot) to the specified stack slot. /// int StackSlotColoring::ColorSlot(LiveInterval *li) { @@ -372,7 +259,6 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { SmallVector<int, 16> SlotMapping(NumObjs, -1); SmallVector<float, 16> SlotWeights(NumObjs, 0.0); SmallVector<SmallVector<int, 4>, 16> RevMap(NumObjs); - BitVector SlotIsReg(NumObjs); BitVector UsedColors(NumObjs); DEBUG(dbgs() << "Color spill slot intervals:\n"); @@ -404,31 +290,19 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { DEBUG(dbgs() << '\n'); #endif - // Can we "color" a stack slot with a unused register? - Changed |= ColorSlotsWithFreeRegs(SlotMapping, RevMap, SlotIsReg); - if (!Changed) return false; // Rewrite all MO_FrameIndex operands. 
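ColorSlots() above walks the spill-slot live intervals in decreasing weight order and, via ColorSlot() and OverlapWithAssignments(), gives each interval the lowest color (slot) whose already-assigned intervals it does not overlap. A compressed standalone sketch of that greedy scheme, using integer endpoints in place of LiveIntervals (names invented for illustration):

#include <vector>

struct Interval { unsigned Start, End; };            // half-open [Start, End)

static bool overlaps(const Interval &A, const Interval &B) {
  return A.Start < B.End && B.Start < A.End;
}

// Intervals are assumed already sorted by decreasing weight, as
// ColorSlots() does with IntervalSorter. Returns one color per interval;
// intervals sharing a color can share a stack slot.
std::vector<unsigned> colorSlots(const std::vector<Interval> &Ivs) {
  std::vector<unsigned> Color(Ivs.size(), 0);
  std::vector<std::vector<unsigned> > Assigned;      // interval indices per color
  for (unsigned i = 0, e = Ivs.size(); i != e; ++i) {
    unsigned C = 0;
    for (; C != Assigned.size(); ++C) {
      bool Free = true;
      for (unsigned j = 0, je = Assigned[C].size(); j != je; ++j)
        if (overlaps(Ivs[i], Ivs[Assigned[C][j]])) { Free = false; break; }
      if (Free) break;
    }
    if (C == Assigned.size())
      Assigned.push_back(std::vector<unsigned>());   // need a fresh slot
    Assigned[C].push_back(i);
    Color[i] = C;
  }
  return Color;
}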
SmallVector<SmallSet<unsigned, 4>, 4> NewDefs(MF.getNumBlockIDs()); for (unsigned SS = 0, SE = SSRefs.size(); SS != SE; ++SS) { - bool isReg = SlotIsReg[SS]; int NewFI = SlotMapping[SS]; - if (NewFI == -1 || (NewFI == (int)SS && !isReg)) + if (NewFI == -1 || (NewFI == (int)SS)) continue; - const TargetRegisterClass *RC = LS->getIntervalRegClass(SS); SmallVector<MachineInstr*, 8> &RefMIs = SSRefs[SS]; for (unsigned i = 0, e = RefMIs.size(); i != e; ++i) - if (!isReg) - RewriteInstruction(RefMIs[i], SS, NewFI, MF); - else { - // Rewrite to use a register instead. - unsigned MBBId = RefMIs[i]->getParent()->getNumber(); - SmallSet<unsigned, 4> &Defs = NewDefs[MBBId]; - UnfoldAndRewriteInstruction(RefMIs[i], SS, NewFI, RC, Defs, MF); - } + RewriteInstruction(RefMIs[i], SS, NewFI, MF); } // Delete unused stack slots. @@ -441,28 +315,6 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { return true; } -/// AllMemRefsCanBeUnfolded - Return true if all references of the specified -/// spill slot index can be unfolded. -bool StackSlotColoring::AllMemRefsCanBeUnfolded(int SS) { - SmallVector<MachineInstr*, 8> &RefMIs = SSRefs[SS]; - for (unsigned i = 0, e = RefMIs.size(); i != e; ++i) { - MachineInstr *MI = RefMIs[i]; - if (TII->isLoadFromStackSlot(MI, SS) || - TII->isStoreToStackSlot(MI, SS)) - // Restore and spill will become copies. - return true; - if (!TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(), false, false)) - return false; - for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) { - MachineOperand &MO = MI->getOperand(j); - if (MO.isFI() && MO.getIndex() != SS) - // If it uses another frameindex, we can, currently* unfold it. - return false; - } - } - return true; -} - /// RewriteInstruction - Rewrite specified instruction by replacing references /// to old frame index with new one. void StackSlotColoring::RewriteInstruction(MachineInstr *MI, int OldFI, @@ -489,179 +341,6 @@ void StackSlotColoring::RewriteInstruction(MachineInstr *MI, int OldFI, (*I)->setValue(NewSV); } -/// PropagateBackward - Traverse backward and look for the definition of -/// OldReg. If it can successfully update all of the references with NewReg, -/// do so and return true. -bool StackSlotColoring::PropagateBackward(MachineBasicBlock::iterator MII, - MachineBasicBlock *MBB, - unsigned OldReg, unsigned NewReg) { - if (MII == MBB->begin()) - return false; - - SmallVector<MachineOperand*, 4> Uses; - SmallVector<MachineOperand*, 4> Refs; - while (--MII != MBB->begin()) { - bool FoundDef = false; // Not counting 2address def. - - Uses.clear(); - const MCInstrDesc &MCID = MII->getDesc(); - for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MII->getOperand(i); - if (!MO.isReg()) - continue; - unsigned Reg = MO.getReg(); - if (Reg == 0) - continue; - if (Reg == OldReg) { - if (MO.isImplicit()) - return false; - - // Abort the use is actually a sub-register def. We don't have enough - // information to figure out if it is really legal. - if (MO.getSubReg() || MII->isSubregToReg()) - return false; - - const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI); - if (RC && !RC->contains(NewReg)) - return false; - - if (MO.isUse()) { - Uses.push_back(&MO); - } else { - Refs.push_back(&MO); - if (!MII->isRegTiedToUseOperand(i)) - FoundDef = true; - } - } else if (TRI->regsOverlap(Reg, NewReg)) { - return false; - } else if (TRI->regsOverlap(Reg, OldReg)) { - if (!MO.isUse() || !MO.isKill()) - return false; - } - } - - if (FoundDef) { - // Found non-two-address def. 
Stop here. - for (unsigned i = 0, e = Refs.size(); i != e; ++i) - Refs[i]->setReg(NewReg); - return true; - } - - // Two-address uses must be updated as well. - for (unsigned i = 0, e = Uses.size(); i != e; ++i) - Refs.push_back(Uses[i]); - } - return false; -} - -/// PropagateForward - Traverse forward and look for the kill of OldReg. If -/// it can successfully update all of the uses with NewReg, do so and -/// return true. -bool StackSlotColoring::PropagateForward(MachineBasicBlock::iterator MII, - MachineBasicBlock *MBB, - unsigned OldReg, unsigned NewReg) { - if (MII == MBB->end()) - return false; - - SmallVector<MachineOperand*, 4> Uses; - while (++MII != MBB->end()) { - bool FoundKill = false; - const MCInstrDesc &MCID = MII->getDesc(); - for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MII->getOperand(i); - if (!MO.isReg()) - continue; - unsigned Reg = MO.getReg(); - if (Reg == 0) - continue; - if (Reg == OldReg) { - if (MO.isDef() || MO.isImplicit()) - return false; - - // Abort the use is actually a sub-register use. We don't have enough - // information to figure out if it is really legal. - if (MO.getSubReg()) - return false; - - const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI); - if (RC && !RC->contains(NewReg)) - return false; - if (MO.isKill()) - FoundKill = true; - - Uses.push_back(&MO); - } else if (TRI->regsOverlap(Reg, NewReg) || - TRI->regsOverlap(Reg, OldReg)) - return false; - } - if (FoundKill) { - for (unsigned i = 0, e = Uses.size(); i != e; ++i) - Uses[i]->setReg(NewReg); - return true; - } - } - return false; -} - -/// UnfoldAndRewriteInstruction - Rewrite specified instruction by unfolding -/// folded memory references and replacing those references with register -/// references instead. -void -StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI, - unsigned Reg, - const TargetRegisterClass *RC, - SmallSet<unsigned, 4> &Defs, - MachineFunction &MF) { - MachineBasicBlock *MBB = MI->getParent(); - if (unsigned DstReg = TII->isLoadFromStackSlot(MI, OldFI)) { - if (PropagateForward(MI, MBB, DstReg, Reg)) { - DEBUG(dbgs() << "Eliminated load: "); - DEBUG(MI->dump()); - ++NumLoadElim; - } else { - BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY), - DstReg).addReg(Reg); - ++NumRegRepl; - } - - if (!Defs.count(Reg)) { - // If this is the first use of Reg in this MBB and it wasn't previously - // defined in MBB, add it to livein. - MBB->addLiveIn(Reg); - Defs.insert(Reg); - } - } else if (unsigned SrcReg = TII->isStoreToStackSlot(MI, OldFI)) { - if (MI->killsRegister(SrcReg) && PropagateBackward(MI, MBB, SrcReg, Reg)) { - DEBUG(dbgs() << "Eliminated store: "); - DEBUG(MI->dump()); - ++NumStoreElim; - } else { - BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY), Reg) - .addReg(SrcReg); - ++NumRegRepl; - } - - // Remember reg has been defined in MBB. - Defs.insert(Reg); - } else { - SmallVector<MachineInstr*, 4> NewMIs; - bool Success = TII->unfoldMemoryOperand(MF, MI, Reg, false, false, NewMIs); - (void)Success; // Silence compiler warning. - assert(Success && "Failed to unfold!"); - MachineInstr *NewMI = NewMIs[0]; - MBB->insert(MI, NewMI); - ++NumRegRepl; - - if (NewMI->readsRegister(Reg)) { - if (!Defs.count(Reg)) - // If this is the first use of Reg in this MBB and it wasn't previously - // defined in MBB, add it to livein. 
- MBB->addLiveIn(Reg); - Defs.insert(Reg); - } - } - MBB->erase(MI); -} /// RemoveDeadStores - Scan through a basic block and look for loads followed /// by stores. If they're both using the same stack slot, then the store is @@ -679,33 +358,33 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { I != E; ++I) { if (DCELimit != -1 && (int)NumDead >= DCELimit) break; - + MachineBasicBlock::iterator NextMI = llvm::next(I); if (NextMI == MBB->end()) continue; - + int FirstSS, SecondSS; unsigned LoadReg = 0; unsigned StoreReg = 0; if (!(LoadReg = TII->isLoadFromStackSlot(I, FirstSS))) continue; if (!(StoreReg = TII->isStoreToStackSlot(NextMI, SecondSS))) continue; if (FirstSS != SecondSS || LoadReg != StoreReg || FirstSS == -1) continue; - + ++NumDead; changed = true; - + if (NextMI->findRegisterUseOperandIdx(LoadReg, true, 0) != -1) { ++NumDead; toErase.push_back(I); } - + toErase.push_back(NextMI); ++I; } - + for (SmallVector<MachineInstr*, 4>::iterator I = toErase.begin(), E = toErase.end(); I != E; ++I) (*I)->eraseFromParent(); - + return changed; } @@ -713,32 +392,27 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) { DEBUG({ dbgs() << "********** Stack Slot Coloring **********\n" - << "********** Function: " + << "********** Function: " << MF.getFunction()->getName() << '\n'; }); MFI = MF.getFrameInfo(); - MRI = &MF.getRegInfo(); TII = MF.getTarget().getInstrInfo(); - TRI = MF.getTarget().getRegisterInfo(); LS = &getAnalysis<LiveStacks>(); - VRM = &getAnalysis<VirtRegMap>(); loopInfo = &getAnalysis<MachineLoopInfo>(); bool Changed = false; unsigned NumSlots = LS->getNumIntervals(); - if (NumSlots < 2) { - if (NumSlots == 0 || !VRM->HasUnusedRegisters()) - // Nothing to do! - return false; - } + if (NumSlots == 0) + // Nothing to do! + return false; // If there are calls to setjmp or sigsetjmp, don't perform stack slot // coloring. The stack could be modified before the longjmp is executed, // resulting in the wrong value being used afterwards. (See // <rdar://problem/8007500>.) - if (MF.callsSetJmp()) + if (MF.exposesReturnsTwice()) return false; // Gather spill slot references diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp index 260cc0ee50a5..c6fdc7382435 100644 --- a/lib/CodeGen/StrongPHIElimination.cpp +++ b/lib/CodeGen/StrongPHIElimination.cpp @@ -228,7 +228,6 @@ static MachineOperand *findLastUse(MachineBasicBlock *MBB, unsigned Reg) { return &MO; } } - return NULL; } bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) { @@ -390,12 +389,10 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) { MachineOperand *LastUse = findLastUse(MBB, SrcReg); assert(LastUse); SlotIndex LastUseIndex = LI->getInstructionIndex(LastUse->getParent()); - SrcLI.removeRange(LastUseIndex.getDefIndex(), LI->getMBBEndIdx(MBB)); + SrcLI.removeRange(LastUseIndex.getRegSlot(), LI->getMBBEndIdx(MBB)); LastUse->setIsKill(true); } - LI->renumber(); - Allocator.Reset(); RegNodeMap.clear(); PHISrcDefs.clear(); @@ -745,7 +742,7 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, // Set the phi-def flag for the VN at this PHI. 
SlotIndex PHIIndex = LI->getInstructionIndex(PHI); - VNInfo *DestVNI = DestLI.getVNInfoAt(PHIIndex.getDefIndex()); + VNInfo *DestVNI = DestLI.getVNInfoAt(PHIIndex.getRegSlot()); assert(DestVNI); DestVNI->setIsPHIDef(true); @@ -756,7 +753,7 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB); DestVNI->def = MBBStartIndex; DestLI.addRange(LiveRange(MBBStartIndex, - PHIIndex.getDefIndex(), + PHIIndex.getRegSlot(), DestVNI)); return; } @@ -779,22 +776,21 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB); SlotIndex DestCopyIndex = LI->getInstructionIndex(CopyInstr); VNInfo *CopyVNI = CopyLI.getNextValue(MBBStartIndex, - CopyInstr, LI->getVNInfoAllocator()); CopyVNI->setIsPHIDef(true); CopyLI.addRange(LiveRange(MBBStartIndex, - DestCopyIndex.getDefIndex(), + DestCopyIndex.getRegSlot(), CopyVNI)); // Adjust DestReg's live interval to adjust for its new definition at // CopyInstr. LiveInterval &DestLI = LI->getOrCreateInterval(DestReg); SlotIndex PHIIndex = LI->getInstructionIndex(PHI); - DestLI.removeRange(PHIIndex.getDefIndex(), DestCopyIndex.getDefIndex()); + DestLI.removeRange(PHIIndex.getRegSlot(), DestCopyIndex.getRegSlot()); - VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getDefIndex()); + VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getRegSlot()); assert(DestVNI); - DestVNI->def = DestCopyIndex.getDefIndex(); + DestVNI->def = DestCopyIndex.getRegSlot(); InsertedDestCopies[CopyReg] = CopyInstr; } diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp index 3a6211a0f3e6..8ebfbcae785b 100644 --- a/lib/CodeGen/TailDuplication.cpp +++ b/lib/CodeGen/TailDuplication.cpp @@ -56,10 +56,10 @@ typedef std::vector<std::pair<MachineBasicBlock*,unsigned> > AvailableValsTy; namespace { /// TailDuplicatePass - Perform tail duplication. class TailDuplicatePass : public MachineFunctionPass { - bool PreRegAlloc; const TargetInstrInfo *TII; MachineModuleInfo *MMI; MachineRegisterInfo *MRI; + bool PreRegAlloc; // SSAUpdateVRs - A list of virtual registers for which to update SSA form. 
SmallVector<unsigned, 16> SSAUpdateVRs; @@ -70,11 +70,10 @@ namespace { public: static char ID; - explicit TailDuplicatePass(bool PreRA) : - MachineFunctionPass(ID), PreRegAlloc(PreRA) {} + explicit TailDuplicatePass() : + MachineFunctionPass(ID), PreRegAlloc(false) {} virtual bool runOnMachineFunction(MachineFunction &MF); - virtual const char *getPassName() const { return "Tail Duplication"; } private: void AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, @@ -118,14 +117,16 @@ namespace { char TailDuplicatePass::ID = 0; } -FunctionPass *llvm::createTailDuplicatePass(bool PreRegAlloc) { - return new TailDuplicatePass(PreRegAlloc); -} +char &llvm::TailDuplicateID = TailDuplicatePass::ID; + +INITIALIZE_PASS(TailDuplicatePass, "tailduplication", "Tail Duplication", + false, false) bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) { TII = MF.getTarget().getInstrInfo(); MRI = &MF.getRegInfo(); MMI = getAnalysisIfAvailable<MachineModuleInfo>(); + PreRegAlloc = MRI->isSSA(); bool MadeChange = false; while (TailDuplicateBlocks(MF)) @@ -432,7 +433,7 @@ void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI, MO.setReg(VI->second); } } - PredBB->insert(PredBB->end(), NewMI); + PredBB->insert(PredBB->instr_end(), NewMI); } /// UpdateSuccessorsPHIs - After FromBB is tail duplicated into its predecessor @@ -553,7 +554,7 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF, bool HasIndirectbr = false; if (!TailBB.empty()) - HasIndirectbr = TailBB.back().getDesc().isIndirectBranch(); + HasIndirectbr = TailBB.back().isIndirectBranch(); if (HasIndirectbr && PreRegAlloc) MaxDuplicateCount = 20; @@ -561,22 +562,21 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF, // Check the instructions in the block to determine whether tail-duplication // is invalid or unlikely to be profitable. unsigned InstrCount = 0; - for (MachineBasicBlock::const_iterator I = TailBB.begin(); I != TailBB.end(); - ++I) { + for (MachineBasicBlock::iterator I = TailBB.begin(); I != TailBB.end(); ++I) { // Non-duplicable things shouldn't be tail-duplicated. - if (I->getDesc().isNotDuplicable()) + if (I->isNotDuplicable()) return false; // Do not duplicate 'return' instructions if this is a pre-regalloc run. // A return may expand into a lot more instructions (e.g. reload of callee // saved registers) after PEI. - if (PreRegAlloc && I->getDesc().isReturn()) + if (PreRegAlloc && I->isReturn()) return false; // Avoid duplicating calls before register allocation. Calls presents a // barrier to register allocation so duplicating them may end up increasing // spills. - if (PreRegAlloc && I->getDesc().isCall()) + if (PreRegAlloc && I->isCall()) return false; if (!I->isPHI() && !I->isDebugValue()) @@ -611,7 +611,7 @@ TailDuplicatePass::isSimpleBB(MachineBasicBlock *TailBB) { ++I; if (I == E) return true; - return I->getDesc().isUnconditionalBranch(); + return I->isUnconditionalBranch(); } static bool @@ -778,8 +778,10 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, // Clone the contents of TailBB into PredBB. DenseMap<unsigned, unsigned> LocalVRMap; SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; - MachineBasicBlock::iterator I = TailBB->begin(); - while (I != TailBB->end()) { + // Use instr_iterator here to properly handle bundles, e.g. + // ARM Thumb2 IT block. 
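shouldTailDuplicate() above screens a candidate tail block: it counts only real (non-PHI, non-debug) instructions against MaxDuplicateCount, raises that cap to 20 for indirect-branch tails before register allocation, and rejects blocks containing non-duplicable instructions or, pre-RA, returns and calls. A rough standalone sketch of the same screening over a toy instruction descriptor (the struct and names are invented, not the LLVM classes):

#include <vector>

struct ToyInstr {
  bool IsPHI, IsDebugValue, IsNotDuplicable, IsReturn, IsCall;
};

bool shouldTailDuplicate(const std::vector<ToyInstr> &Tail,
                         bool PreRegAlloc, bool HasIndirectBr,
                         unsigned MaxDuplicateCount) {
  if (HasIndirectBr && PreRegAlloc)
    MaxDuplicateCount = 20;            // indirect-branch tails are worth more
  unsigned InstrCount = 0;
  for (unsigned i = 0, e = Tail.size(); i != e; ++i) {
    const ToyInstr &I = Tail[i];
    if (I.IsNotDuplicable)
      return false;                    // cannot legally copy this instruction
    if (PreRegAlloc && (I.IsReturn || I.IsCall))
      return false;                    // likely to expand or hurt regalloc
    if (!I.IsPHI && !I.IsDebugValue)
      ++InstrCount;
    if (InstrCount > MaxDuplicateCount)
      return false;                    // duplication would be too large
  }
  return true;
}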
+ MachineBasicBlock::instr_iterator I = TailBB->instr_begin(); + while (I != TailBB->instr_end()) { MachineInstr *MI = &*I; ++I; if (MI->isPHI()) { @@ -824,7 +826,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, SmallVector<MachineOperand, 4> PriorCond; // This has to check PrevBB->succ_size() because EH edges are ignored by // AnalyzeBranch. - if (PrevBB->succ_size() == 1 && + if (PrevBB->succ_size() == 1 && !TII->AnalyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) && PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 && !TailBB->hasAddressTaken()) { @@ -849,6 +851,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, // Replace def of virtual registers with new registers, and update // uses with PHI source register or the new registers. MachineInstr *MI = &*I++; + assert(!MI->isBundle() && "Not expecting bundles before regalloc!"); DuplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap, UsedByPhi); MI->eraseFromParent(); } diff --git a/lib/CodeGen/TargetFrameLoweringImpl.cpp b/lib/CodeGen/TargetFrameLoweringImpl.cpp new file mode 100644 index 000000000000..cadb87815dbe --- /dev/null +++ b/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -0,0 +1,45 @@ +//===----- TargetFrameLoweringImpl.cpp - Implement target frame interface --==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implements the layout of a stack frame on the target machine. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" + +#include <cstdlib> +using namespace llvm; + +TargetFrameLowering::~TargetFrameLowering() { +} + +/// getFrameIndexOffset - Returns the displacement from the frame register to +/// the stack frame of the specified index. This is the default implementation +/// which is overridden for some targets. +int TargetFrameLowering::getFrameIndexOffset(const MachineFunction &MF, + int FI) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + return MFI->getObjectOffset(FI) + MFI->getStackSize() - + getOffsetOfLocalArea() + MFI->getOffsetAdjustment(); +} + +int TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF, + int FI, unsigned &FrameReg) const { + const TargetRegisterInfo *RI = MF.getTarget().getRegisterInfo(); + + // By default, assume all frame indices are referenced via whatever + // getFrameRegister() says. The target can override this if it's doing + // something different. + FrameReg = RI->getFrameRegister(MF); + return getFrameIndexOffset(MF, FI); +} diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index f32678f12b0a..2beb9281e35b 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -24,6 +24,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -77,6 +78,9 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI, unsigned Reg0 = HasDef ? 
MI->getOperand(0).getReg() : 0; unsigned Reg1 = MI->getOperand(Idx1).getReg(); unsigned Reg2 = MI->getOperand(Idx2).getReg(); + unsigned SubReg0 = HasDef ? MI->getOperand(0).getSubReg() : 0; + unsigned SubReg1 = MI->getOperand(Idx1).getSubReg(); + unsigned SubReg2 = MI->getOperand(Idx2).getSubReg(); bool Reg1IsKill = MI->getOperand(Idx1).isKill(); bool Reg2IsKill = MI->getOperand(Idx2).isKill(); // If destination is tied to either of the commuted source register, then @@ -85,10 +89,12 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI, MI->getDesc().getOperandConstraint(Idx1, MCOI::TIED_TO) == 0) { Reg2IsKill = false; Reg0 = Reg2; + SubReg0 = SubReg2; } else if (HasDef && Reg0 == Reg2 && MI->getDesc().getOperandConstraint(Idx2, MCOI::TIED_TO) == 0) { Reg1IsKill = false; Reg0 = Reg1; + SubReg0 = SubReg1; } if (NewMI) { @@ -97,19 +103,23 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI, MachineFunction &MF = *MI->getParent()->getParent(); if (HasDef) return BuildMI(MF, MI->getDebugLoc(), MI->getDesc()) - .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead)) - .addReg(Reg2, getKillRegState(Reg2IsKill)) - .addReg(Reg1, getKillRegState(Reg2IsKill)); + .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead), SubReg0) + .addReg(Reg2, getKillRegState(Reg2IsKill), SubReg2) + .addReg(Reg1, getKillRegState(Reg1IsKill), SubReg1); else return BuildMI(MF, MI->getDebugLoc(), MI->getDesc()) - .addReg(Reg2, getKillRegState(Reg2IsKill)) - .addReg(Reg1, getKillRegState(Reg2IsKill)); + .addReg(Reg2, getKillRegState(Reg2IsKill), SubReg2) + .addReg(Reg1, getKillRegState(Reg1IsKill), SubReg1); } - if (HasDef) + if (HasDef) { MI->getOperand(0).setReg(Reg0); + MI->getOperand(0).setSubReg(SubReg0); + } MI->getOperand(Idx2).setReg(Reg1); MI->getOperand(Idx1).setReg(Reg2); + MI->getOperand(Idx2).setSubReg(SubReg1); + MI->getOperand(Idx1).setSubReg(SubReg2); MI->getOperand(Idx2).setIsKill(Reg1IsKill); MI->getOperand(Idx1).setIsKill(Reg2IsKill); return MI; @@ -121,6 +131,9 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI, bool TargetInstrInfoImpl::findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const { + assert(!MI->isBundle() && + "TargetInstrInfoImpl::findCommutedOpIndices() can't handle bundles"); + const MCInstrDesc &MCID = MI->getDesc(); if (!MCID.isCommutable()) return false; @@ -136,11 +149,28 @@ bool TargetInstrInfoImpl::findCommutedOpIndices(MachineInstr *MI, } +bool +TargetInstrInfoImpl::isUnpredicatedTerminator(const MachineInstr *MI) const { + if (!MI->isTerminator()) return false; + + // Conditional branch is a special case. 
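The commuteInstruction() hunk above extends operand commutation to carry sub-register indices alongside the registers and kill flags, and to keep a tied definition naming whichever source ends up in the tied slot. A simplified toy sketch of that bookkeeping over a plain operand struct (it assumes the def was tied to, and equal to, one of the sources; ToyOperand is not MachineOperand):

#include <utility>

struct ToyOperand { unsigned Reg; unsigned SubReg; bool IsKill; };

// Swap the two commutable sources and, if the def is tied to the first
// source slot, rewrite the def (register and sub-register, as the new
// SubReg0/SubReg1/SubReg2 code does) and drop the kill on that source.
void commuteOperands(ToyOperand &Def, ToyOperand &Src1, ToyOperand &Src2,
                     bool DefTiedToSrc1) {
  std::swap(Src1.Reg, Src2.Reg);
  std::swap(Src1.SubReg, Src2.SubReg);
  std::swap(Src1.IsKill, Src2.IsKill);
  ToyOperand &Tied = DefTiedToSrc1 ? Src1 : Src2;
  Def.Reg = Tied.Reg;
  Def.SubReg = Tied.SubReg;
  Tied.IsKill = false;   // the register now written by the def is not a kill
}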
+ if (MI->isBranch() && !MI->isBarrier()) + return true; + if (!MI->isPredicable()) + return true; + return !isPredicated(MI); +} + + bool TargetInstrInfoImpl::PredicateInstruction(MachineInstr *MI, const SmallVectorImpl<MachineOperand> &Pred) const { bool MadeChange = false; + + assert(!MI->isBundle() && + "TargetInstrInfoImpl::PredicateInstruction() can't handle bundles"); + const MCInstrDesc &MCID = MI->getDesc(); - if (!MCID.isPredicable()) + if (!MI->isPredicable()) return false; for (unsigned j = 0, i = 0, e = MI->getNumOperands(); i != e; ++i) { @@ -218,7 +248,7 @@ TargetInstrInfoImpl::produceSameValue(const MachineInstr *MI0, MachineInstr *TargetInstrInfoImpl::duplicate(MachineInstr *Orig, MachineFunction &MF) const { - assert(!Orig->getDesc().isNotDuplicable() && + assert(!Orig->isNotDuplicable() && "Instruction cannot be duplicated"); return MF.CloneMachineInstr(Orig); } @@ -288,16 +318,15 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, if (MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, FI)) { // Add a memory operand, foldMemoryOperandImpl doesn't do that. assert((!(Flags & MachineMemOperand::MOStore) || - NewMI->getDesc().mayStore()) && + NewMI->mayStore()) && "Folded a def to a non-store!"); assert((!(Flags & MachineMemOperand::MOLoad) || - NewMI->getDesc().mayLoad()) && + NewMI->mayLoad()) && "Folded a use to a non-load!"); const MachineFrameInfo &MFI = *MF.getFrameInfo(); assert(MFI.getObjectOffset(FI) != -1); MachineMemOperand *MMO = - MF.getMachineMemOperand( - MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)), + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), Flags, MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); NewMI->addMemOperand(MF, MMO); @@ -332,7 +361,7 @@ MachineInstr* TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, const SmallVectorImpl<unsigned> &Ops, MachineInstr* LoadMI) const { - assert(LoadMI->getDesc().canFoldAsLoad() && "LoadMI isn't foldable!"); + assert(LoadMI->canFoldAsLoad() && "LoadMI isn't foldable!"); #ifndef NDEBUG for (unsigned i = 0, e = Ops.size(); i != e; ++i) assert(MI->getOperand(Ops[i]).isUse() && "Folding load into def!"); @@ -360,7 +389,6 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, const MachineRegisterInfo &MRI = MF.getRegInfo(); const TargetMachine &TM = MF.getTarget(); const TargetInstrInfo &TII = *TM.getInstrInfo(); - const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); // Remat clients assume operand 0 is the defined register. if (!MI->getNumOperands() || !MI->getOperand(0).isReg()) @@ -383,10 +411,8 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, MF.getFrameInfo()->isImmutableObjectIndex(FrameIdx)) return true; - const MCInstrDesc &MCID = MI->getDesc(); - // Avoid instructions obviously unsafe for remat. - if (MCID.isNotDuplicable() || MCID.mayStore() || + if (MI->isNotDuplicable() || MI->mayStore() || MI->hasUnmodeledSideEffects()) return false; @@ -396,7 +422,7 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, return false; // Avoid instructions which load from potentially varying memory. - if (MCID.mayLoad() && !MI->isInvariantLoad(AA)) + if (MI->mayLoad() && !MI->isInvariantLoad(AA)) return false; // If any of the registers accessed are non-constant, conservatively assume @@ -414,19 +440,8 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, // If the physreg has no defs anywhere, it's just an ambient register // and we can freely move its uses. 
Alternatively, if it's allocatable, // it could get allocated to something with a def during allocation. - if (!MRI.def_empty(Reg)) - return false; - BitVector AllocatableRegs = TRI.getAllocatableSet(MF, 0); - if (AllocatableRegs.test(Reg)) + if (!MRI.isConstantPhysReg(Reg, MF)) return false; - // Check for a def among the register's aliases too. - for (const unsigned *Alias = TRI.getAliasSet(Reg); *Alias; ++Alias) { - unsigned AliasReg = *Alias; - if (!MRI.def_empty(AliasReg)) - return false; - if (AllocatableRegs.test(AliasReg)) - return false; - } } else { // A physreg def. We can't remat it. return false; @@ -457,7 +472,7 @@ bool TargetInstrInfoImpl::isSchedulingBoundary(const MachineInstr *MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const{ // Terminators and labels can't be scheduled around. - if (MI->getDesc().isTerminator() || MI->isLabel()) + if (MI->isTerminator() || MI->isLabel()) return true; // Don't attempt to schedule around any instruction that defines @@ -493,3 +508,32 @@ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, return (ScheduleHazardRecognizer *) new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched"); } + +int +TargetInstrInfoImpl::getOperandLatency(const InstrItineraryData *ItinData, + SDNode *DefNode, unsigned DefIdx, + SDNode *UseNode, unsigned UseIdx) const { + if (!ItinData || ItinData->isEmpty()) + return -1; + + if (!DefNode->isMachineOpcode()) + return -1; + + unsigned DefClass = get(DefNode->getMachineOpcode()).getSchedClass(); + if (!UseNode->isMachineOpcode()) + return ItinData->getOperandCycle(DefClass, DefIdx); + unsigned UseClass = get(UseNode->getMachineOpcode()).getSchedClass(); + return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx); +} + +int TargetInstrInfoImpl::getInstrLatency(const InstrItineraryData *ItinData, + SDNode *N) const { + if (!ItinData || ItinData->isEmpty()) + return 1; + + if (!N->isMachineOpcode()) + return 1; + + return ItinData->getStageLatency(get(N->getMachineOpcode()).getSchedClass()); +} + diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 3848f4d4d4c4..9925185be120 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -17,6 +17,7 @@ #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/GlobalVariable.h" +#include "llvm/Module.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -53,11 +54,9 @@ TargetLoweringObjectFileELF::getCFIPersonalitySymbol(const GlobalValue *GV, report_fatal_error("We do not support this DWARF encoding yet!"); case dwarf::DW_EH_PE_absptr: return Mang->getSymbol(GV); - break; case dwarf::DW_EH_PE_pcrel: { return getContext().GetOrCreateSymbol(StringRef("DW.ref.") + Mang->getSymbol(GV)->getName()); - break; } } } @@ -78,14 +77,14 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer, Flags, SectionKind::getDataRel(), 0, Label->getName()); + unsigned Size = TM.getTargetData()->getPointerSize(); Streamer.SwitchSection(Sec); - Streamer.EmitValueToAlignment(8); + Streamer.EmitValueToAlignment(TM.getTargetData()->getPointerABIAlignment()); Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject); - const MCExpr *E = MCConstantExpr::Create(8, getContext()); + const MCExpr *E = MCConstantExpr::Create(Size, getContext()); Streamer.EmitELFSize(Label, E); Streamer.EmitLabel(Label); - unsigned Size = TM.getTargetData()->getPointerSize(); 
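The getOperandLatency()/getInstrLatency() overloads added above answer latency queries from the instruction itineraries, falling back to -1 (unknown) or 1 when there is no itinerary or the node is not a machine opcode. The toy table below illustrates the kind of per-scheduling-class operand-cycle data such a query reads and the usual def-cycle minus use-cycle arithmetic; the table values and names are invented for illustration.

// OperandCycle[SchedClass][OperandIdx]: cycle at which the operand is
// produced (operand 0, the def) or read (later operands); -1 = unknown.
static const int OperandCycle[2][2] = {
  /* class 0: ALU  */ { 1, 1 },
  /* class 1: LOAD */ { 3, 1 },
};

int operandLatency(int DefClass, int DefIdx, int UseClass, int UseIdx) {
  int DefCycle = OperandCycle[DefClass][DefIdx];
  int UseCycle = OperandCycle[UseClass][UseIdx];
  if (DefCycle < 0 || UseCycle < 0)
    return -1;                         // no data; caller picks a default
  int Latency = DefCycle - UseCycle + 1;
  return Latency < 0 ? 0 : Latency;
}
// e.g. a LOAD result (ready in cycle 3) feeding an ALU source read in
// cycle 1 gives latency 3; ALU feeding ALU gives latency 1.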
Streamer.EmitSymbolValue(Sym, Size); } @@ -189,6 +188,7 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, static const char *getSectionPrefixForGlobal(SectionKind Kind) { if (Kind.isText()) return ".text."; if (Kind.isReadOnly()) return ".rodata."; + if (Kind.isBSS()) return ".bss."; if (Kind.isThreadData()) return ".tdata."; if (Kind.isThreadBSS()) return ".tbss."; @@ -217,7 +217,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, // If this global is linkonce/weak and the target handles this by emitting it // into a 'uniqued' section name, create and return the section now. if ((GV->isWeakForLinker() || EmitUniquedSection) && - !Kind.isCommon() && !Kind.isBSS()) { + !Kind.isCommon()) { const char *Prefix; Prefix = getSectionPrefixForGlobal(Kind); @@ -342,10 +342,92 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer); } +const MCSection * +TargetLoweringObjectFileELF::getStaticCtorSection(unsigned Priority) const { + // The default scheme is .ctor / .dtor, so we have to invert the priority + // numbering. + if (Priority == 65535) + return StaticCtorSection; + + std::string Name = std::string(".ctors.") + utostr(65535 - Priority); + return getContext().getELFSection(Name, ELF::SHT_PROGBITS, + ELF::SHF_ALLOC |ELF::SHF_WRITE, + SectionKind::getDataRel()); +} + +const MCSection * +TargetLoweringObjectFileELF::getStaticDtorSection(unsigned Priority) const { + // The default scheme is .ctor / .dtor, so we have to invert the priority + // numbering. + if (Priority == 65535) + return StaticDtorSection; + + std::string Name = std::string(".dtors.") + utostr(65535 - Priority); + return getContext().getELFSection(Name, ELF::SHT_PROGBITS, + ELF::SHF_ALLOC |ELF::SHF_WRITE, + SectionKind::getDataRel()); +} + //===----------------------------------------------------------------------===// // MachO //===----------------------------------------------------------------------===// +/// emitModuleFlags - Emit the module flags that specify the garbage collection +/// information. +void TargetLoweringObjectFileMachO:: +emitModuleFlags(MCStreamer &Streamer, + ArrayRef<Module::ModuleFlagEntry> ModuleFlags, + Mangler *Mang, const TargetMachine &TM) const { + unsigned VersionVal = 0; + unsigned GCFlags = 0; + StringRef SectionVal; + + for (ArrayRef<Module::ModuleFlagEntry>::iterator + i = ModuleFlags.begin(), e = ModuleFlags.end(); i != e; ++i) { + const Module::ModuleFlagEntry &MFE = *i; + + // Ignore flags with 'Require' behavior. + if (MFE.Behavior == Module::Require) + continue; + + StringRef Key = MFE.Key->getString(); + Value *Val = MFE.Val; + + if (Key == "Objective-C Image Info Version") + VersionVal = cast<ConstantInt>(Val)->getZExtValue(); + else if (Key == "Objective-C Garbage Collection" || + Key == "Objective-C GC Only") + GCFlags |= cast<ConstantInt>(Val)->getZExtValue(); + else if (Key == "Objective-C Image Info Section") + SectionVal = cast<MDString>(Val)->getString(); + } + + // The section is mandatory. If we don't have it, then we don't have GC info. + if (SectionVal.empty()) return; + + StringRef Segment, Section; + unsigned TAA = 0, StubSize = 0; + bool TAAParsed; + std::string ErrorCode = + MCSectionMachO::ParseSectionSpecifier(SectionVal, Segment, Section, + TAA, TAAParsed, StubSize); + if (!ErrorCode.empty()) + // If invalid, report the error with report_fatal_error. 
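getStaticCtorSection()/getStaticDtorSection() above keep the default priority (65535) in the plain .ctors/.dtors sections and put everything else in a numbered section whose suffix is the inverted priority. A one-function sketch of the naming rule (std::string based, illustrative only):

#include <sstream>
#include <string>

// Section name for a static constructor of the given priority under the
// .ctors scheme used above; the suffix is 65535 - Priority.
std::string staticCtorSectionName(unsigned Priority) {
  if (Priority == 65535)
    return ".ctors";
  std::ostringstream OS;
  OS << ".ctors." << (65535 - Priority);
  return OS.str();
}
// staticCtorSectionName(65535) == ".ctors"
// staticCtorSectionName(101)   == ".ctors.65434"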
+ report_fatal_error("Invalid section specifier '" + Section + "': " + + ErrorCode + "."); + + // Get the section. + const MCSectionMachO *S = + getContext().getMachOSection(Segment, Section, TAA, StubSize, + SectionKind::getDataNoRel()); + Streamer.SwitchSection(S); + Streamer.EmitLabel(getContext(). + GetOrCreateSymbol(StringRef("L_OBJC_IMAGE_INFO"))); + Streamer.EmitIntValue(VersionVal, 4); + Streamer.EmitIntValue(GCFlags, 4); + Streamer.AddBlankLine(); +} + const MCSection *TargetLoweringObjectFileMachO:: getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, Mangler *Mang, const TargetMachine &TM) const { @@ -358,11 +440,9 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, TAA, TAAParsed, StubSize); if (!ErrorCode.empty()) { // If invalid, report the error with report_fatal_error. - report_fatal_error("Global variable '" + GV->getNameStr() + - "' has an invalid section specifier '" + GV->getSection()+ - "': " + ErrorCode + "."); - // Fall back to dropping it into the data section. - return DataSection; + report_fatal_error("Global variable '" + GV->getName() + + "' has an invalid section specifier '" + + GV->getSection() + "': " + ErrorCode + "."); } // Get the section. @@ -379,9 +459,9 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, // to reject it here. if (S->getTypeAndAttributes() != TAA || S->getStubSize() != StubSize) { // If invalid, report the error with report_fatal_error. - report_fatal_error("Global variable '" + GV->getNameStr() + - "' section type or attributes does not match previous" - " section specifier"); + report_fatal_error("Global variable '" + GV->getName() + + "' section type or attributes does not match previous" + " section specifier"); } return S; @@ -536,9 +616,7 @@ getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang, // Add information about the stub reference to MachOMMI so that the stub // gets emitted by the asmprinter. MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str()); - MachineModuleInfoImpl::StubValueTy &StubSym = - GV->hasHiddenVisibility() ? MachOMMI.getHiddenGVStubEntry(SSym) : - MachOMMI.getGVStubEntry(SSym); + MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym); if (StubSym.getPointer() == 0) { MCSymbol *Sym = Mang->getSymbol(GV); StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage()); @@ -568,6 +646,11 @@ getCOFFSectionFlags(SectionKind K) { COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE; + else if (K.isThreadLocal()) + Flags |= + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE; else if (K.isReadOnly()) Flags |= COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | @@ -594,6 +677,8 @@ static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) { return ".text$"; if (Kind.isBSS ()) return ".bss$"; + if (Kind.isThreadLocal()) + return ".tls$"; if (Kind.isWriteable()) return ".data$"; return ".rdata$"; @@ -603,7 +688,6 @@ static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) { const MCSection *TargetLoweringObjectFileCOFF:: SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler *Mang, const TargetMachine &TM) const { - assert(!Kind.isThreadLocal() && "Doesn't support TLS"); // If this global is linkonce/weak and the target handles this by emitting it // into a 'uniqued' section name, create and return the section now. 
@@ -624,6 +708,9 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, if (Kind.isText()) return getTextSection(); + if (Kind.isThreadLocal()) + return getTLSDataSection(); + return getDataSection(); } diff --git a/lib/CodeGen/TargetOptionsImpl.cpp b/lib/CodeGen/TargetOptionsImpl.cpp new file mode 100644 index 000000000000..0f59d0169e18 --- /dev/null +++ b/lib/CodeGen/TargetOptionsImpl.cpp @@ -0,0 +1,52 @@ +//===-- TargetOptionsImpl.cpp - Options that apply to all targets ----------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the methods in the TargetOptions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/Target/TargetOptions.h" +using namespace llvm; + +/// DisableFramePointerElim - This returns true if frame pointer elimination +/// optimization should be disabled for the given machine function. +bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const { + // Check to see if we should eliminate non-leaf frame pointers and then + // check to see if we should eliminate all frame pointers. + if (NoFramePointerElimNonLeaf && !NoFramePointerElim) { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + return MFI->hasCalls(); + } + + return NoFramePointerElim; +} + +/// LessPreciseFPMAD - This flag return true when -enable-fp-mad option +/// is specified on the command line. When this flag is off(default), the +/// code generator is not allowed to generate mad (multiply add) if the +/// result is "less precise" than doing those operations individually. +bool TargetOptions::LessPreciseFPMAD() const { + return UnsafeFPMath || LessPreciseFPMADOption; +} + +/// HonorSignDependentRoundingFPMath - Return true if the codegen must assume +/// that the rounding mode of the FPU can change from its default. +bool TargetOptions::HonorSignDependentRoundingFPMath() const { + return !UnsafeFPMath && HonorSignDependentRoundingFPMathOption; +} + +/// getTrapFunctionName - If this returns a non-empty string, this means isel +/// should lower Intrinsic::trap to a call to the specified function name +/// instead of an ISD::TRAP node. 
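TargetOptionsImpl.cpp above moves a few option queries out of line; DisableFramePointerElim() keeps the frame pointer either everywhere or only in functions that make calls, depending on the NoFramePointerElim and NoFramePointerElimNonLeaf fields. A standalone restatement of that decision with plain booleans (illustrative only):

// Mirrors TargetOptions::DisableFramePointerElim() above: when only the
// non-leaf flag is set, the answer depends on whether this particular
// function contains calls; otherwise the global flag decides.
bool disableFPElim(bool NoFramePointerElim, bool NoFramePointerElimNonLeaf,
                   bool FunctionHasCalls) {
  if (NoFramePointerElimNonLeaf && !NoFramePointerElim)
    return FunctionHasCalls;
  return NoFramePointerElim;
}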
+StringRef TargetOptions::getTrapFunctionName() const { + return TrapFuncName; +} + diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index d87937822280..c30b1333bb2a 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -36,6 +36,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" @@ -56,14 +57,18 @@ STATISTIC(NumConvertedTo3Addr, "Number of instructions promoted to 3-address"); STATISTIC(Num3AddrSunk, "Number of 3-address instructions sunk"); STATISTIC(NumReMats, "Number of instructions re-materialized"); STATISTIC(NumDeletes, "Number of dead instructions deleted"); +STATISTIC(NumReSchedUps, "Number of instructions re-scheduled up"); +STATISTIC(NumReSchedDowns, "Number of instructions re-scheduled down"); namespace { class TwoAddressInstructionPass : public MachineFunctionPass { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; + const InstrItineraryData *InstrItins; MachineRegisterInfo *MRI; LiveVariables *LV; AliasAnalysis *AA; + CodeGenOpt::Level OptLevel; // DistanceMap - Keep track the distance of a MI from the start of the // current basic block. @@ -120,6 +125,18 @@ namespace { MachineBasicBlock::iterator &nmi, MachineFunction::iterator &mbbi, unsigned Dist); + bool isDefTooClose(unsigned Reg, unsigned Dist, + MachineInstr *MI, MachineBasicBlock *MBB); + + bool RescheduleMIBelowKill(MachineBasicBlock *MBB, + MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned Reg); + bool RescheduleKillAboveMI(MachineBasicBlock *MBB, + MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned Reg); + bool TryInstructionTransform(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, MachineFunction::iterator &mbbi, @@ -152,7 +169,6 @@ namespace { AU.addPreserved<LiveVariables>(); AU.addPreservedID(MachineLoopInfoID); AU.addPreservedID(MachineDominatorsID); - AU.addPreservedID(PHIEliminationID); MachineFunctionPass::getAnalysisUsage(AU); } @@ -225,12 +241,12 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, // appropriate location, we can try to sink the current instruction // past it. if (!KillMI || KillMI->getParent() != MBB || KillMI == MI || - KillMI->getDesc().isTerminator()) + KillMI->isTerminator()) return false; // If any of the definitions are used by another instruction between the // position and the kill use, then it's not safe to sink it. - // + // // FIXME: This can be sped up if there is an easy way to query whether an // instruction is before or after another instruction. Then we can use // MachineRegisterInfo def / use instead. @@ -273,7 +289,7 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, KillMO->setIsKill(false); KillMO = MI->findRegisterUseOperand(SavedReg, false, TRI); KillMO->setIsKill(true); - + if (LV) LV->replaceKillInstruction(SavedReg, KillMI, MI); @@ -319,7 +335,7 @@ TwoAddressInstructionPass::isProfitableToReMat(unsigned Reg, continue; // Current use. OtherUse = true; // There is at least one other use in the MBB that will clobber the - // register. + // register. 
if (isTwoAddrUse(UseMI, Reg)) return true; } @@ -467,6 +483,32 @@ static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) { return false; } +/// findLocalKill - Look for an instruction below MI in the MBB that kills the +/// specified register. Returns null if there are any other Reg use between the +/// instructions. +static +MachineInstr *findLocalKill(unsigned Reg, MachineBasicBlock *MBB, + MachineInstr *MI, MachineRegisterInfo *MRI, + DenseMap<MachineInstr*, unsigned> &DistanceMap) { + MachineInstr *KillMI = 0; + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI->use_nodbg_begin(Reg), + UE = MRI->use_nodbg_end(); UI != UE; ++UI) { + MachineInstr *UseMI = &*UI; + if (UseMI == MI || UseMI->getParent() != MBB) + continue; + if (DistanceMap.count(UseMI)) + continue; + if (!UI.getOperand().isKill()) + return 0; + if (KillMI) + return 0; // -O0 kill markers cannot be trusted? + KillMI = UseMI; + } + + return KillMI; +} + /// findOnlyInterestingUse - Given a register, if has a single in-basic block /// use, return the use instruction if it's a copy or a two-address use. static @@ -528,6 +570,9 @@ bool TwoAddressInstructionPass::isProfitableToCommute(unsigned regB, unsigned regC, MachineInstr *MI, MachineBasicBlock *MBB, unsigned Dist) { + if (OptLevel == CodeGenOpt::None) + return false; + // Determine if it's profitable to commute this two address instruction. In // general, we want no uses between this instruction and the definition of // the two-address register. @@ -544,7 +589,7 @@ TwoAddressInstructionPass::isProfitableToCommute(unsigned regB, unsigned regC, // %reg1029<def> = MOV8rr %reg1028 // %reg1029<def> = SHR8ri %reg1029, 7, %EFLAGS<imp-def,dead> // insert => %reg1030<def> = MOV8rr %reg1029 - // %reg1030<def> = ADD8rr %reg1029<kill>, %reg1028<kill>, %EFLAGS<imp-def,dead> + // %reg1030<def> = ADD8rr %reg1029<kill>, %reg1028<kill>, %EFLAGS<imp-def,dead> if (!MI->killsRegister(regC)) return false; @@ -770,10 +815,9 @@ void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI, static bool isSafeToDelete(MachineInstr *MI, const TargetInstrInfo *TII, SmallVector<unsigned, 4> &Kills) { - const MCInstrDesc &MCID = MI->getDesc(); - if (MCID.mayStore() || MCID.isCall()) + if (MI->mayStore() || MI->isCall()) return false; - if (MCID.isTerminator() || MI->hasUnmodeledSideEffects()) + if (MI->isTerminator() || MI->hasUnmodeledSideEffects()) return false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { @@ -852,28 +896,316 @@ TwoAddressInstructionPass::DeleteUnusedInstr(MachineBasicBlock::iterator &mi, return true; } +/// RescheduleMIBelowKill - If there is one more local instruction that reads +/// 'Reg' and it kills 'Reg, consider moving the instruction below the kill +/// instruction in order to eliminate the need for the copy. +bool +TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB, + MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned Reg) { + MachineInstr *MI = &*mi; + DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI); + if (DI == DistanceMap.end()) + // Must be created from unfolded load. Don't waste time trying this. + return false; + + MachineInstr *KillMI = findLocalKill(Reg, MBB, mi, MRI, DistanceMap); + if (!KillMI || KillMI->isCopy() || KillMI->isCopyLike()) + // Don't mess with copies, they may be coalesced later. 
+ return false; + + if (KillMI->hasUnmodeledSideEffects() || KillMI->isCall() || + KillMI->isBranch() || KillMI->isTerminator()) + // Don't move pass calls, etc. + return false; + + unsigned DstReg; + if (isTwoAddrUse(*KillMI, Reg, DstReg)) + return false; + + bool SeenStore = true; + if (!MI->isSafeToMove(TII, AA, SeenStore)) + return false; + + if (TII->getInstrLatency(InstrItins, MI) > 1) + // FIXME: Needs more sophisticated heuristics. + return false; + + SmallSet<unsigned, 2> Uses; + SmallSet<unsigned, 2> Kills; + SmallSet<unsigned, 2> Defs; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned MOReg = MO.getReg(); + if (!MOReg) + continue; + if (MO.isDef()) + Defs.insert(MOReg); + else { + Uses.insert(MOReg); + if (MO.isKill() && MOReg != Reg) + Kills.insert(MOReg); + } + } + + // Move the copies connected to MI down as well. + MachineBasicBlock::iterator From = MI; + MachineBasicBlock::iterator To = llvm::next(From); + while (To->isCopy() && Defs.count(To->getOperand(1).getReg())) { + Defs.insert(To->getOperand(0).getReg()); + ++To; + } + + // Check if the reschedule will not break depedencies. + unsigned NumVisited = 0; + MachineBasicBlock::iterator KillPos = KillMI; + ++KillPos; + for (MachineBasicBlock::iterator I = To; I != KillPos; ++I) { + MachineInstr *OtherMI = I; + // DBG_VALUE cannot be counted against the limit. + if (OtherMI->isDebugValue()) + continue; + if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost. + return false; + ++NumVisited; + if (OtherMI->hasUnmodeledSideEffects() || OtherMI->isCall() || + OtherMI->isBranch() || OtherMI->isTerminator()) + // Don't move pass calls, etc. + return false; + for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = OtherMI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned MOReg = MO.getReg(); + if (!MOReg) + continue; + if (MO.isDef()) { + if (Uses.count(MOReg)) + // Physical register use would be clobbered. + return false; + if (!MO.isDead() && Defs.count(MOReg)) + // May clobber a physical register def. + // FIXME: This may be too conservative. It's ok if the instruction + // is sunken completely below the use. + return false; + } else { + if (Defs.count(MOReg)) + return false; + if (MOReg != Reg && + ((MO.isKill() && Uses.count(MOReg)) || Kills.count(MOReg))) + // Don't want to extend other live ranges and update kills. + return false; + } + } + } + + // Move debug info as well. + while (From != MBB->begin() && llvm::prior(From)->isDebugValue()) + --From; + + // Copies following MI may have been moved as well. + nmi = To; + MBB->splice(KillPos, MBB, From, To); + DistanceMap.erase(DI); + + if (LV) { + // Update live variables + LV->removeVirtualRegisterKilled(Reg, KillMI); + LV->addVirtualRegisterKilled(Reg, MI); + } else { + for (unsigned i = 0, e = KillMI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = KillMI->getOperand(i); + if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg) + continue; + MO.setIsKill(false); + } + MI->addRegisterKilled(Reg, 0); + } + + return true; +} + +/// isDefTooClose - Return true if the re-scheduling will put the given +/// instruction too close to the defs of its register dependencies. 
+bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist, + MachineInstr *MI, + MachineBasicBlock *MBB) { + for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(Reg), + DE = MRI->def_end(); DI != DE; ++DI) { + MachineInstr *DefMI = &*DI; + if (DefMI->getParent() != MBB || DefMI->isCopy() || DefMI->isCopyLike()) + continue; + if (DefMI == MI) + return true; // MI is defining something KillMI uses + DenseMap<MachineInstr*, unsigned>::iterator DDI = DistanceMap.find(DefMI); + if (DDI == DistanceMap.end()) + return true; // Below MI + unsigned DefDist = DDI->second; + assert(Dist > DefDist && "Visited def already?"); + if (TII->getInstrLatency(InstrItins, DefMI) > (int)(Dist - DefDist)) + return true; + } + return false; +} + +/// RescheduleKillAboveMI - If there is one more local instruction that reads +/// 'Reg' and it kills 'Reg, consider moving the kill instruction above the +/// current two-address instruction in order to eliminate the need for the +/// copy. +bool +TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB, + MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned Reg) { + MachineInstr *MI = &*mi; + DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI); + if (DI == DistanceMap.end()) + // Must be created from unfolded load. Don't waste time trying this. + return false; + + MachineInstr *KillMI = findLocalKill(Reg, MBB, mi, MRI, DistanceMap); + if (!KillMI || KillMI->isCopy() || KillMI->isCopyLike()) + // Don't mess with copies, they may be coalesced later. + return false; + + unsigned DstReg; + if (isTwoAddrUse(*KillMI, Reg, DstReg)) + return false; + + bool SeenStore = true; + if (!KillMI->isSafeToMove(TII, AA, SeenStore)) + return false; + + SmallSet<unsigned, 2> Uses; + SmallSet<unsigned, 2> Kills; + SmallSet<unsigned, 2> Defs; + SmallSet<unsigned, 2> LiveDefs; + for (unsigned i = 0, e = KillMI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = KillMI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned MOReg = MO.getReg(); + if (MO.isUse()) { + if (!MOReg) + continue; + if (isDefTooClose(MOReg, DI->second, MI, MBB)) + return false; + Uses.insert(MOReg); + if (MO.isKill() && MOReg != Reg) + Kills.insert(MOReg); + } else if (TargetRegisterInfo::isPhysicalRegister(MOReg)) { + Defs.insert(MOReg); + if (!MO.isDead()) + LiveDefs.insert(MOReg); + } + } + + // Check if the reschedule will not break depedencies. + unsigned NumVisited = 0; + MachineBasicBlock::iterator KillPos = KillMI; + for (MachineBasicBlock::iterator I = mi; I != KillPos; ++I) { + MachineInstr *OtherMI = I; + // DBG_VALUE cannot be counted against the limit. + if (OtherMI->isDebugValue()) + continue; + if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost. + return false; + ++NumVisited; + if (OtherMI->hasUnmodeledSideEffects() || OtherMI->isCall() || + OtherMI->isBranch() || OtherMI->isTerminator()) + // Don't move pass calls, etc. + return false; + SmallVector<unsigned, 2> OtherDefs; + for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = OtherMI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned MOReg = MO.getReg(); + if (!MOReg) + continue; + if (MO.isUse()) { + if (Defs.count(MOReg)) + // Moving KillMI can clobber the physical register if the def has + // not been seen. + return false; + if (Kills.count(MOReg)) + // Don't want to extend other live ranges and update kills. 
+ return false; + } else { + OtherDefs.push_back(MOReg); + } + } + + for (unsigned i = 0, e = OtherDefs.size(); i != e; ++i) { + unsigned MOReg = OtherDefs[i]; + if (Uses.count(MOReg)) + return false; + if (TargetRegisterInfo::isPhysicalRegister(MOReg) && + LiveDefs.count(MOReg)) + return false; + // Physical register def is seen. + Defs.erase(MOReg); + } + } + + // Move the old kill above MI, don't forget to move debug info as well. + MachineBasicBlock::iterator InsertPos = mi; + while (InsertPos != MBB->begin() && llvm::prior(InsertPos)->isDebugValue()) + --InsertPos; + MachineBasicBlock::iterator From = KillMI; + MachineBasicBlock::iterator To = llvm::next(From); + while (llvm::prior(From)->isDebugValue()) + --From; + MBB->splice(InsertPos, MBB, From, To); + + nmi = llvm::prior(InsertPos); // Backtrack so we process the moved instr. + DistanceMap.erase(DI); + + if (LV) { + // Update live variables + LV->removeVirtualRegisterKilled(Reg, KillMI); + LV->addVirtualRegisterKilled(Reg, MI); + } else { + for (unsigned i = 0, e = KillMI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = KillMI->getOperand(i); + if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg) + continue; + MO.setIsKill(false); + } + MI->addRegisterKilled(Reg, 0); + } + return true; +} + /// TryInstructionTransform - For the case where an instruction has a single /// pair of tied register operands, attempt some transformations that may /// either eliminate the tied operands or improve the opportunities for -/// coalescing away the register copy. Returns true if the tied operands -/// are eliminated altogether. +/// coalescing away the register copy. Returns true if no copy needs to be +/// inserted to untie mi's operands (either because they were untied, or +/// because mi was rescheduled, and will be visited again later). bool TwoAddressInstructionPass:: TryInstructionTransform(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, MachineFunction::iterator &mbbi, unsigned SrcIdx, unsigned DstIdx, unsigned Dist, SmallPtrSet<MachineInstr*, 8> &Processed) { - const MCInstrDesc &MCID = mi->getDesc(); - unsigned regA = mi->getOperand(DstIdx).getReg(); - unsigned regB = mi->getOperand(SrcIdx).getReg(); + if (OptLevel == CodeGenOpt::None) + return false; + + MachineInstr &MI = *mi; + unsigned regA = MI.getOperand(DstIdx).getReg(); + unsigned regB = MI.getOperand(SrcIdx).getReg(); assert(TargetRegisterInfo::isVirtualRegister(regB) && "cannot make instruction into two-address form"); // If regA is dead and the instruction can be deleted, just delete // it so it doesn't clobber regB. - bool regBKilled = isKilled(*mi, regB, MRI, TII); - if (!regBKilled && mi->getOperand(DstIdx).isDead() && + bool regBKilled = isKilled(MI, regB, MRI, TII); + if (!regBKilled && MI.getOperand(DstIdx).isDead() && DeleteUnusedInstr(mi, nmi, mbbi, Dist)) { ++NumDeletes; return true; // Done with this instruction. 
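After this patch TryInstructionTransform tries several alternatives before falling back to inserting the tying copy. The sketch below only illustrates the order in which those alternatives are attempted, as visible in the hunks that follow; the boolean parameters are stand-ins for the pass's legality and profitability queries, and in the pass itself some of these steps fall through rather than returning.

enum class TwoAddrAction {
  DeleteDeadDef,        // destination is dead and the instruction can go away
  Commute,              // swap sources so the tied pair becomes joinable
  RescheduleBelowKill,  // sink this instruction below the kill of its source
  ConvertTo3Addr,       // rewrite to a true three-address instruction
  RescheduleAboveMI,    // hoist the killing instruction above this one
  UnfoldLoad,           // split a folded load off the instruction
  InsertCopy            // fall back to tying the operands with a COPY
};

TwoAddrAction pickAction(bool DestDeadAndDeletable, bool CommuteProfitable,
                         bool CanSinkBelowKill, bool Conv3AddrProfitable,
                         bool CanHoistKill, bool CanUnfoldLoad) {
  if (DestDeadAndDeletable) return TwoAddrAction::DeleteDeadDef;
  if (CommuteProfitable)    return TwoAddrAction::Commute;
  if (CanSinkBelowKill)     return TwoAddrAction::RescheduleBelowKill;
  if (Conv3AddrProfitable)  return TwoAddrAction::ConvertTo3Addr;
  if (CanHoistKill)         return TwoAddrAction::RescheduleAboveMI;
  if (CanUnfoldLoad)        return TwoAddrAction::UnfoldLoad;
  return TwoAddrAction::InsertCopy;
}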
@@ -885,20 +1217,20 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, unsigned regCIdx = ~0U; bool TryCommute = false; bool AggressiveCommute = false; - if (MCID.isCommutable() && mi->getNumOperands() >= 3 && - TII->findCommutedOpIndices(mi, SrcOp1, SrcOp2)) { + if (MI.isCommutable() && MI.getNumOperands() >= 3 && + TII->findCommutedOpIndices(&MI, SrcOp1, SrcOp2)) { if (SrcIdx == SrcOp1) regCIdx = SrcOp2; else if (SrcIdx == SrcOp2) regCIdx = SrcOp1; if (regCIdx != ~0U) { - regC = mi->getOperand(regCIdx).getReg(); - if (!regBKilled && isKilled(*mi, regC, MRI, TII)) + regC = MI.getOperand(regCIdx).getReg(); + if (!regBKilled && isKilled(MI, regC, MRI, TII)) // If C dies but B does not, swap the B and C operands. // This makes the live ranges of A and C joinable. TryCommute = true; - else if (isProfitableToCommute(regB, regC, mi, mbbi, Dist)) { + else if (isProfitableToCommute(regB, regC, &MI, mbbi, Dist)) { TryCommute = true; AggressiveCommute = true; } @@ -913,10 +1245,17 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, return false; } + // If there is one more use of regB later in the same MBB, consider + // re-schedule this MI below it. + if (RescheduleMIBelowKill(mbbi, mi, nmi, regB)) { + ++NumReSchedDowns; + return true; + } + if (TargetRegisterInfo::isVirtualRegister(regA)) ScanUses(regA, &*mbbi, Processed); - if (MCID.isConvertibleTo3Addr()) { + if (MI.isConvertibleTo3Addr()) { // This instruction is potentially convertible to a true // three-address instruction. Check if it is profitable. if (!regBKilled || isProfitableToConv3Addr(regA, regB)) { @@ -928,6 +1267,13 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, } } + // If there is one more use of regB later in the same MBB, consider + // re-schedule it before this MI if it's legal. + if (RescheduleKillAboveMI(mbbi, mi, nmi, regB)) { + ++NumReSchedUps; + return true; + } + // If this is an instruction with a load folded into it, try unfolding // the load, e.g. avoid this: // movq %rdx, %rcx @@ -936,11 +1282,11 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, // movq (%rax), %rcx // addq %rdx, %rcx // because it's preferable to schedule a load than a register copy. - if (MCID.mayLoad() && !regBKilled) { + if (MI.mayLoad() && !regBKilled) { // Determine if a load can be unfolded. unsigned LoadRegIndex; unsigned NewOpc = - TII->getOpcodeAfterMemoryUnfold(mi->getOpcode(), + TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(), /*UnfoldLoad=*/true, /*UnfoldStore=*/false, &LoadRegIndex); @@ -950,12 +1296,12 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, MachineFunction &MF = *mbbi->getParent(); // Unfold the load. - DEBUG(dbgs() << "2addr: UNFOLDING: " << *mi); + DEBUG(dbgs() << "2addr: UNFOLDING: " << MI); const TargetRegisterClass *RC = TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI); unsigned Reg = MRI->createVirtualRegister(RC); SmallVector<MachineInstr *, 2> NewMIs; - if (!TII->unfoldMemoryOperand(MF, mi, Reg, + if (!TII->unfoldMemoryOperand(MF, &MI, Reg, /*UnfoldLoad=*/true,/*UnfoldStore=*/false, NewMIs)) { DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n"); @@ -986,21 +1332,21 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, // Success, or at least we made an improvement. Keep the unfolded // instructions and discard the original. 
if (LV) { - for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { - MachineOperand &MO = mi->getOperand(i); - if (MO.isReg() && + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); + if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) { if (MO.isUse()) { if (MO.isKill()) { if (NewMIs[0]->killsRegister(MO.getReg())) - LV->replaceKillInstruction(MO.getReg(), mi, NewMIs[0]); + LV->replaceKillInstruction(MO.getReg(), &MI, NewMIs[0]); else { assert(NewMIs[1]->killsRegister(MO.getReg()) && "Kill missing after load unfold!"); - LV->replaceKillInstruction(MO.getReg(), mi, NewMIs[1]); + LV->replaceKillInstruction(MO.getReg(), &MI, NewMIs[1]); } } - } else if (LV->removeVirtualRegisterDead(MO.getReg(), mi)) { + } else if (LV->removeVirtualRegisterDead(MO.getReg(), &MI)) { if (NewMIs[1]->registerDefIsDead(MO.getReg())) LV->addVirtualRegisterDead(MO.getReg(), NewMIs[1]); else { @@ -1013,7 +1359,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, } LV->addVirtualRegisterKilled(Reg, NewMIs[1]); } - mi->eraseFromParent(); + MI.eraseFromParent(); mi = NewMIs[1]; if (TransformSuccess) return true; @@ -1035,18 +1381,19 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, /// runOnMachineFunction - Reduce two-address instructions to two operands. /// bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { - DEBUG(dbgs() << "Machine Function\n"); const TargetMachine &TM = MF.getTarget(); MRI = &MF.getRegInfo(); TII = TM.getInstrInfo(); TRI = TM.getRegisterInfo(); + InstrItins = TM.getInstrItineraryData(); LV = getAnalysisIfAvailable<LiveVariables>(); AA = &getAnalysis<AliasAnalysis>(); + OptLevel = TM.getOptLevel(); bool MadeChange = false; DEBUG(dbgs() << "********** REWRITING TWO-ADDR INSTRS **********\n"); - DEBUG(dbgs() << "********** Function: " + DEBUG(dbgs() << "********** Function: " << MF.getFunction()->getName() << '\n'); // This pass takes the function out of SSA form. @@ -1177,7 +1524,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { // If it's safe and profitable, remat the definition instead of // copying it. if (DefMI && - DefMI->getDesc().isAsCheapAsAMove() && + DefMI->isAsCheapAsAMove() && DefMI->isSafeToReMat(TII, AA, regB) && isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){ DEBUG(dbgs() << "2addr: REMATTING : " << *DefMI << "\n"); @@ -1248,19 +1595,19 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { MadeChange = true; DEBUG(dbgs() << "\t\trewrite to:\t" << *mi); - } - // Rewrite INSERT_SUBREG as COPY now that we no longer need SSA form. - if (mi->isInsertSubreg()) { - // From %reg = INSERT_SUBREG %reg, %subreg, subidx - // To %reg:subidx = COPY %subreg - unsigned SubIdx = mi->getOperand(3).getImm(); - mi->RemoveOperand(3); - assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx"); - mi->getOperand(0).setSubReg(SubIdx); - mi->RemoveOperand(1); - mi->setDesc(TII->get(TargetOpcode::COPY)); - DEBUG(dbgs() << "\t\tconvert to:\t" << *mi); + // Rewrite INSERT_SUBREG as COPY now that we no longer need SSA form. 
+ if (mi->isInsertSubreg()) { + // From %reg = INSERT_SUBREG %reg, %subreg, subidx + // To %reg:subidx = COPY %subreg + unsigned SubIdx = mi->getOperand(3).getImm(); + mi->RemoveOperand(3); + assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx"); + mi->getOperand(0).setSubReg(SubIdx); + mi->RemoveOperand(1); + mi->setDesc(TII->get(TargetOpcode::COPY)); + DEBUG(dbgs() << "\t\tconvert to:\t" << *mi); + } } // Clear TiedOperands here instead of at the top of the loop @@ -1298,6 +1645,36 @@ static void UpdateRegSequenceSrcs(unsigned SrcReg, } } +// Find the first def of Reg, assuming they are all in the same basic block. +static MachineInstr *findFirstDef(unsigned Reg, MachineRegisterInfo *MRI) { + SmallPtrSet<MachineInstr*, 8> Defs; + MachineInstr *First = 0; + for (MachineRegisterInfo::def_iterator RI = MRI->def_begin(Reg); + MachineInstr *MI = RI.skipInstruction(); Defs.insert(MI)) + First = MI; + if (!First) + return 0; + + MachineBasicBlock *MBB = First->getParent(); + MachineBasicBlock::iterator A = First, B = First; + bool Moving; + do { + Moving = false; + if (A != MBB->begin()) { + Moving = true; + --A; + if (Defs.erase(A)) First = A; + } + if (B != MBB->end()) { + Defs.erase(B); + ++B; + Moving = true; + } + } while (Moving && !Defs.empty()); + assert(Defs.empty() && "Instructions outside basic block!"); + return First; +} + /// CoalesceExtSubRegs - If a number of sources of the REG_SEQUENCE are /// EXTRACT_SUBREG from the same register and to the same virtual register /// with different sub-register indices, attempt to combine the @@ -1380,8 +1757,10 @@ TwoAddressInstructionPass::CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs, CanCoalesce = false; break; } - // Keep track of one of the uses. - SomeMI = UseMI; + // Keep track of one of the uses. Preferably the first one which has a + // <def,undef> flag. + if (!SomeMI || UseMI->getOperand(0).isUndef()) + SomeMI = UseMI; } if (!CanCoalesce) continue; @@ -1390,7 +1769,9 @@ TwoAddressInstructionPass::CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs, MachineInstr *CopyMI = BuildMI(*SomeMI->getParent(), SomeMI, SomeMI->getDebugLoc(), TII->get(TargetOpcode::COPY)) - .addReg(DstReg, RegState::Define, NewDstSubIdx) + .addReg(DstReg, RegState::Define | + getUndefRegState(SomeMI->getOperand(0).isUndef()), + NewDstSubIdx) .addReg(SrcReg, 0, NewSrcSubIdx); // Remove all the old extract instructions. @@ -1452,26 +1833,30 @@ bool TwoAddressInstructionPass::EliminateRegSequences() { SmallSet<unsigned, 4> Seen; for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) { unsigned SrcReg = MI->getOperand(i).getReg(); + unsigned SrcSubIdx = MI->getOperand(i).getSubReg(); unsigned SubIdx = MI->getOperand(i+1).getImm(); - if (MI->getOperand(i).getSubReg() || - TargetRegisterInfo::isPhysicalRegister(SrcReg)) { - DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI); - llvm_unreachable(0); + // DefMI of NULL means the value does not have a vreg in this block + // i.e., its a physical register or a subreg. + // In either case we force a copy to be generated. + MachineInstr *DefMI = NULL; + if (!MI->getOperand(i).getSubReg() && + !TargetRegisterInfo::isPhysicalRegister(SrcReg)) { + DefMI = MRI->getVRegDef(SrcReg); } - MachineInstr *DefMI = MRI->getVRegDef(SrcReg); - if (DefMI->isImplicitDef()) { + if (DefMI && DefMI->isImplicitDef()) { DefMI->eraseFromParent(); continue; } IsImpDef = false; // Remember COPY sources. These might be candidate for coalescing. 
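The findFirstDef helper introduced above walks outward from an arbitrary def until every def of the register in the block has been visited, and reports the earliest one. The sketch below reproduces that expanding-window scan over a vector of instruction ids; the containers and ids are illustrative stand-ins, not LLVM types.

#include <cstddef>
#include <set>
#include <vector>

// Block holds instruction ids in program order, DefIds holds the ids of all
// defs of the register (all inside Block), and StartIdx is the index of any
// one of those defs.
int findFirstDefSketch(const std::vector<int> &Block, std::set<int> DefIds,
                       std::size_t StartIdx) {
  int First = Block[StartIdx];
  std::size_t A = StartIdx, B = StartIdx;
  bool Moving;
  do {
    Moving = false;
    if (A != 0) {
      Moving = true;
      --A;
      if (DefIds.erase(Block[A]))
        First = Block[A];          // found a def earlier than any seen so far
    }
    if (B != Block.size()) {
      DefIds.erase(Block[B]);      // defs at or after the window are not first
      ++B;
      Moving = true;
    }
  } while (Moving && !DefIds.empty());
  return First;                    // id of the earliest def in the block
}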
- if (DefMI->isCopy() && DefMI->getOperand(1).getSubReg()) + if (DefMI && DefMI->isCopy() && DefMI->getOperand(1).getSubReg()) RealSrcs.push_back(DefMI->getOperand(1).getReg()); bool isKill = MI->getOperand(i).isKill(); - if (!Seen.insert(SrcReg) || MI->getParent() != DefMI->getParent() || + if (!DefMI || !Seen.insert(SrcReg) || + MI->getParent() != DefMI->getParent() || !isKill || HasOtherRegSequenceUses(SrcReg, MI, MRI) || !TRI->getMatchingSuperRegClass(MRI->getRegClass(DstReg), MRI->getRegClass(SrcReg), SubIdx)) { @@ -1504,9 +1889,9 @@ bool TwoAddressInstructionPass::EliminateRegSequences() { MachineInstr *CopyMI = BuildMI(*MI->getParent(), InsertLoc, MI->getDebugLoc(), TII->get(TargetOpcode::COPY)) .addReg(DstReg, RegState::Define, SubIdx) - .addReg(SrcReg, getKillRegState(isKill)); + .addReg(SrcReg, getKillRegState(isKill), SrcSubIdx); MI->getOperand(i).setReg(0); - if (LV && isKill) + if (LV && isKill && !TargetRegisterInfo::isPhysicalRegister(SrcReg)) LV->replaceKillInstruction(SrcReg, MI, CopyMI); DEBUG(dbgs() << "Inserted: " << *CopyMI); } @@ -1519,11 +1904,27 @@ bool TwoAddressInstructionPass::EliminateRegSequences() { UpdateRegSequenceSrcs(SrcReg, DstReg, SubIdx, MRI, *TRI); } + // Set <def,undef> flags on the first DstReg def in the basic block. + // It marks the beginning of the live range. All the other defs are + // read-modify-write. + if (MachineInstr *Def = findFirstDef(DstReg, MRI)) { + for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) { + MachineOperand &MO = Def->getOperand(i); + if (MO.isReg() && MO.isDef() && MO.getReg() == DstReg) + MO.setIsUndef(); + } + // Make sure there is a full non-subreg imp-def operand on the + // instruction. This shouldn't be necessary, but it seems that at least + // RAFast requires it. 
+ Def->addRegisterDefined(DstReg, TRI); + DEBUG(dbgs() << "First def: " << *Def); + } + if (IsImpDef) { DEBUG(dbgs() << "Turned: " << *MI << " into an IMPLICIT_DEF"); MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF)); for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j) - MI->RemoveOperand(j); + MI->RemoveOperand(j); } else { DEBUG(dbgs() << "Eliminated: " << *MI); MI->eraseFromParent(); diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index 8a1cdc01c494..3bab93bdc098 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -16,10 +16,9 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "virtregmap" +#define DEBUG_TYPE "regalloc" #include "VirtRegMap.h" #include "llvm/Function.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -32,12 +31,8 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallSet.h" #include <algorithm> using namespace llvm; @@ -58,34 +53,11 @@ bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) { TRI = mf.getTarget().getRegisterInfo(); MF = &mf; - ReMatId = MAX_STACK_SLOT+1; - LowSpillSlot = HighSpillSlot = NO_STACK_SLOT; - Virt2PhysMap.clear(); Virt2StackSlotMap.clear(); - Virt2ReMatIdMap.clear(); Virt2SplitMap.clear(); - Virt2SplitKillMap.clear(); - ReMatMap.clear(); - ImplicitDefed.clear(); - SpillSlotToUsesMap.clear(); - MI2VirtMap.clear(); - SpillPt2VirtMap.clear(); - RestorePt2VirtMap.clear(); - EmergencySpillMap.clear(); - EmergencySpillSlots.clear(); - - SpillSlotToUsesMap.resize(8); - ImplicitDefed.resize(MF->getRegInfo().getNumVirtRegs()); - - allocatableRCRegs.clear(); - for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), - E = TRI->regclass_end(); I != E; ++I) - allocatableRCRegs.insert(std::make_pair(*I, - TRI->getAllocatableSet(mf, *I))); grow(); - return false; } @@ -93,24 +65,12 @@ void VirtRegMap::grow() { unsigned NumRegs = MF->getRegInfo().getNumVirtRegs(); Virt2PhysMap.resize(NumRegs); Virt2StackSlotMap.resize(NumRegs); - Virt2ReMatIdMap.resize(NumRegs); Virt2SplitMap.resize(NumRegs); - Virt2SplitKillMap.resize(NumRegs); - ReMatMap.resize(NumRegs); - ImplicitDefed.resize(NumRegs); } unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) { int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(), RC->getAlignment()); - if (LowSpillSlot == NO_STACK_SLOT) - LowSpillSlot = SS; - if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot) - HighSpillSlot = SS; - assert(SS >= LowSpillSlot && "Unexpected low spill slot"); - unsigned Idx = SS-LowSpillSlot; - while (Idx >= SpillSlotToUsesMap.size()) - SpillSlotToUsesMap.resize(SpillSlotToUsesMap.size()*2); ++NumSpillSlots; return SS; } @@ -144,118 +104,6 @@ void VirtRegMap::assignVirt2StackSlot(unsigned virtReg, int SS) { Virt2StackSlotMap[virtReg] = SS; } -int VirtRegMap::assignVirtReMatId(unsigned virtReg) { - assert(TargetRegisterInfo::isVirtualRegister(virtReg)); - assert(Virt2ReMatIdMap[virtReg] == NO_STACK_SLOT && - "attempt to assign re-mat id to already spilled register"); - Virt2ReMatIdMap[virtReg] = ReMatId; - return ReMatId++; -} - -void VirtRegMap::assignVirtReMatId(unsigned 
virtReg, int id) { - assert(TargetRegisterInfo::isVirtualRegister(virtReg)); - assert(Virt2ReMatIdMap[virtReg] == NO_STACK_SLOT && - "attempt to assign re-mat id to already spilled register"); - Virt2ReMatIdMap[virtReg] = id; -} - -int VirtRegMap::getEmergencySpillSlot(const TargetRegisterClass *RC) { - std::map<const TargetRegisterClass*, int>::iterator I = - EmergencySpillSlots.find(RC); - if (I != EmergencySpillSlots.end()) - return I->second; - return EmergencySpillSlots[RC] = createSpillSlot(RC); -} - -void VirtRegMap::addSpillSlotUse(int FI, MachineInstr *MI) { - if (!MF->getFrameInfo()->isFixedObjectIndex(FI)) { - // If FI < LowSpillSlot, this stack reference was produced by - // instruction selection and is not a spill - if (FI >= LowSpillSlot) { - assert(FI >= 0 && "Spill slot index should not be negative!"); - assert((unsigned)FI-LowSpillSlot < SpillSlotToUsesMap.size() - && "Invalid spill slot"); - SpillSlotToUsesMap[FI-LowSpillSlot].insert(MI); - } - } -} - -void VirtRegMap::virtFolded(unsigned VirtReg, MachineInstr *OldMI, - MachineInstr *NewMI, ModRef MRInfo) { - // Move previous memory references folded to new instruction. - MI2VirtMapTy::iterator IP = MI2VirtMap.lower_bound(NewMI); - for (MI2VirtMapTy::iterator I = MI2VirtMap.lower_bound(OldMI), - E = MI2VirtMap.end(); I != E && I->first == OldMI; ) { - MI2VirtMap.insert(IP, std::make_pair(NewMI, I->second)); - MI2VirtMap.erase(I++); - } - - // add new memory reference - MI2VirtMap.insert(IP, std::make_pair(NewMI, std::make_pair(VirtReg, MRInfo))); -} - -void VirtRegMap::virtFolded(unsigned VirtReg, MachineInstr *MI, ModRef MRInfo) { - MI2VirtMapTy::iterator IP = MI2VirtMap.lower_bound(MI); - MI2VirtMap.insert(IP, std::make_pair(MI, std::make_pair(VirtReg, MRInfo))); -} - -void VirtRegMap::RemoveMachineInstrFromMaps(MachineInstr *MI) { - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isFI()) - continue; - int FI = MO.getIndex(); - if (MF->getFrameInfo()->isFixedObjectIndex(FI)) - continue; - // This stack reference was produced by instruction selection and - // is not a spill - if (FI < LowSpillSlot) - continue; - assert((unsigned)FI-LowSpillSlot < SpillSlotToUsesMap.size() - && "Invalid spill slot"); - SpillSlotToUsesMap[FI-LowSpillSlot].erase(MI); - } - MI2VirtMap.erase(MI); - SpillPt2VirtMap.erase(MI); - RestorePt2VirtMap.erase(MI); - EmergencySpillMap.erase(MI); -} - -/// FindUnusedRegisters - Gather a list of allocatable registers that -/// have not been allocated to any virtual register. 
-bool VirtRegMap::FindUnusedRegisters(LiveIntervals* LIs) { - unsigned NumRegs = TRI->getNumRegs(); - UnusedRegs.reset(); - UnusedRegs.resize(NumRegs); - - BitVector Used(NumRegs); - for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); - if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG) - Used.set(Virt2PhysMap[Reg]); - } - - BitVector Allocatable = TRI->getAllocatableSet(*MF); - bool AnyUnused = false; - for (unsigned Reg = 1; Reg < NumRegs; ++Reg) { - if (Allocatable[Reg] && !Used[Reg] && !LIs->hasInterval(Reg)) { - bool ReallyUnused = true; - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) { - if (Used[*AS] || LIs->hasInterval(*AS)) { - ReallyUnused = false; - break; - } - } - if (ReallyUnused) { - AnyUnused = true; - UnusedRegs.set(Reg); - } - } - } - - return AnyUnused; -} - void VirtRegMap::rewrite(SlotIndexes *Indexes) { DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n" << "********** Function: " @@ -264,23 +112,32 @@ void VirtRegMap::rewrite(SlotIndexes *Indexes) { SmallVector<unsigned, 8> SuperDeads; SmallVector<unsigned, 8> SuperDefs; SmallVector<unsigned, 8> SuperKills; +#ifndef NDEBUG + BitVector Reserved = TRI->getReservedRegs(*MF); +#endif for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); MBBI != MBBE; ++MBBI) { DEBUG(MBBI->print(dbgs(), Indexes)); - for (MachineBasicBlock::iterator MII = MBBI->begin(), MIE = MBBI->end(); - MII != MIE;) { + for (MachineBasicBlock::instr_iterator + MII = MBBI->instr_begin(), MIE = MBBI->instr_end(); MII != MIE;) { MachineInstr *MI = MII; ++MII; for (MachineInstr::mop_iterator MOI = MI->operands_begin(), MOE = MI->operands_end(); MOI != MOE; ++MOI) { MachineOperand &MO = *MOI; + + // Make sure MRI knows about registers clobbered by regmasks. + if (MO.isRegMask()) + MRI->addPhysRegsUsedFromRegMask(MO.getRegMask()); + if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue; unsigned VirtReg = MO.getReg(); unsigned PhysReg = getPhys(VirtReg); assert(PhysReg != NO_PHYS_REG && "Instruction uses unmapped VirtReg"); + assert(!Reserved.test(PhysReg) && "Reserved register assignment"); // Preserve semantics of sub-register operands. if (MO.getSubReg()) { @@ -332,7 +189,6 @@ void VirtRegMap::rewrite(SlotIndexes *Indexes) { ++NumIdCopies; if (MI->getNumOperands() == 2) { DEBUG(dbgs() << "Deleting identity copy.\n"); - RemoveMachineInstrFromMaps(MI); if (Indexes) Indexes->removeMachineInstrFromMaps(MI); // It's safe to erase MI because MII has already been incremented. 
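The rewrite loop above replaces every virtual register operand with its assigned physical register, folding any sub-register index into the physical register itself. A minimal sketch of that per-operand step is below; Operand, the map and substSubReg are assumed stand-ins for MachineOperand, Virt2PhysMap and TRI->getSubReg, not the actual LLVM APIs.

#include <cassert>
#include <unordered_map>

struct Operand {
  unsigned Reg = 0;
  unsigned SubReg = 0;        // sub-register index, 0 if none
  bool IsVirtual = false;
};

// Hypothetical helper: fold a sub-register index into a physical register.
unsigned substSubReg(unsigned PhysReg, unsigned SubIdx) {
  return PhysReg + SubIdx;    // placeholder for TRI->getSubReg(PhysReg, SubIdx)
}

void rewriteOperand(Operand &MO,
                    const std::unordered_map<unsigned, unsigned> &Virt2Phys) {
  if (!MO.IsVirtual)
    return;
  auto It = Virt2Phys.find(MO.Reg);
  assert(It != Virt2Phys.end() && "Instruction uses unmapped VirtReg");
  unsigned PhysReg = It->second;
  if (MO.SubReg) {
    // Preserve semantics of sub-register operands by mapping to the
    // corresponding physical sub-register.
    PhysReg = substSubReg(PhysReg, MO.SubReg);
    MO.SubReg = 0;
  }
  MO.Reg = PhysReg;
  MO.IsVirtual = false;
}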
diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h index 03abff356934..8cac31137e3d 100644 --- a/lib/CodeGen/VirtRegMap.h +++ b/lib/CodeGen/VirtRegMap.h @@ -18,22 +18,14 @@ #define LLVM_CODEGEN_VIRTREGMAP_H #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/LiveInterval.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IndexedMap.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include <map> namespace llvm { - class LiveIntervals; class MachineInstr; class MachineFunction; class MachineRegisterInfo; class TargetInstrInfo; - class TargetRegisterInfo; class raw_ostream; class SlotIndexes; @@ -45,18 +37,12 @@ namespace llvm { MAX_STACK_SLOT = (1L << 18)-1 }; - enum ModRef { isRef = 1, isMod = 2, isModRef = 3 }; - typedef std::multimap<MachineInstr*, - std::pair<unsigned, ModRef> > MI2VirtMapTy; - private: MachineRegisterInfo *MRI; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; MachineFunction *MF; - DenseMap<const TargetRegisterClass*, BitVector> allocatableRCRegs; - /// Virt2PhysMap - This is a virtual to physical register /// mapping. Each virtual register is required to have an entry in /// it; even spilled virtual registers (the register mapped to a @@ -70,71 +56,10 @@ namespace llvm { /// at. IndexedMap<int, VirtReg2IndexFunctor> Virt2StackSlotMap; - /// Virt2ReMatIdMap - This is virtual register to rematerialization id - /// mapping. Each spilled virtual register that should be remat'd has an - /// entry in it which corresponds to the remat id. - IndexedMap<int, VirtReg2IndexFunctor> Virt2ReMatIdMap; - /// Virt2SplitMap - This is virtual register to splitted virtual register /// mapping. IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2SplitMap; - /// Virt2SplitKillMap - This is splitted virtual register to its last use - /// (kill) index mapping. - IndexedMap<SlotIndex, VirtReg2IndexFunctor> Virt2SplitKillMap; - - /// ReMatMap - This is virtual register to re-materialized instruction - /// mapping. Each virtual register whose definition is going to be - /// re-materialized has an entry in it. - IndexedMap<MachineInstr*, VirtReg2IndexFunctor> ReMatMap; - - /// MI2VirtMap - This is MachineInstr to virtual register - /// mapping. In the case of memory spill code being folded into - /// instructions, we need to know which virtual register was - /// read/written by this instruction. - MI2VirtMapTy MI2VirtMap; - - /// SpillPt2VirtMap - This records the virtual registers which should - /// be spilled right after the MachineInstr due to live interval - /// splitting. - std::map<MachineInstr*, std::vector<std::pair<unsigned,bool> > > - SpillPt2VirtMap; - - /// RestorePt2VirtMap - This records the virtual registers which should - /// be restored right before the MachineInstr due to live interval - /// splitting. - std::map<MachineInstr*, std::vector<unsigned> > RestorePt2VirtMap; - - /// EmergencySpillMap - This records the physical registers that should - /// be spilled / restored around the MachineInstr since the register - /// allocator has run out of registers. - std::map<MachineInstr*, std::vector<unsigned> > EmergencySpillMap; - - /// EmergencySpillSlots - This records emergency spill slots used to - /// spill physical registers when the register allocator runs out of - /// registers. Ideally only one stack slot is used per function per - /// register class. 
- std::map<const TargetRegisterClass*, int> EmergencySpillSlots; - - /// ReMatId - Instead of assigning a stack slot to a to be rematerialized - /// virtual register, an unique id is being assigned. This keeps track of - /// the highest id used so far. Note, this starts at (1<<18) to avoid - /// conflicts with stack slot numbers. - int ReMatId; - - /// LowSpillSlot, HighSpillSlot - Lowest and highest spill slot indexes. - int LowSpillSlot, HighSpillSlot; - - /// SpillSlotToUsesMap - Records uses for each register spill slot. - SmallVector<SmallPtrSet<MachineInstr*, 4>, 8> SpillSlotToUsesMap; - - /// ImplicitDefed - One bit for each virtual register. If set it indicates - /// the register is implicitly defined. - BitVector ImplicitDefed; - - /// UnusedRegs - A list of physical registers that have not been used. - BitVector UnusedRegs; - /// createSpillSlot - Allocate a spill slot for RC from MFI. unsigned createSpillSlot(const TargetRegisterClass *RC); @@ -144,11 +69,7 @@ namespace llvm { public: static char ID; VirtRegMap() : MachineFunctionPass(ID), Virt2PhysMap(NO_PHYS_REG), - Virt2StackSlotMap(NO_STACK_SLOT), - Virt2ReMatIdMap(NO_STACK_SLOT), Virt2SplitMap(0), - Virt2SplitKillMap(SlotIndex()), ReMatMap(NULL), - ReMatId(MAX_STACK_SLOT+1), - LowSpillSlot(NO_STACK_SLOT), HighSpillSlot(NO_STACK_SLOT) { } + Virt2StackSlotMap(NO_STACK_SLOT), Virt2SplitMap(0) { } virtual bool runOnMachineFunction(MachineFunction &MF); virtual void getAnalysisUsage(AnalysisUsage &AU) const { @@ -235,8 +156,7 @@ namespace llvm { /// @brief returns true if the specified virtual register is not /// mapped to a stack slot or rematerialized. bool isAssignedReg(unsigned virtReg) const { - if (getStackSlot(virtReg) == NO_STACK_SLOT && - getReMatId(virtReg) == NO_STACK_SLOT) + if (getStackSlot(virtReg) == NO_STACK_SLOT) return true; // Split register can be assigned a physical register as well as a // stack slot or remat id. @@ -250,13 +170,6 @@ namespace llvm { return Virt2StackSlotMap[virtReg]; } - /// @brief returns the rematerialization id mapped to the specified virtual - /// register - int getReMatId(unsigned virtReg) const { - assert(TargetRegisterInfo::isVirtualRegister(virtReg)); - return Virt2ReMatIdMap[virtReg]; - } - /// @brief create a mapping for the specifed virtual register to /// the next available stack slot int assignVirt2StackSlot(unsigned virtReg); @@ -264,250 +177,6 @@ namespace llvm { /// the specified stack slot void assignVirt2StackSlot(unsigned virtReg, int frameIndex); - /// @brief assign an unique re-materialization id to the specified - /// virtual register. - int assignVirtReMatId(unsigned virtReg); - /// @brief assign an unique re-materialization id to the specified - /// virtual register. - void assignVirtReMatId(unsigned virtReg, int id); - - /// @brief returns true if the specified virtual register is being - /// re-materialized. - bool isReMaterialized(unsigned virtReg) const { - return ReMatMap[virtReg] != NULL; - } - - /// @brief returns the original machine instruction being re-issued - /// to re-materialize the specified virtual register. - MachineInstr *getReMaterializedMI(unsigned virtReg) const { - return ReMatMap[virtReg]; - } - - /// @brief records the specified virtual register will be - /// re-materialized and the original instruction which will be re-issed - /// for this purpose. If parameter all is true, then all uses of the - /// registers are rematerialized and it's safe to delete the definition. 
- void setVirtIsReMaterialized(unsigned virtReg, MachineInstr *def) { - ReMatMap[virtReg] = def; - } - - /// @brief record the last use (kill) of a split virtual register. - void addKillPoint(unsigned virtReg, SlotIndex index) { - Virt2SplitKillMap[virtReg] = index; - } - - SlotIndex getKillPoint(unsigned virtReg) const { - return Virt2SplitKillMap[virtReg]; - } - - /// @brief remove the last use (kill) of a split virtual register. - void removeKillPoint(unsigned virtReg) { - Virt2SplitKillMap[virtReg] = SlotIndex(); - } - - /// @brief returns true if the specified MachineInstr is a spill point. - bool isSpillPt(MachineInstr *Pt) const { - return SpillPt2VirtMap.find(Pt) != SpillPt2VirtMap.end(); - } - - /// @brief returns the virtual registers that should be spilled due to - /// splitting right after the specified MachineInstr. - std::vector<std::pair<unsigned,bool> > &getSpillPtSpills(MachineInstr *Pt) { - return SpillPt2VirtMap[Pt]; - } - - /// @brief records the specified MachineInstr as a spill point for virtReg. - void addSpillPoint(unsigned virtReg, bool isKill, MachineInstr *Pt) { - std::map<MachineInstr*, std::vector<std::pair<unsigned,bool> > >::iterator - I = SpillPt2VirtMap.find(Pt); - if (I != SpillPt2VirtMap.end()) - I->second.push_back(std::make_pair(virtReg, isKill)); - else { - std::vector<std::pair<unsigned,bool> > Virts; - Virts.push_back(std::make_pair(virtReg, isKill)); - SpillPt2VirtMap.insert(std::make_pair(Pt, Virts)); - } - } - - /// @brief - transfer spill point information from one instruction to - /// another. - void transferSpillPts(MachineInstr *Old, MachineInstr *New) { - std::map<MachineInstr*, std::vector<std::pair<unsigned,bool> > >::iterator - I = SpillPt2VirtMap.find(Old); - if (I == SpillPt2VirtMap.end()) - return; - while (!I->second.empty()) { - unsigned virtReg = I->second.back().first; - bool isKill = I->second.back().second; - I->second.pop_back(); - addSpillPoint(virtReg, isKill, New); - } - SpillPt2VirtMap.erase(I); - } - - /// @brief returns true if the specified MachineInstr is a restore point. - bool isRestorePt(MachineInstr *Pt) const { - return RestorePt2VirtMap.find(Pt) != RestorePt2VirtMap.end(); - } - - /// @brief returns the virtual registers that should be restoreed due to - /// splitting right after the specified MachineInstr. - std::vector<unsigned> &getRestorePtRestores(MachineInstr *Pt) { - return RestorePt2VirtMap[Pt]; - } - - /// @brief records the specified MachineInstr as a restore point for virtReg. - void addRestorePoint(unsigned virtReg, MachineInstr *Pt) { - std::map<MachineInstr*, std::vector<unsigned> >::iterator I = - RestorePt2VirtMap.find(Pt); - if (I != RestorePt2VirtMap.end()) - I->second.push_back(virtReg); - else { - std::vector<unsigned> Virts; - Virts.push_back(virtReg); - RestorePt2VirtMap.insert(std::make_pair(Pt, Virts)); - } - } - - /// @brief - transfer restore point information from one instruction to - /// another. - void transferRestorePts(MachineInstr *Old, MachineInstr *New) { - std::map<MachineInstr*, std::vector<unsigned> >::iterator I = - RestorePt2VirtMap.find(Old); - if (I == RestorePt2VirtMap.end()) - return; - while (!I->second.empty()) { - unsigned virtReg = I->second.back(); - I->second.pop_back(); - addRestorePoint(virtReg, New); - } - RestorePt2VirtMap.erase(I); - } - - /// @brief records that the specified physical register must be spilled - /// around the specified machine instr. 
- void addEmergencySpill(unsigned PhysReg, MachineInstr *MI) { - if (EmergencySpillMap.find(MI) != EmergencySpillMap.end()) - EmergencySpillMap[MI].push_back(PhysReg); - else { - std::vector<unsigned> PhysRegs; - PhysRegs.push_back(PhysReg); - EmergencySpillMap.insert(std::make_pair(MI, PhysRegs)); - } - } - - /// @brief returns true if one or more physical registers must be spilled - /// around the specified instruction. - bool hasEmergencySpills(MachineInstr *MI) const { - return EmergencySpillMap.find(MI) != EmergencySpillMap.end(); - } - - /// @brief returns the physical registers to be spilled and restored around - /// the instruction. - std::vector<unsigned> &getEmergencySpills(MachineInstr *MI) { - return EmergencySpillMap[MI]; - } - - /// @brief - transfer emergency spill information from one instruction to - /// another. - void transferEmergencySpills(MachineInstr *Old, MachineInstr *New) { - std::map<MachineInstr*,std::vector<unsigned> >::iterator I = - EmergencySpillMap.find(Old); - if (I == EmergencySpillMap.end()) - return; - while (!I->second.empty()) { - unsigned virtReg = I->second.back(); - I->second.pop_back(); - addEmergencySpill(virtReg, New); - } - EmergencySpillMap.erase(I); - } - - /// @brief return or get a emergency spill slot for the register class. - int getEmergencySpillSlot(const TargetRegisterClass *RC); - - /// @brief Return lowest spill slot index. - int getLowSpillSlot() const { - return LowSpillSlot; - } - - /// @brief Return highest spill slot index. - int getHighSpillSlot() const { - return HighSpillSlot; - } - - /// @brief Records a spill slot use. - void addSpillSlotUse(int FrameIndex, MachineInstr *MI); - - /// @brief Returns true if spill slot has been used. - bool isSpillSlotUsed(int FrameIndex) const { - assert(FrameIndex >= 0 && "Spill slot index should not be negative!"); - return !SpillSlotToUsesMap[FrameIndex-LowSpillSlot].empty(); - } - - /// @brief Mark the specified register as being implicitly defined. - void setIsImplicitlyDefined(unsigned VirtReg) { - ImplicitDefed.set(TargetRegisterInfo::virtReg2Index(VirtReg)); - } - - /// @brief Returns true if the virtual register is implicitly defined. - bool isImplicitlyDefined(unsigned VirtReg) const { - return ImplicitDefed[TargetRegisterInfo::virtReg2Index(VirtReg)]; - } - - /// @brief Updates information about the specified virtual register's value - /// folded into newMI machine instruction. - void virtFolded(unsigned VirtReg, MachineInstr *OldMI, MachineInstr *NewMI, - ModRef MRInfo); - - /// @brief Updates information about the specified virtual register's value - /// folded into the specified machine instruction. - void virtFolded(unsigned VirtReg, MachineInstr *MI, ModRef MRInfo); - - /// @brief returns the virtual registers' values folded in memory - /// operands of this instruction - std::pair<MI2VirtMapTy::const_iterator, MI2VirtMapTy::const_iterator> - getFoldedVirts(MachineInstr* MI) const { - return MI2VirtMap.equal_range(MI); - } - - /// RemoveMachineInstrFromMaps - MI is being erased, remove it from the - /// the folded instruction map and spill point map. - void RemoveMachineInstrFromMaps(MachineInstr *MI); - - /// FindUnusedRegisters - Gather a list of allocatable registers that - /// have not been allocated to any virtual register. - bool FindUnusedRegisters(LiveIntervals* LIs); - - /// HasUnusedRegisters - Return true if there are any allocatable registers - /// that have not been allocated to any virtual register. 
- bool HasUnusedRegisters() const { - return !UnusedRegs.none(); - } - - /// setRegisterUsed - Remember the physical register is now used. - void setRegisterUsed(unsigned Reg) { - UnusedRegs.reset(Reg); - } - - /// isRegisterUnused - Return true if the physical register has not been - /// used. - bool isRegisterUnused(unsigned Reg) const { - return UnusedRegs[Reg]; - } - - /// getFirstUnusedRegister - Return the first physical register that has not - /// been used. - unsigned getFirstUnusedRegister(const TargetRegisterClass *RC) { - int Reg = UnusedRegs.find_first(); - while (Reg != -1) { - if (allocatableRCRegs[RC][Reg]) - return (unsigned)Reg; - Reg = UnusedRegs.find_next(Reg); - } - return 0; - } - /// rewrite - Rewrite all instructions in MF to use only physical registers /// by mapping all virtual register operands to their assigned physical /// registers. diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp deleted file mode 100644 index a5ec797b27db..000000000000 --- a/lib/CodeGen/VirtRegRewriter.cpp +++ /dev/null @@ -1,2633 +0,0 @@ -//===-- llvm/CodeGen/Rewriter.cpp - Rewriter -----------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "virtregrewriter" -#include "VirtRegRewriter.h" -#include "VirtRegMap.h" -#include "llvm/Function.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/Statistic.h" -using namespace llvm; - -STATISTIC(NumDSE , "Number of dead stores elided"); -STATISTIC(NumDSS , "Number of dead spill slots removed"); -STATISTIC(NumCommutes, "Number of instructions commuted"); -STATISTIC(NumDRM , "Number of re-materializable defs elided"); -STATISTIC(NumStores , "Number of stores added"); -STATISTIC(NumPSpills , "Number of physical register spills"); -STATISTIC(NumOmitted , "Number of reloads omitted"); -STATISTIC(NumAvoided , "Number of reloads deemed unnecessary"); -STATISTIC(NumCopified, "Number of available reloads turned into copies"); -STATISTIC(NumReMats , "Number of re-materialization"); -STATISTIC(NumLoads , "Number of loads added"); -STATISTIC(NumReused , "Number of values reused"); -STATISTIC(NumDCE , "Number of copies elided"); -STATISTIC(NumSUnfold , "Number of stores unfolded"); -STATISTIC(NumModRefUnfold, "Number of modref unfolded"); - -namespace { - enum RewriterName { local, trivial }; -} - -static cl::opt<RewriterName> -RewriterOpt("rewriter", - cl::desc("Rewriter to use (default=local)"), - cl::Prefix, - cl::values(clEnumVal(local, "local rewriter"), - clEnumVal(trivial, "trivial rewriter"), - clEnumValEnd), - cl::init(local)); - -static cl::opt<bool> -ScheduleSpills("schedule-spills", - cl::desc("Schedule spill code"), - cl::init(false)); - -VirtRegRewriter::~VirtRegRewriter() {} - -/// substitutePhysReg - Replace virtual register in MachineOperand with a -/// physical register. Do the right thing with the sub-register index. 
-/// Note that operands may be added, so the MO reference is no longer valid. -static void substitutePhysReg(MachineOperand &MO, unsigned Reg, - const TargetRegisterInfo &TRI) { - if (MO.getSubReg()) { - MO.substPhysReg(Reg, TRI); - - // Any kill flags apply to the full virtual register, so they also apply to - // the full physical register. - // We assume that partial defs have already been decorated with a super-reg - // <imp-def> operand by LiveIntervals. - MachineInstr &MI = *MO.getParent(); - if (MO.isUse() && !MO.isUndef() && - (MO.isKill() || MI.isRegTiedToDefOperand(&MO-&MI.getOperand(0)))) - MI.addRegisterKilled(Reg, &TRI, /*AddIfNotFound=*/ true); - } else { - MO.setReg(Reg); - } -} - -namespace { - -/// This class is intended for use with the new spilling framework only. It -/// rewrites vreg def/uses to use the assigned preg, but does not insert any -/// spill code. -struct TrivialRewriter : public VirtRegRewriter { - - bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM, - LiveIntervals* LIs) { - DEBUG(dbgs() << "********** REWRITE MACHINE CODE **********\n"); - DEBUG(dbgs() << "********** Function: " - << MF.getFunction()->getName() << '\n'); - DEBUG(dbgs() << "**** Machine Instrs" - << "(NOTE! Does not include spills and reloads!) ****\n"); - DEBUG(MF.dump()); - - MachineRegisterInfo *mri = &MF.getRegInfo(); - const TargetRegisterInfo *tri = MF.getTarget().getRegisterInfo(); - - bool changed = false; - - for (LiveIntervals::iterator liItr = LIs->begin(), liEnd = LIs->end(); - liItr != liEnd; ++liItr) { - - const LiveInterval *li = liItr->second; - unsigned reg = li->reg; - - if (TargetRegisterInfo::isPhysicalRegister(reg)) { - if (!li->empty()) - mri->setPhysRegUsed(reg); - } - else { - if (!VRM.hasPhys(reg)) - continue; - unsigned pReg = VRM.getPhys(reg); - mri->setPhysRegUsed(pReg); - // Copy the register use-list before traversing it. - SmallVector<std::pair<MachineInstr*, unsigned>, 32> reglist; - for (MachineRegisterInfo::reg_iterator I = mri->reg_begin(reg), - E = mri->reg_end(); I != E; ++I) - reglist.push_back(std::make_pair(&*I, I.getOperandNo())); - for (unsigned N=0; N != reglist.size(); ++N) - substitutePhysReg(reglist[N].first->getOperand(reglist[N].second), - pReg, *tri); - changed |= !reglist.empty(); - } - } - - DEBUG(dbgs() << "**** Post Machine Instrs ****\n"); - DEBUG(MF.dump()); - - return changed; - } - -}; - -} - -// ************************************************************************ // - -namespace { - -/// AvailableSpills - As the local rewriter is scanning and rewriting an MBB -/// from top down, keep track of which spill slots or remat are available in -/// each register. -/// -/// Note that not all physregs are created equal here. In particular, some -/// physregs are reloads that we are allowed to clobber or ignore at any time. -/// Other physregs are values that the register allocated program is using -/// that we cannot CHANGE, but we can read if we like. We keep track of this -/// on a per-stack-slot / remat id basis as the low bit in the value of the -/// SpillSlotsAvailable entries. The predicate 'canClobberPhysReg()' checks -/// this bit and addAvailable sets it if. -class AvailableSpills { - const TargetRegisterInfo *TRI; - const TargetInstrInfo *TII; - - // SpillSlotsOrReMatsAvailable - This map keeps track of all of the spilled - // or remat'ed virtual register values that are still available, due to - // being loaded or stored to, but not invalidated yet. 
- std::map<int, unsigned> SpillSlotsOrReMatsAvailable; - - // PhysRegsAvailable - This is the inverse of SpillSlotsOrReMatsAvailable, - // indicating which stack slot values are currently held by a physreg. This - // is used to invalidate entries in SpillSlotsOrReMatsAvailable when a - // physreg is modified. - std::multimap<unsigned, int> PhysRegsAvailable; - - void disallowClobberPhysRegOnly(unsigned PhysReg); - - void ClobberPhysRegOnly(unsigned PhysReg); -public: - AvailableSpills(const TargetRegisterInfo *tri, const TargetInstrInfo *tii) - : TRI(tri), TII(tii) { - } - - /// clear - Reset the state. - void clear() { - SpillSlotsOrReMatsAvailable.clear(); - PhysRegsAvailable.clear(); - } - - const TargetRegisterInfo *getRegInfo() const { return TRI; } - - /// getSpillSlotOrReMatPhysReg - If the specified stack slot or remat is - /// available in a physical register, return that PhysReg, otherwise - /// return 0. - unsigned getSpillSlotOrReMatPhysReg(int Slot) const { - std::map<int, unsigned>::const_iterator I = - SpillSlotsOrReMatsAvailable.find(Slot); - if (I != SpillSlotsOrReMatsAvailable.end()) { - return I->second >> 1; // Remove the CanClobber bit. - } - return 0; - } - - /// addAvailable - Mark that the specified stack slot / remat is available - /// in the specified physreg. If CanClobber is true, the physreg can be - /// modified at any time without changing the semantics of the program. - void addAvailable(int SlotOrReMat, unsigned Reg, bool CanClobber = true) { - // If this stack slot is thought to be available in some other physreg, - // remove its record. - ModifyStackSlotOrReMat(SlotOrReMat); - - PhysRegsAvailable.insert(std::make_pair(Reg, SlotOrReMat)); - SpillSlotsOrReMatsAvailable[SlotOrReMat]= (Reg << 1) | - (unsigned)CanClobber; - - if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT) - DEBUG(dbgs() << "Remembering RM#" - << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1); - else - DEBUG(dbgs() << "Remembering SS#" << SlotOrReMat); - DEBUG(dbgs() << " in physreg " << TRI->getName(Reg) - << (CanClobber ? " canclobber" : "") << "\n"); - } - - /// canClobberPhysRegForSS - Return true if the spiller is allowed to change - /// the value of the specified stackslot register if it desires. The - /// specified stack slot must be available in a physreg for this query to - /// make sense. - bool canClobberPhysRegForSS(int SlotOrReMat) const { - assert(SpillSlotsOrReMatsAvailable.count(SlotOrReMat) && - "Value not available!"); - return SpillSlotsOrReMatsAvailable.find(SlotOrReMat)->second & 1; - } - - /// canClobberPhysReg - Return true if the spiller is allowed to clobber the - /// physical register where values for some stack slot(s) might be - /// available. - bool canClobberPhysReg(unsigned PhysReg) const { - std::multimap<unsigned, int>::const_iterator I = - PhysRegsAvailable.lower_bound(PhysReg); - while (I != PhysRegsAvailable.end() && I->first == PhysReg) { - int SlotOrReMat = I->second; - I++; - if (!canClobberPhysRegForSS(SlotOrReMat)) - return false; - } - return true; - } - - /// disallowClobberPhysReg - Unset the CanClobber bit of the specified - /// stackslot register. The register is still available but is no longer - /// allowed to be modifed. - void disallowClobberPhysReg(unsigned PhysReg); - - /// ClobberPhysReg - This is called when the specified physreg changes - /// value. We use this to invalidate any info about stuff that lives in - /// it and any of its aliases. 
- void ClobberPhysReg(unsigned PhysReg); - - /// ModifyStackSlotOrReMat - This method is called when the value in a stack - /// slot changes. This removes information about which register the - /// previous value for this slot lives in (as the previous value is dead - /// now). - void ModifyStackSlotOrReMat(int SlotOrReMat); - - /// ClobberSharingStackSlots - When a register mapped to a stack slot changes, - /// other stack slots sharing the same register are no longer valid. - void ClobberSharingStackSlots(int StackSlot); - - /// AddAvailableRegsToLiveIn - Availability information is being kept coming - /// into the specified MBB. Add available physical registers as potential - /// live-in's. If they are reused in the MBB, they will be added to the - /// live-in set to make register scavenger and post-allocation scheduler. - void AddAvailableRegsToLiveIn(MachineBasicBlock &MBB, BitVector &RegKills, - std::vector<MachineOperand*> &KillOps); -}; - -} - -// ************************************************************************ // - -// Given a location where a reload of a spilled register or a remat of -// a constant is to be inserted, attempt to find a safe location to -// insert the load at an earlier point in the basic-block, to hide -// latency of the load and to avoid address-generation interlock -// issues. -static MachineBasicBlock::iterator -ComputeReloadLoc(MachineBasicBlock::iterator const InsertLoc, - MachineBasicBlock::iterator const Begin, - unsigned PhysReg, - const TargetRegisterInfo *TRI, - bool DoReMat, - int SSorRMId, - const TargetInstrInfo *TII, - const MachineFunction &MF) -{ - if (!ScheduleSpills) - return InsertLoc; - - // Spill backscheduling is of primary interest to addresses, so - // don't do anything if the register isn't in the register class - // used for pointers. - - const TargetLowering *TL = MF.getTarget().getTargetLowering(); - - if (!TL->isTypeLegal(TL->getPointerTy())) - // Believe it or not, this is true on 16-bit targets like PIC16. - return InsertLoc; - - const TargetRegisterClass *ptrRegClass = - TL->getRegClassFor(TL->getPointerTy()); - if (!ptrRegClass->contains(PhysReg)) - return InsertLoc; - - // Scan upwards through the preceding instructions. If an instruction doesn't - // reference the stack slot or the register we're loading, we can - // backschedule the reload up past it. - MachineBasicBlock::iterator NewInsertLoc = InsertLoc; - while (NewInsertLoc != Begin) { - MachineBasicBlock::iterator Prev = prior(NewInsertLoc); - for (unsigned i = 0; i < Prev->getNumOperands(); ++i) { - MachineOperand &Op = Prev->getOperand(i); - if (!DoReMat && Op.isFI() && Op.getIndex() == SSorRMId) - goto stop; - } - if (Prev->findRegisterUseOperandIdx(PhysReg) != -1 || - Prev->findRegisterDefOperand(PhysReg)) - goto stop; - for (const unsigned *Alias = TRI->getAliasSet(PhysReg); *Alias; ++Alias) - if (Prev->findRegisterUseOperandIdx(*Alias) != -1 || - Prev->findRegisterDefOperand(*Alias)) - goto stop; - NewInsertLoc = Prev; - } -stop:; - - // If we made it to the beginning of the block, turn around and move back - // down just past any existing reloads. They're likely to be reloads/remats - // for instructions earlier than what our current reload/remat is for, so - // they should be scheduled earlier. 
- if (NewInsertLoc == Begin) { - int FrameIdx; - while (InsertLoc != NewInsertLoc && - (TII->isLoadFromStackSlot(NewInsertLoc, FrameIdx) || - TII->isTriviallyReMaterializable(NewInsertLoc))) - ++NewInsertLoc; - } - - return NewInsertLoc; -} - -namespace { - -// ReusedOp - For each reused operand, we keep track of a bit of information, -// in case we need to rollback upon processing a new operand. See comments -// below. -struct ReusedOp { - // The MachineInstr operand that reused an available value. - unsigned Operand; - - // StackSlotOrReMat - The spill slot or remat id of the value being reused. - unsigned StackSlotOrReMat; - - // PhysRegReused - The physical register the value was available in. - unsigned PhysRegReused; - - // AssignedPhysReg - The physreg that was assigned for use by the reload. - unsigned AssignedPhysReg; - - // VirtReg - The virtual register itself. - unsigned VirtReg; - - ReusedOp(unsigned o, unsigned ss, unsigned prr, unsigned apr, - unsigned vreg) - : Operand(o), StackSlotOrReMat(ss), PhysRegReused(prr), - AssignedPhysReg(apr), VirtReg(vreg) {} -}; - -/// ReuseInfo - This maintains a collection of ReuseOp's for each operand that -/// is reused instead of reloaded. -class ReuseInfo { - MachineInstr &MI; - std::vector<ReusedOp> Reuses; - BitVector PhysRegsClobbered; -public: - ReuseInfo(MachineInstr &mi, const TargetRegisterInfo *tri) : MI(mi) { - PhysRegsClobbered.resize(tri->getNumRegs()); - } - - bool hasReuses() const { - return !Reuses.empty(); - } - - /// addReuse - If we choose to reuse a virtual register that is already - /// available instead of reloading it, remember that we did so. - void addReuse(unsigned OpNo, unsigned StackSlotOrReMat, - unsigned PhysRegReused, unsigned AssignedPhysReg, - unsigned VirtReg) { - // If the reload is to the assigned register anyway, no undo will be - // required. - if (PhysRegReused == AssignedPhysReg) return; - - // Otherwise, remember this. - Reuses.push_back(ReusedOp(OpNo, StackSlotOrReMat, PhysRegReused, - AssignedPhysReg, VirtReg)); - } - - void markClobbered(unsigned PhysReg) { - PhysRegsClobbered.set(PhysReg); - } - - bool isClobbered(unsigned PhysReg) const { - return PhysRegsClobbered.test(PhysReg); - } - - /// GetRegForReload - We are about to emit a reload into PhysReg. If there - /// is some other operand that is using the specified register, either pick - /// a new register to use, or evict the previous reload and use this reg. - unsigned GetRegForReload(const TargetRegisterClass *RC, unsigned PhysReg, - MachineFunction &MF, MachineInstr *MI, - AvailableSpills &Spills, - std::vector<MachineInstr*> &MaybeDeadStores, - SmallSet<unsigned, 8> &Rejected, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps, - VirtRegMap &VRM); - - /// GetRegForReload - Helper for the above GetRegForReload(). Add a - /// 'Rejected' set to remember which registers have been considered and - /// rejected for the reload. This avoids infinite looping in case like - /// this: - /// t1 := op t2, t3 - /// t2 <- assigned r0 for use by the reload but ended up reuse r1 - /// t3 <- assigned r1 for use by the reload but ended up reuse r0 - /// t1 <- desires r1 - /// sees r1 is taken by t2, tries t2's reload register r0 - /// sees r0 is taken by t3, tries t3's reload register r1 - /// sees r1 is taken by t2, tries t2's reload register r0 ... 
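The comment above explains why the second GetRegForReload overload threads a Rejected set through its recursion: without it, chasing each operand's alternative reload register can cycle forever (r1 -> r0 -> r1 -> ...). A rough, self-contained sketch of that idea, using hypothetical simplified types rather than the LLVM API in this file:

#include <map>
#include <set>

// ReuseOf[R] is the reload register originally assigned to the operand that is
// currently reusing R; no entry means R is not claimed by any reuse. Each
// register enters Rejected at most once, so the walk terminates even when the
// reuse chain forms a cycle. (The real code additionally undoes the earlier
// reuse when no alternative is left; that part is omitted here.)
static unsigned pickReloadReg(unsigned Desired,
                              const std::map<unsigned, unsigned> &ReuseOf,
                              std::set<unsigned> &Rejected) {
  std::map<unsigned, unsigned>::const_iterator It = ReuseOf.find(Desired);
  if (It == ReuseOf.end() || Rejected.count(It->second))
    return Desired;                 // free, or its alternative was already tried
  Rejected.insert(Desired);         // remember that Desired has been considered
  return pickReloadReg(It->second, ReuseOf, Rejected); // follow the chain
}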
- unsigned GetRegForReload(unsigned VirtReg, unsigned PhysReg, MachineInstr *MI, - AvailableSpills &Spills, - std::vector<MachineInstr*> &MaybeDeadStores, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps, - VirtRegMap &VRM) { - SmallSet<unsigned, 8> Rejected; - MachineFunction &MF = *MI->getParent()->getParent(); - const TargetRegisterClass* RC = MF.getRegInfo().getRegClass(VirtReg); - return GetRegForReload(RC, PhysReg, MF, MI, Spills, MaybeDeadStores, - Rejected, RegKills, KillOps, VRM); - } -}; - -} - -// ****************** // -// Utility Functions // -// ****************** // - -/// findSinglePredSuccessor - Return via reference a vector of machine basic -/// blocks each of which is a successor of the specified BB and has no other -/// predecessor. -static void findSinglePredSuccessor(MachineBasicBlock *MBB, - SmallVectorImpl<MachineBasicBlock *> &Succs){ - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) { - MachineBasicBlock *SuccMBB = *SI; - if (SuccMBB->pred_size() == 1) - Succs.push_back(SuccMBB); - } -} - -/// ResurrectConfirmedKill - Helper for ResurrectKill. This register is killed -/// but not re-defined and it's being reused. Remove the kill flag for the -/// register and unset the kill's marker and last kill operand. -static void ResurrectConfirmedKill(unsigned Reg, const TargetRegisterInfo* TRI, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps) { - DEBUG(dbgs() << "Resurrect " << TRI->getName(Reg) << "\n"); - - MachineOperand *KillOp = KillOps[Reg]; - KillOp->setIsKill(false); - // KillOps[Reg] might be a def of a super-register. - unsigned KReg = KillOp->getReg(); - if (!RegKills[KReg]) - return; - - assert(KillOps[KReg]->getParent() == KillOp->getParent() && - "invalid superreg kill flags"); - KillOps[KReg] = NULL; - RegKills.reset(KReg); - - // If it's a def of a super-register. Its other sub-regsters are no - // longer killed as well. - for (const unsigned *SR = TRI->getSubRegisters(KReg); *SR; ++SR) { - DEBUG(dbgs() << " Resurrect subreg " << TRI->getName(*SR) << "\n"); - - assert(KillOps[*SR]->getParent() == KillOp->getParent() && - "invalid subreg kill flags"); - KillOps[*SR] = NULL; - RegKills.reset(*SR); - } -} - -/// ResurrectKill - Invalidate kill info associated with a previous MI. An -/// optimization may have decided that it's safe to reuse a previously killed -/// register. If we fail to erase the invalid kill flags, then the register -/// scavenger may later clobber the register used by this MI. Note that this -/// must be done even if this MI is being deleted! Consider: -/// -/// USE $r1 (vreg1) <kill> -/// ... -/// $r1(vreg3) = COPY $r1 (vreg2) -/// -/// RegAlloc has smartly assigned all three vregs to the same physreg. Initially -/// vreg1's only use is a kill. The rewriter doesn't know it should be live -/// until it rewrites vreg2. At that points it sees that the copy is dead and -/// deletes it. However, deleting the copy implicitly forwards liveness of $r1 -/// (it's copy coalescing). We must resurrect $r1 by removing the kill flag at -/// vreg1 before deleting the copy. -static void ResurrectKill(MachineInstr &MI, unsigned Reg, - const TargetRegisterInfo* TRI, BitVector &RegKills, - std::vector<MachineOperand*> &KillOps) { - if (RegKills[Reg] && KillOps[Reg]->getParent() != &MI) { - ResurrectConfirmedKill(Reg, TRI, RegKills, KillOps); - return; - } - // No previous kill for this reg. Check for subreg kills as well. - // d4 = - // store d4, fi#0 - // ... 
- // = s8<kill> - // ... - // = d4 <avoiding reload> - for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) { - unsigned SReg = *SR; - if (RegKills[SReg] && KillOps[SReg]->getParent() != &MI) - ResurrectConfirmedKill(SReg, TRI, RegKills, KillOps); - } -} - -/// InvalidateKills - MI is going to be deleted. If any of its operands are -/// marked kill, then invalidate the information. -static void InvalidateKills(MachineInstr &MI, - const TargetRegisterInfo* TRI, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps, - SmallVector<unsigned, 2> *KillRegs = NULL) { - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || !MO.isUse() || !MO.isKill() || MO.isUndef()) - continue; - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) - continue; - if (KillRegs) - KillRegs->push_back(Reg); - assert(Reg < KillOps.size()); - if (KillOps[Reg] == &MO) { - // This operand was the kill, now no longer. - KillOps[Reg] = NULL; - RegKills.reset(Reg); - for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) { - if (RegKills[*SR]) { - assert(KillOps[*SR] == &MO && "bad subreg kill flags"); - KillOps[*SR] = NULL; - RegKills.reset(*SR); - } - } - } - else { - // This operand may have reused a previously killed reg. Keep it live in - // case it continues to be used after erasing this instruction. - ResurrectKill(MI, Reg, TRI, RegKills, KillOps); - } - } -} - -/// InvalidateRegDef - If the def operand of the specified def MI is now dead -/// (since its spill instruction is removed), mark it isDead. Also checks if -/// the def MI has other definition operands that are not dead. Returns it by -/// reference. -static bool InvalidateRegDef(MachineBasicBlock::iterator I, - MachineInstr &NewDef, unsigned Reg, - bool &HasLiveDef, - const TargetRegisterInfo *TRI) { - // Due to remat, it's possible this reg isn't being reused. That is, - // the def of this reg (by prev MI) is now dead. - MachineInstr *DefMI = I; - MachineOperand *DefOp = NULL; - for (unsigned i = 0, e = DefMI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = DefMI->getOperand(i); - if (!MO.isReg() || !MO.isDef() || !MO.isKill() || MO.isUndef()) - continue; - if (MO.getReg() == Reg) - DefOp = &MO; - else if (!MO.isDead()) - HasLiveDef = true; - } - if (!DefOp) - return false; - - bool FoundUse = false, Done = false; - MachineBasicBlock::iterator E = &NewDef; - ++I; ++E; - for (; !Done && I != E; ++I) { - MachineInstr *NMI = I; - for (unsigned j = 0, ee = NMI->getNumOperands(); j != ee; ++j) { - MachineOperand &MO = NMI->getOperand(j); - if (!MO.isReg() || MO.getReg() == 0 || - (MO.getReg() != Reg && !TRI->isSubRegister(Reg, MO.getReg()))) - continue; - if (MO.isUse()) - FoundUse = true; - Done = true; // Stop after scanning all the operands of this MI. - } - } - if (!FoundUse) { - // Def is dead! - DefOp->setIsDead(); - return true; - } - return false; -} - -/// UpdateKills - Track and update kill info. If a MI reads a register that is -/// marked kill, then it must be due to register reuse. Transfer the kill info -/// over. -static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps) { - // These do not affect kill info at all. 
- if (MI.isDebugValue()) - return; - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || !MO.isUse() || MO.isUndef()) - continue; - unsigned Reg = MO.getReg(); - if (Reg == 0) - continue; - - // This operand may have reused a previously killed reg. Keep it live. - ResurrectKill(MI, Reg, TRI, RegKills, KillOps); - - if (MO.isKill()) { - RegKills.set(Reg); - KillOps[Reg] = &MO; - for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) { - RegKills.set(*SR); - KillOps[*SR] = &MO; - } - } - } - - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || !MO.getReg() || !MO.isDef()) - continue; - unsigned Reg = MO.getReg(); - RegKills.reset(Reg); - KillOps[Reg] = NULL; - // It also defines (or partially define) aliases. - for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) { - RegKills.reset(*SR); - KillOps[*SR] = NULL; - } - for (const unsigned *SR = TRI->getSuperRegisters(Reg); *SR; ++SR) { - RegKills.reset(*SR); - KillOps[*SR] = NULL; - } - } -} - -/// ReMaterialize - Re-materialize definition for Reg targeting DestReg. -/// -static void ReMaterialize(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MII, - unsigned DestReg, unsigned Reg, - const TargetInstrInfo *TII, - const TargetRegisterInfo *TRI, - VirtRegMap &VRM) { - MachineInstr *ReMatDefMI = VRM.getReMaterializedMI(Reg); -#ifndef NDEBUG - const MCInstrDesc &MCID = ReMatDefMI->getDesc(); - assert(MCID.getNumDefs() == 1 && - "Don't know how to remat instructions that define > 1 values!"); -#endif - TII->reMaterialize(MBB, MII, DestReg, 0, ReMatDefMI, *TRI); - MachineInstr *NewMI = prior(MII); - for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = NewMI->getOperand(i); - if (!MO.isReg() || MO.getReg() == 0) - continue; - unsigned VirtReg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(VirtReg)) - continue; - assert(MO.isUse()); - unsigned Phys = VRM.getPhys(VirtReg); - assert(Phys && "Virtual register is not assigned a register?"); - substitutePhysReg(MO, Phys, *TRI); - } - ++NumReMats; -} - -/// findSuperReg - Find the SubReg's super-register of given register class -/// where its SubIdx sub-register is SubReg. -static unsigned findSuperReg(const TargetRegisterClass *RC, unsigned SubReg, - unsigned SubIdx, const TargetRegisterInfo *TRI) { - for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); - I != E; ++I) { - unsigned Reg = *I; - if (TRI->getSubReg(Reg, SubIdx) == SubReg) - return Reg; - } - return 0; -} - -// ******************************** // -// Available Spills Implementation // -// ******************************** // - -/// disallowClobberPhysRegOnly - Unset the CanClobber bit of the specified -/// stackslot register. The register is still available but is no longer -/// allowed to be modifed. 
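The CanClobber bit that the function defined just below clears is the low bit of the value stored in SpillSlotsOrReMatsAvailable: addAvailable stores (Reg << 1) | CanClobber, getSpillSlotOrReMatPhysReg unpacks with >> 1, and canClobberPhysRegForSS tests & 1. A minimal stand-alone check of that packing, with illustrative values only:

#include <cassert>

int main() {
  unsigned Reg = 7;                                     // some physreg number
  bool CanClobber = true;
  unsigned Packed = (Reg << 1) | (unsigned)CanClobber;  // encoding used above
  assert((Packed >> 1) == Reg);                         // recovers the register
  assert((Packed & 1) != 0);                            // recovers CanClobber
  return 0;
}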
-void AvailableSpills::disallowClobberPhysRegOnly(unsigned PhysReg) { - std::multimap<unsigned, int>::iterator I = - PhysRegsAvailable.lower_bound(PhysReg); - while (I != PhysRegsAvailable.end() && I->first == PhysReg) { - int SlotOrReMat = I->second; - I++; - assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg && - "Bidirectional map mismatch!"); - SpillSlotsOrReMatsAvailable[SlotOrReMat] &= ~1; - DEBUG(dbgs() << "PhysReg " << TRI->getName(PhysReg) - << " copied, it is available for use but can no longer be modified\n"); - } -} - -/// disallowClobberPhysReg - Unset the CanClobber bit of the specified -/// stackslot register and its aliases. The register and its aliases may -/// still available but is no longer allowed to be modifed. -void AvailableSpills::disallowClobberPhysReg(unsigned PhysReg) { - for (const unsigned *AS = TRI->getAliasSet(PhysReg); *AS; ++AS) - disallowClobberPhysRegOnly(*AS); - disallowClobberPhysRegOnly(PhysReg); -} - -/// ClobberPhysRegOnly - This is called when the specified physreg changes -/// value. We use this to invalidate any info about stuff we thing lives in it. -void AvailableSpills::ClobberPhysRegOnly(unsigned PhysReg) { - std::multimap<unsigned, int>::iterator I = - PhysRegsAvailable.lower_bound(PhysReg); - while (I != PhysRegsAvailable.end() && I->first == PhysReg) { - int SlotOrReMat = I->second; - PhysRegsAvailable.erase(I++); - assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg && - "Bidirectional map mismatch!"); - SpillSlotsOrReMatsAvailable.erase(SlotOrReMat); - DEBUG(dbgs() << "PhysReg " << TRI->getName(PhysReg) - << " clobbered, invalidating "); - if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT) - DEBUG(dbgs() << "RM#" << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1 <<"\n"); - else - DEBUG(dbgs() << "SS#" << SlotOrReMat << "\n"); - } -} - -/// ClobberPhysReg - This is called when the specified physreg changes -/// value. We use this to invalidate any info about stuff we thing lives in -/// it and any of its aliases. -void AvailableSpills::ClobberPhysReg(unsigned PhysReg) { - for (const unsigned *AS = TRI->getAliasSet(PhysReg); *AS; ++AS) - ClobberPhysRegOnly(*AS); - ClobberPhysRegOnly(PhysReg); -} - -/// AddAvailableRegsToLiveIn - Availability information is being kept coming -/// into the specified MBB. Add available physical registers as potential -/// live-in's. If they are reused in the MBB, they will be added to the -/// live-in set to make register scavenger and post-allocation scheduler. -void AvailableSpills::AddAvailableRegsToLiveIn(MachineBasicBlock &MBB, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps) { - std::set<unsigned> NotAvailable; - for (std::multimap<unsigned, int>::iterator - I = PhysRegsAvailable.begin(), E = PhysRegsAvailable.end(); - I != E; ++I) { - unsigned Reg = I->first; - const TargetRegisterClass* RC = TRI->getMinimalPhysRegClass(Reg); - // FIXME: A temporary workaround. We can't reuse available value if it's - // not safe to move the def of the virtual register's class. e.g. - // X86::RFP* register classes. Do not add it as a live-in. - if (!TII->isSafeToMoveRegClassDefs(RC)) - // This is no longer available. - NotAvailable.insert(Reg); - else { - MBB.addLiveIn(Reg); - if (RegKills[Reg]) - ResurrectConfirmedKill(Reg, TRI, RegKills, KillOps); - } - - // Skip over the same register. 
- std::multimap<unsigned, int>::iterator NI = llvm::next(I); - while (NI != E && NI->first == Reg) { - ++I; - ++NI; - } - } - - for (std::set<unsigned>::iterator I = NotAvailable.begin(), - E = NotAvailable.end(); I != E; ++I) { - ClobberPhysReg(*I); - for (const unsigned *SubRegs = TRI->getSubRegisters(*I); - *SubRegs; ++SubRegs) - ClobberPhysReg(*SubRegs); - } -} - -/// ModifyStackSlotOrReMat - This method is called when the value in a stack -/// slot changes. This removes information about which register the previous -/// value for this slot lives in (as the previous value is dead now). -void AvailableSpills::ModifyStackSlotOrReMat(int SlotOrReMat) { - std::map<int, unsigned>::iterator It = - SpillSlotsOrReMatsAvailable.find(SlotOrReMat); - if (It == SpillSlotsOrReMatsAvailable.end()) return; - unsigned Reg = It->second >> 1; - SpillSlotsOrReMatsAvailable.erase(It); - - // This register may hold the value of multiple stack slots, only remove this - // stack slot from the set of values the register contains. - std::multimap<unsigned, int>::iterator I = PhysRegsAvailable.lower_bound(Reg); - for (; ; ++I) { - assert(I != PhysRegsAvailable.end() && I->first == Reg && - "Map inverse broken!"); - if (I->second == SlotOrReMat) break; - } - PhysRegsAvailable.erase(I); -} - -void AvailableSpills::ClobberSharingStackSlots(int StackSlot) { - std::map<int, unsigned>::iterator It = - SpillSlotsOrReMatsAvailable.find(StackSlot); - if (It == SpillSlotsOrReMatsAvailable.end()) return; - unsigned Reg = It->second >> 1; - - // Erase entries in PhysRegsAvailable for other stack slots. - std::multimap<unsigned, int>::iterator I = PhysRegsAvailable.lower_bound(Reg); - while (I != PhysRegsAvailable.end() && I->first == Reg) { - std::multimap<unsigned, int>::iterator NextI = llvm::next(I); - if (I->second != StackSlot) { - DEBUG(dbgs() << "Clobbered sharing SS#" << I->second << " in " - << PrintReg(Reg, TRI) << '\n'); - SpillSlotsOrReMatsAvailable.erase(I->second); - PhysRegsAvailable.erase(I); - } - I = NextI; - } -} - -// ************************** // -// Reuse Info Implementation // -// ************************** // - -/// GetRegForReload - We are about to emit a reload into PhysReg. If there -/// is some other operand that is using the specified register, either pick -/// a new register to use, or evict the previous reload and use this reg. -unsigned ReuseInfo::GetRegForReload(const TargetRegisterClass *RC, - unsigned PhysReg, - MachineFunction &MF, - MachineInstr *MI, AvailableSpills &Spills, - std::vector<MachineInstr*> &MaybeDeadStores, - SmallSet<unsigned, 8> &Rejected, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps, - VirtRegMap &VRM) { - const TargetInstrInfo* TII = MF.getTarget().getInstrInfo(); - const TargetRegisterInfo *TRI = Spills.getRegInfo(); - - if (Reuses.empty()) return PhysReg; // This is most often empty. - - for (unsigned ro = 0, e = Reuses.size(); ro != e; ++ro) { - ReusedOp &Op = Reuses[ro]; - // If we find some other reuse that was supposed to use this register - // exactly for its reload, we can change this reload to use ITS reload - // register. That is, unless its reload register has already been - // considered and subsequently rejected because it has also been reused - // by another operand. - if (Op.PhysRegReused == PhysReg && - Rejected.count(Op.AssignedPhysReg) == 0 && - RC->contains(Op.AssignedPhysReg)) { - // Yup, use the reload register that we didn't use before. 
- unsigned NewReg = Op.AssignedPhysReg; - Rejected.insert(PhysReg); - return GetRegForReload(RC, NewReg, MF, MI, Spills, MaybeDeadStores, - Rejected, RegKills, KillOps, VRM); - } else { - // Otherwise, we might also have a problem if a previously reused - // value aliases the new register. If so, codegen the previous reload - // and use this one. - unsigned PRRU = Op.PhysRegReused; - if (TRI->regsOverlap(PRRU, PhysReg)) { - // Okay, we found out that an alias of a reused register - // was used. This isn't good because it means we have - // to undo a previous reuse. - MachineBasicBlock *MBB = MI->getParent(); - const TargetRegisterClass *AliasRC = - MBB->getParent()->getRegInfo().getRegClass(Op.VirtReg); - - // Copy Op out of the vector and remove it, we're going to insert an - // explicit load for it. - ReusedOp NewOp = Op; - Reuses.erase(Reuses.begin()+ro); - - // MI may be using only a sub-register of PhysRegUsed. - unsigned RealPhysRegUsed = MI->getOperand(NewOp.Operand).getReg(); - unsigned SubIdx = 0; - assert(TargetRegisterInfo::isPhysicalRegister(RealPhysRegUsed) && - "A reuse cannot be a virtual register"); - if (PRRU != RealPhysRegUsed) { - // What was the sub-register index? - SubIdx = TRI->getSubRegIndex(PRRU, RealPhysRegUsed); - assert(SubIdx && - "Operand physreg is not a sub-register of PhysRegUsed"); - } - - // Ok, we're going to try to reload the assigned physreg into the - // slot that we were supposed to in the first place. However, that - // register could hold a reuse. Check to see if it conflicts or - // would prefer us to use a different register. - unsigned NewPhysReg = GetRegForReload(RC, NewOp.AssignedPhysReg, - MF, MI, Spills, MaybeDeadStores, - Rejected, RegKills, KillOps, VRM); - - bool DoReMat = NewOp.StackSlotOrReMat > VirtRegMap::MAX_STACK_SLOT; - int SSorRMId = DoReMat - ? VRM.getReMatId(NewOp.VirtReg) : (int) NewOp.StackSlotOrReMat; - - // Back-schedule reloads and remats. - MachineBasicBlock::iterator InsertLoc = - ComputeReloadLoc(MI, MBB->begin(), PhysReg, TRI, - DoReMat, SSorRMId, TII, MF); - - if (DoReMat) { - ReMaterialize(*MBB, InsertLoc, NewPhysReg, NewOp.VirtReg, TII, - TRI, VRM); - } else { - TII->loadRegFromStackSlot(*MBB, InsertLoc, NewPhysReg, - NewOp.StackSlotOrReMat, AliasRC, TRI); - MachineInstr *LoadMI = prior(InsertLoc); - VRM.addSpillSlotUse(NewOp.StackSlotOrReMat, LoadMI); - // Any stores to this stack slot are not dead anymore. - MaybeDeadStores[NewOp.StackSlotOrReMat] = NULL; - ++NumLoads; - } - Spills.ClobberPhysReg(NewPhysReg); - Spills.ClobberPhysReg(NewOp.PhysRegReused); - - unsigned RReg = SubIdx ? TRI->getSubReg(NewPhysReg, SubIdx) :NewPhysReg; - MI->getOperand(NewOp.Operand).setReg(RReg); - MI->getOperand(NewOp.Operand).setSubReg(0); - - Spills.addAvailable(NewOp.StackSlotOrReMat, NewPhysReg); - UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps); - DEBUG(dbgs() << '\t' << *prior(InsertLoc)); - - DEBUG(dbgs() << "Reuse undone!\n"); - --NumReused; - - // Finally, PhysReg is now available, go ahead and use it. - return PhysReg; - } - } - } - return PhysReg; -} - -// ************************************************************************ // - -/// FoldsStackSlotModRef - Return true if the specified MI folds the specified -/// stack slot mod/ref. It also checks if it's possible to unfold the -/// instruction by having it define a specified physical register instead. 
-static bool FoldsStackSlotModRef(MachineInstr &MI, int SS, unsigned PhysReg, - const TargetInstrInfo *TII, - const TargetRegisterInfo *TRI, - VirtRegMap &VRM) { - if (VRM.hasEmergencySpills(&MI) || VRM.isSpillPt(&MI)) - return false; - - bool Found = false; - VirtRegMap::MI2VirtMapTy::const_iterator I, End; - for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ++I) { - unsigned VirtReg = I->second.first; - VirtRegMap::ModRef MR = I->second.second; - if (MR & VirtRegMap::isModRef) - if (VRM.getStackSlot(VirtReg) == SS) { - Found= TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(), true, true) != 0; - break; - } - } - if (!Found) - return false; - - // Does the instruction uses a register that overlaps the scratch register? - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || MO.getReg() == 0) - continue; - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { - if (!VRM.hasPhys(Reg)) - continue; - Reg = VRM.getPhys(Reg); - } - if (TRI->regsOverlap(PhysReg, Reg)) - return false; - } - return true; -} - -/// FindFreeRegister - Find a free register of a given register class by looking -/// at (at most) the last two machine instructions. -static unsigned FindFreeRegister(MachineBasicBlock::iterator MII, - MachineBasicBlock &MBB, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, - BitVector &AllocatableRegs) { - BitVector Defs(TRI->getNumRegs()); - BitVector Uses(TRI->getNumRegs()); - SmallVector<unsigned, 4> LocalUses; - SmallVector<unsigned, 4> Kills; - - // Take a look at 2 instructions at most. - unsigned Count = 0; - while (Count < 2) { - if (MII == MBB.begin()) - break; - MachineInstr *PrevMI = prior(MII); - MII = PrevMI; - - if (PrevMI->isDebugValue()) - continue; // Skip over dbg_value instructions. 
- ++Count; - - for (unsigned i = 0, e = PrevMI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = PrevMI->getOperand(i); - if (!MO.isReg() || MO.getReg() == 0) - continue; - unsigned Reg = MO.getReg(); - if (MO.isDef()) { - Defs.set(Reg); - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) - Defs.set(*AS); - } else { - LocalUses.push_back(Reg); - if (MO.isKill() && AllocatableRegs[Reg]) - Kills.push_back(Reg); - } - } - - for (unsigned i = 0, e = Kills.size(); i != e; ++i) { - unsigned Kill = Kills[i]; - if (!Defs[Kill] && !Uses[Kill] && - RC->contains(Kill)) - return Kill; - } - for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) { - unsigned Reg = LocalUses[i]; - Uses.set(Reg); - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) - Uses.set(*AS); - } - } - - return 0; -} - -static -void AssignPhysToVirtReg(MachineInstr *MI, unsigned VirtReg, unsigned PhysReg, - const TargetRegisterInfo &TRI) { - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.getReg() == VirtReg) - substitutePhysReg(MO, PhysReg, TRI); - } -} - -namespace { - -struct RefSorter { - bool operator()(const std::pair<MachineInstr*, int> &A, - const std::pair<MachineInstr*, int> &B) { - return A.second < B.second; - } -}; - -// ***************************** // -// Local Spiller Implementation // -// ***************************** // - -class LocalRewriter : public VirtRegRewriter { - MachineRegisterInfo *MRI; - const TargetRegisterInfo *TRI; - const TargetInstrInfo *TII; - VirtRegMap *VRM; - LiveIntervals *LIs; - BitVector AllocatableRegs; - DenseMap<MachineInstr*, unsigned> DistanceMap; - DenseMap<int, SmallVector<MachineInstr*,4> > Slot2DbgValues; - - MachineBasicBlock *MBB; // Basic block currently being processed. 
- -public: - - bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM, - LiveIntervals* LIs); - -private: - void EraseInstr(MachineInstr *MI) { - VRM->RemoveMachineInstrFromMaps(MI); - LIs->RemoveMachineInstrFromMaps(MI); - MI->eraseFromParent(); - } - - bool OptimizeByUnfold2(unsigned VirtReg, int SS, - MachineBasicBlock::iterator &MII, - std::vector<MachineInstr*> &MaybeDeadStores, - AvailableSpills &Spills, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps); - - bool OptimizeByUnfold(MachineBasicBlock::iterator &MII, - std::vector<MachineInstr*> &MaybeDeadStores, - AvailableSpills &Spills, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps); - - bool CommuteToFoldReload(MachineBasicBlock::iterator &MII, - unsigned VirtReg, unsigned SrcReg, int SS, - AvailableSpills &Spills, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps, - const TargetRegisterInfo *TRI); - - void SpillRegToStackSlot(MachineBasicBlock::iterator &MII, - int Idx, unsigned PhysReg, int StackSlot, - const TargetRegisterClass *RC, - bool isAvailable, MachineInstr *&LastStore, - AvailableSpills &Spills, - SmallSet<MachineInstr*, 4> &ReMatDefs, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps); - - void TransferDeadness(unsigned Reg, BitVector &RegKills, - std::vector<MachineOperand*> &KillOps); - - bool InsertEmergencySpills(MachineInstr *MI); - - bool InsertRestores(MachineInstr *MI, - AvailableSpills &Spills, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps); - - bool InsertSpills(MachineInstr *MI); - - void ProcessUses(MachineInstr &MI, AvailableSpills &Spills, - std::vector<MachineInstr*> &MaybeDeadStores, - BitVector &RegKills, - ReuseInfo &ReusedOperands, - std::vector<MachineOperand*> &KillOps); - - void RewriteMBB(LiveIntervals *LIs, - AvailableSpills &Spills, BitVector &RegKills, - std::vector<MachineOperand*> &KillOps); -}; -} - -bool LocalRewriter::runOnMachineFunction(MachineFunction &MF, VirtRegMap &vrm, - LiveIntervals* lis) { - MRI = &MF.getRegInfo(); - TRI = MF.getTarget().getRegisterInfo(); - TII = MF.getTarget().getInstrInfo(); - VRM = &vrm; - LIs = lis; - AllocatableRegs = TRI->getAllocatableSet(MF); - DEBUG(dbgs() << "\n**** Local spiller rewriting function '" - << MF.getFunction()->getName() << "':\n"); - DEBUG(dbgs() << "**** Machine Instrs (NOTE! Does not include spills and" - " reloads!) ****\n"); - DEBUG(MF.print(dbgs(), LIs->getSlotIndexes())); - - // Spills - Keep track of which spilled values are available in physregs - // so that we can choose to reuse the physregs instead of emitting - // reloads. This is usually refreshed per basic block. - AvailableSpills Spills(TRI, TII); - - // Keep track of kill information. - BitVector RegKills(TRI->getNumRegs()); - std::vector<MachineOperand*> KillOps; - KillOps.resize(TRI->getNumRegs(), NULL); - - // SingleEntrySuccs - Successor blocks which have a single predecessor. - SmallVector<MachineBasicBlock*, 4> SinglePredSuccs; - SmallPtrSet<MachineBasicBlock*,16> EarlyVisited; - - // Traverse the basic blocks depth first. - MachineBasicBlock *Entry = MF.begin(); - SmallPtrSet<MachineBasicBlock*,16> Visited; - for (df_ext_iterator<MachineBasicBlock*, - SmallPtrSet<MachineBasicBlock*,16> > - DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited); - DFI != E; ++DFI) { - MBB = *DFI; - if (!EarlyVisited.count(MBB)) - RewriteMBB(LIs, Spills, RegKills, KillOps); - - // If this MBB is the only predecessor of a successor. Keep the - // availability information and visit it next. 
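The traversal keeps Spills' availability information alive along a chain of successors that each have exactly one predecessor; the do-loop below implements that walk. A stripped-down model of the same walk, with plain ints standing in for basic blocks (hypothetical, for exposition only):

#include <set>
#include <vector>

// Succs[B] lists B's successors; PredCount[S] is S's number of predecessors.
// A successor is only followed while it is reachable solely through the block
// just visited, which is what makes it safe to keep the availability state.
static void visitChain(int BB, const std::vector<std::vector<int> > &Succs,
                       const std::vector<int> &PredCount,
                       std::set<int> &Visited) {
  while (BB != -1) {
    Visited.insert(BB);
    int Next = -1;
    for (unsigned i = 0, e = Succs[BB].size(); i != e; ++i) {
      int S = Succs[BB][i];
      if (PredCount[S] == 1 && !Visited.count(S)) {
        Next = S;                   // single-predecessor successor: keep going
        break;
      }
    }
    BB = Next;                      // no such successor: the chain ends
  }
}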
- do { - // Keep visiting single predecessor successor as long as possible. - SinglePredSuccs.clear(); - findSinglePredSuccessor(MBB, SinglePredSuccs); - if (SinglePredSuccs.empty()) - MBB = 0; - else { - // FIXME: More than one successors, each of which has MBB has - // the only predecessor. - MBB = SinglePredSuccs[0]; - if (!Visited.count(MBB) && EarlyVisited.insert(MBB)) { - Spills.AddAvailableRegsToLiveIn(*MBB, RegKills, KillOps); - RewriteMBB(LIs, Spills, RegKills, KillOps); - } - } - } while (MBB); - - // Clear the availability info. - Spills.clear(); - } - - DEBUG(dbgs() << "**** Post Machine Instrs ****\n"); - DEBUG(MF.print(dbgs(), LIs->getSlotIndexes())); - - // Mark unused spill slots. - MachineFrameInfo *MFI = MF.getFrameInfo(); - int SS = VRM->getLowSpillSlot(); - if (SS != VirtRegMap::NO_STACK_SLOT) { - for (int e = VRM->getHighSpillSlot(); SS <= e; ++SS) { - SmallVector<MachineInstr*, 4> &DbgValues = Slot2DbgValues[SS]; - if (!VRM->isSpillSlotUsed(SS)) { - MFI->RemoveStackObject(SS); - for (unsigned j = 0, ee = DbgValues.size(); j != ee; ++j) { - MachineInstr *DVMI = DbgValues[j]; - DEBUG(dbgs() << "Removing debug info referencing FI#" << SS << '\n'); - EraseInstr(DVMI); - } - ++NumDSS; - } - DbgValues.clear(); - } - } - Slot2DbgValues.clear(); - - return true; -} - -/// OptimizeByUnfold2 - Unfold a series of load / store folding instructions if -/// a scratch register is available. -/// xorq %r12<kill>, %r13 -/// addq %rax, -184(%rbp) -/// addq %r13, -184(%rbp) -/// ==> -/// xorq %r12<kill>, %r13 -/// movq -184(%rbp), %r12 -/// addq %rax, %r12 -/// addq %r13, %r12 -/// movq %r12, -184(%rbp) -bool LocalRewriter:: -OptimizeByUnfold2(unsigned VirtReg, int SS, - MachineBasicBlock::iterator &MII, - std::vector<MachineInstr*> &MaybeDeadStores, - AvailableSpills &Spills, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps) { - - MachineBasicBlock::iterator NextMII = llvm::next(MII); - // Skip over dbg_value instructions. - while (NextMII != MBB->end() && NextMII->isDebugValue()) - NextMII = llvm::next(NextMII); - if (NextMII == MBB->end()) - return false; - - if (TII->getOpcodeAfterMemoryUnfold(MII->getOpcode(), true, true) == 0) - return false; - - // Now let's see if the last couple of instructions happens to have freed up - // a register. - const TargetRegisterClass* RC = MRI->getRegClass(VirtReg); - unsigned PhysReg = FindFreeRegister(MII, *MBB, RC, TRI, AllocatableRegs); - if (!PhysReg) - return false; - - MachineFunction &MF = *MBB->getParent(); - TRI = MF.getTarget().getRegisterInfo(); - MachineInstr &MI = *MII; - if (!FoldsStackSlotModRef(MI, SS, PhysReg, TII, TRI, *VRM)) - return false; - - // If the next instruction also folds the same SS modref and can be unfoled, - // then it's worthwhile to issue a load from SS into the free register and - // then unfold these instructions. - if (!FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, *VRM)) - return false; - - // Back-schedule reloads and remats. - ComputeReloadLoc(MII, MBB->begin(), PhysReg, TRI, false, SS, TII, MF); - - // Load from SS to the spare physical register. - TII->loadRegFromStackSlot(*MBB, MII, PhysReg, SS, RC, TRI); - // This invalidates Phys. - Spills.ClobberPhysReg(PhysReg); - // Remember it's available. - Spills.addAvailable(SS, PhysReg); - MaybeDeadStores[SS] = NULL; - - // Unfold current MI. 
- SmallVector<MachineInstr*, 4> NewMIs; - if (!TII->unfoldMemoryOperand(MF, &MI, VirtReg, false, false, NewMIs)) - llvm_unreachable("Unable unfold the load / store folding instruction!"); - assert(NewMIs.size() == 1); - AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg, *TRI); - VRM->transferRestorePts(&MI, NewMIs[0]); - MII = MBB->insert(MII, NewMIs[0]); - InvalidateKills(MI, TRI, RegKills, KillOps); - EraseInstr(&MI); - ++NumModRefUnfold; - - // Unfold next instructions that fold the same SS. - do { - MachineInstr &NextMI = *NextMII; - NextMII = llvm::next(NextMII); - NewMIs.clear(); - if (!TII->unfoldMemoryOperand(MF, &NextMI, VirtReg, false, false, NewMIs)) - llvm_unreachable("Unable unfold the load / store folding instruction!"); - assert(NewMIs.size() == 1); - AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg, *TRI); - VRM->transferRestorePts(&NextMI, NewMIs[0]); - MBB->insert(NextMII, NewMIs[0]); - InvalidateKills(NextMI, TRI, RegKills, KillOps); - EraseInstr(&NextMI); - ++NumModRefUnfold; - // Skip over dbg_value instructions. - while (NextMII != MBB->end() && NextMII->isDebugValue()) - NextMII = llvm::next(NextMII); - if (NextMII == MBB->end()) - break; - } while (FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, *VRM)); - - // Store the value back into SS. - TII->storeRegToStackSlot(*MBB, NextMII, PhysReg, true, SS, RC, TRI); - MachineInstr *StoreMI = prior(NextMII); - VRM->addSpillSlotUse(SS, StoreMI); - VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod); - - return true; -} - -/// OptimizeByUnfold - Turn a store folding instruction into a load folding -/// instruction. e.g. -/// xorl %edi, %eax -/// movl %eax, -32(%ebp) -/// movl -36(%ebp), %eax -/// orl %eax, -32(%ebp) -/// ==> -/// xorl %edi, %eax -/// orl -36(%ebp), %eax -/// mov %eax, -32(%ebp) -/// This enables unfolding optimization for a subsequent instruction which will -/// also eliminate the newly introduced store instruction. -bool LocalRewriter:: -OptimizeByUnfold(MachineBasicBlock::iterator &MII, - std::vector<MachineInstr*> &MaybeDeadStores, - AvailableSpills &Spills, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps) { - MachineFunction &MF = *MBB->getParent(); - MachineInstr &MI = *MII; - unsigned UnfoldedOpc = 0; - unsigned UnfoldPR = 0; - unsigned UnfoldVR = 0; - int FoldedSS = VirtRegMap::NO_STACK_SLOT; - VirtRegMap::MI2VirtMapTy::const_iterator I, End; - for (tie(I, End) = VRM->getFoldedVirts(&MI); I != End; ) { - // Only transform a MI that folds a single register. - if (UnfoldedOpc) - return false; - UnfoldVR = I->second.first; - VirtRegMap::ModRef MR = I->second.second; - // MI2VirtMap be can updated which invalidate the iterator. - // Increment the iterator first. - ++I; - if (VRM->isAssignedReg(UnfoldVR)) - continue; - // If this reference is not a use, any previous store is now dead. - // Otherwise, the store to this stack slot is not dead anymore. - FoldedSS = VRM->getStackSlot(UnfoldVR); - MachineInstr* DeadStore = MaybeDeadStores[FoldedSS]; - if (DeadStore && (MR & VirtRegMap::isModRef)) { - unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(FoldedSS); - if (!PhysReg || !DeadStore->readsRegister(PhysReg)) - continue; - UnfoldPR = PhysReg; - UnfoldedOpc = TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(), - false, true); - } - } - - if (!UnfoldedOpc) { - if (!UnfoldVR) - return false; - - // Look for other unfolding opportunities. 
- return OptimizeByUnfold2(UnfoldVR, FoldedSS, MII, MaybeDeadStores, Spills, - RegKills, KillOps); - } - - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || MO.getReg() == 0 || !MO.isUse()) - continue; - unsigned VirtReg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(VirtReg) || MO.getSubReg()) - continue; - if (VRM->isAssignedReg(VirtReg)) { - unsigned PhysReg = VRM->getPhys(VirtReg); - if (PhysReg && TRI->regsOverlap(PhysReg, UnfoldPR)) - return false; - } else if (VRM->isReMaterialized(VirtReg)) - continue; - int SS = VRM->getStackSlot(VirtReg); - unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS); - if (PhysReg) { - if (TRI->regsOverlap(PhysReg, UnfoldPR)) - return false; - continue; - } - if (VRM->hasPhys(VirtReg)) { - PhysReg = VRM->getPhys(VirtReg); - if (!TRI->regsOverlap(PhysReg, UnfoldPR)) - continue; - } - - // Ok, we'll need to reload the value into a register which makes - // it impossible to perform the store unfolding optimization later. - // Let's see if it is possible to fold the load if the store is - // unfolded. This allows us to perform the store unfolding - // optimization. - SmallVector<MachineInstr*, 4> NewMIs; - if (TII->unfoldMemoryOperand(MF, &MI, UnfoldVR, false, false, NewMIs)) { - assert(NewMIs.size() == 1); - MachineInstr *NewMI = NewMIs.back(); - MBB->insert(MII, NewMI); - NewMIs.clear(); - int Idx = NewMI->findRegisterUseOperandIdx(VirtReg, false); - assert(Idx != -1); - SmallVector<unsigned, 1> Ops; - Ops.push_back(Idx); - MachineInstr *FoldedMI = TII->foldMemoryOperand(NewMI, Ops, SS); - NewMI->eraseFromParent(); - if (FoldedMI) { - VRM->addSpillSlotUse(SS, FoldedMI); - if (!VRM->hasPhys(UnfoldVR)) - VRM->assignVirt2Phys(UnfoldVR, UnfoldPR); - VRM->virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef); - MII = FoldedMI; - InvalidateKills(MI, TRI, RegKills, KillOps); - EraseInstr(&MI); - return true; - } - } - } - - return false; -} - -/// CommuteChangesDestination - We are looking for r0 = op r1, r2 and -/// where SrcReg is r1 and it is tied to r0. Return true if after -/// commuting this instruction it will be r0 = op r2, r1. 
-static bool CommuteChangesDestination(MachineInstr *DefMI, - const MCInstrDesc &MCID, - unsigned SrcReg, - const TargetInstrInfo *TII, - unsigned &DstIdx) { - if (MCID.getNumDefs() != 1 && MCID.getNumOperands() != 3) - return false; - if (!DefMI->getOperand(1).isReg() || - DefMI->getOperand(1).getReg() != SrcReg) - return false; - unsigned DefIdx; - if (!DefMI->isRegTiedToDefOperand(1, &DefIdx) || DefIdx != 0) - return false; - unsigned SrcIdx1, SrcIdx2; - if (!TII->findCommutedOpIndices(DefMI, SrcIdx1, SrcIdx2)) - return false; - if (SrcIdx1 == 1 && SrcIdx2 == 2) { - DstIdx = 2; - return true; - } - return false; -} - -/// CommuteToFoldReload - -/// Look for -/// r1 = load fi#1 -/// r1 = op r1, r2<kill> -/// store r1, fi#1 -/// -/// If op is commutable and r2 is killed, then we can xform these to -/// r2 = op r2, fi#1 -/// store r2, fi#1 -bool LocalRewriter:: -CommuteToFoldReload(MachineBasicBlock::iterator &MII, - unsigned VirtReg, unsigned SrcReg, int SS, - AvailableSpills &Spills, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps, - const TargetRegisterInfo *TRI) { - if (MII == MBB->begin() || !MII->killsRegister(SrcReg)) - return false; - - MachineInstr &MI = *MII; - MachineBasicBlock::iterator DefMII = prior(MII); - MachineInstr *DefMI = DefMII; - const MCInstrDesc &MCID = DefMI->getDesc(); - unsigned NewDstIdx; - if (DefMII != MBB->begin() && - MCID.isCommutable() && - CommuteChangesDestination(DefMI, MCID, SrcReg, TII, NewDstIdx)) { - MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); - unsigned NewReg = NewDstMO.getReg(); - if (!NewDstMO.isKill() || TRI->regsOverlap(NewReg, SrcReg)) - return false; - MachineInstr *ReloadMI = prior(DefMII); - int FrameIdx; - unsigned DestReg = TII->isLoadFromStackSlot(ReloadMI, FrameIdx); - if (DestReg != SrcReg || FrameIdx != SS) - return false; - int UseIdx = DefMI->findRegisterUseOperandIdx(DestReg, false); - if (UseIdx == -1) - return false; - unsigned DefIdx; - if (!MI.isRegTiedToDefOperand(UseIdx, &DefIdx)) - return false; - assert(DefMI->getOperand(DefIdx).isReg() && - DefMI->getOperand(DefIdx).getReg() == SrcReg); - - // Now commute def instruction. - MachineInstr *CommutedMI = TII->commuteInstruction(DefMI, true); - if (!CommutedMI) - return false; - MBB->insert(MII, CommutedMI); - SmallVector<unsigned, 1> Ops; - Ops.push_back(NewDstIdx); - MachineInstr *FoldedMI = TII->foldMemoryOperand(CommutedMI, Ops, SS); - // Not needed since foldMemoryOperand returns new MI. - CommutedMI->eraseFromParent(); - if (!FoldedMI) - return false; - - VRM->addSpillSlotUse(SS, FoldedMI); - VRM->virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef); - // Insert new def MI and spill MI. - const TargetRegisterClass* RC = MRI->getRegClass(VirtReg); - TII->storeRegToStackSlot(*MBB, &MI, NewReg, true, SS, RC, TRI); - MII = prior(MII); - MachineInstr *StoreMI = MII; - VRM->addSpillSlotUse(SS, StoreMI); - VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod); - MII = FoldedMI; // Update MII to backtrack. - - // Delete all 3 old instructions. - InvalidateKills(*ReloadMI, TRI, RegKills, KillOps); - EraseInstr(ReloadMI); - InvalidateKills(*DefMI, TRI, RegKills, KillOps); - EraseInstr(DefMI); - InvalidateKills(MI, TRI, RegKills, KillOps); - EraseInstr(&MI); - - // If NewReg was previously holding value of some SS, it's now clobbered. - // This has to be done now because it's a physical register. When this - // instruction is re-visited, it's ignored. 
- Spills.ClobberPhysReg(NewReg); - - ++NumCommutes; - return true; - } - - return false; -} - -/// SpillRegToStackSlot - Spill a register to a specified stack slot. Check if -/// the last store to the same slot is now dead. If so, remove the last store. -void LocalRewriter:: -SpillRegToStackSlot(MachineBasicBlock::iterator &MII, - int Idx, unsigned PhysReg, int StackSlot, - const TargetRegisterClass *RC, - bool isAvailable, MachineInstr *&LastStore, - AvailableSpills &Spills, - SmallSet<MachineInstr*, 4> &ReMatDefs, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps) { - - MachineBasicBlock::iterator oldNextMII = llvm::next(MII); - TII->storeRegToStackSlot(*MBB, llvm::next(MII), PhysReg, true, StackSlot, RC, - TRI); - MachineInstr *StoreMI = prior(oldNextMII); - VRM->addSpillSlotUse(StackSlot, StoreMI); - DEBUG(dbgs() << "Store:\t" << *StoreMI); - - // If there is a dead store to this stack slot, nuke it now. - if (LastStore) { - DEBUG(dbgs() << "Removed dead store:\t" << *LastStore); - ++NumDSE; - SmallVector<unsigned, 2> KillRegs; - InvalidateKills(*LastStore, TRI, RegKills, KillOps, &KillRegs); - MachineBasicBlock::iterator PrevMII = LastStore; - bool CheckDef = PrevMII != MBB->begin(); - if (CheckDef) - --PrevMII; - EraseInstr(LastStore); - if (CheckDef) { - // Look at defs of killed registers on the store. Mark the defs - // as dead since the store has been deleted and they aren't - // being reused. - for (unsigned j = 0, ee = KillRegs.size(); j != ee; ++j) { - bool HasOtherDef = false; - if (InvalidateRegDef(PrevMII, *MII, KillRegs[j], HasOtherDef, TRI)) { - MachineInstr *DeadDef = PrevMII; - if (ReMatDefs.count(DeadDef) && !HasOtherDef) { - // FIXME: This assumes a remat def does not have side effects. - EraseInstr(DeadDef); - ++NumDRM; - } - } - } - } - } - - // Allow for multi-instruction spill sequences, as on PPC Altivec. Presume - // the last of multiple instructions is the actual store. - LastStore = prior(oldNextMII); - - // If the stack slot value was previously available in some other - // register, change it now. Otherwise, make the register available, - // in PhysReg. - Spills.ModifyStackSlotOrReMat(StackSlot); - Spills.ClobberPhysReg(PhysReg); - Spills.addAvailable(StackSlot, PhysReg, isAvailable); - ++NumStores; -} - -/// isSafeToDelete - Return true if this instruction doesn't produce any side -/// effect and all of its defs are dead. -static bool isSafeToDelete(MachineInstr &MI) { - const MCInstrDesc &MCID = MI.getDesc(); - if (MCID.mayLoad() || MCID.mayStore() || MCID.isTerminator() || - MCID.isCall() || MCID.isBarrier() || MCID.isReturn() || - MI.isLabel() || MI.isDebugValue() || - MI.hasUnmodeledSideEffects()) - return false; - - // Technically speaking inline asm without side effects and no defs can still - // be deleted. But there is so much bad inline asm code out there, we should - // let them be. - if (MI.isInlineAsm()) - return false; - - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || !MO.getReg()) - continue; - if (MO.isDef() && !MO.isDead()) - return false; - if (MO.isUse() && MO.isKill()) - // FIXME: We can't remove kill markers or else the scavenger will assert. - // An alternative is to add a ADD pseudo instruction to replace kill - // markers. - return false; - } - return true; -} - -/// TransferDeadness - A identity copy definition is dead and it's being -/// removed. Find the last def or use and mark it as dead / kill. 
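TransferDeadness, defined next, gathers every reference to Reg in the current block along with its DistanceMap position, sorts by that distance using RefSorter, and then walks backwards from the farthest reference. The ordering itself is just a sort on the pair's second element; a tiny illustration with ints standing in for MachineInstr pointers (hypothetical):

#include <algorithm>
#include <utility>
#include <vector>

struct ByDistance {               // same ordering as RefSorter above
  bool operator()(const std::pair<int, int> &A,
                  const std::pair<int, int> &B) const {
    return A.second < B.second;
  }
};

// Returns the id of the reference with the greatest distance, or -1 if none;
// this is the reference TransferDeadness would visit first.
static int farthestReference(std::vector<std::pair<int, int> > Refs) {
  std::sort(Refs.begin(), Refs.end(), ByDistance());
  return Refs.empty() ? -1 : Refs.back().first;
}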
-void LocalRewriter:: -TransferDeadness(unsigned Reg, BitVector &RegKills, - std::vector<MachineOperand*> &KillOps) { - SmallPtrSet<MachineInstr*, 4> Seens; - SmallVector<std::pair<MachineInstr*, int>,8> Refs; - for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(Reg), - RE = MRI->reg_end(); RI != RE; ++RI) { - MachineInstr *UDMI = &*RI; - if (UDMI->isDebugValue() || UDMI->getParent() != MBB) - continue; - DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UDMI); - if (DI == DistanceMap.end()) - continue; - if (Seens.insert(UDMI)) - Refs.push_back(std::make_pair(UDMI, DI->second)); - } - - if (Refs.empty()) - return; - std::sort(Refs.begin(), Refs.end(), RefSorter()); - - while (!Refs.empty()) { - MachineInstr *LastUDMI = Refs.back().first; - Refs.pop_back(); - - MachineOperand *LastUD = NULL; - for (unsigned i = 0, e = LastUDMI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = LastUDMI->getOperand(i); - if (!MO.isReg() || MO.getReg() != Reg) - continue; - if (!LastUD || (LastUD->isUse() && MO.isDef())) - LastUD = &MO; - if (LastUDMI->isRegTiedToDefOperand(i)) - break; - } - if (LastUD->isDef()) { - // If the instruction has no side effect, delete it and propagate - // backward further. Otherwise, mark is dead and we are done. - if (!isSafeToDelete(*LastUDMI)) { - LastUD->setIsDead(); - break; - } - EraseInstr(LastUDMI); - } else { - LastUD->setIsKill(); - RegKills.set(Reg); - KillOps[Reg] = LastUD; - break; - } - } -} - -/// InsertEmergencySpills - Insert emergency spills before MI if requested by -/// VRM. Return true if spills were inserted. -bool LocalRewriter::InsertEmergencySpills(MachineInstr *MI) { - if (!VRM->hasEmergencySpills(MI)) - return false; - MachineBasicBlock::iterator MII = MI; - SmallSet<int, 4> UsedSS; - std::vector<unsigned> &EmSpills = VRM->getEmergencySpills(MI); - for (unsigned i = 0, e = EmSpills.size(); i != e; ++i) { - unsigned PhysReg = EmSpills[i]; - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysReg); - assert(RC && "Unable to determine register class!"); - int SS = VRM->getEmergencySpillSlot(RC); - if (UsedSS.count(SS)) - llvm_unreachable("Need to spill more than one physical registers!"); - UsedSS.insert(SS); - TII->storeRegToStackSlot(*MBB, MII, PhysReg, true, SS, RC, TRI); - MachineInstr *StoreMI = prior(MII); - VRM->addSpillSlotUse(SS, StoreMI); - - // Back-schedule reloads and remats. - MachineBasicBlock::iterator InsertLoc = - ComputeReloadLoc(llvm::next(MII), MBB->begin(), PhysReg, TRI, false, SS, - TII, *MBB->getParent()); - - TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SS, RC, TRI); - - MachineInstr *LoadMI = prior(InsertLoc); - VRM->addSpillSlotUse(SS, LoadMI); - ++NumPSpills; - DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size())); - } - return true; -} - -/// InsertRestores - Restore registers before MI is requested by VRM. Return -/// true is any instructions were inserted. -bool LocalRewriter::InsertRestores(MachineInstr *MI, - AvailableSpills &Spills, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps) { - if (!VRM->isRestorePt(MI)) - return false; - MachineBasicBlock::iterator MII = MI; - std::vector<unsigned> &RestoreRegs = VRM->getRestorePtRestores(MI); - for (unsigned i = 0, e = RestoreRegs.size(); i != e; ++i) { - unsigned VirtReg = RestoreRegs[e-i-1]; // Reverse order. - if (!VRM->getPreSplitReg(VirtReg)) - continue; // Split interval spilled again. 
- unsigned Phys = VRM->getPhys(VirtReg); - MRI->setPhysRegUsed(Phys); - - // Check if the value being restored if available. If so, it must be - // from a predecessor BB that fallthrough into this BB. We do not - // expect: - // BB1: - // r1 = load fi#1 - // ... - // = r1<kill> - // ... # r1 not clobbered - // ... - // = load fi#1 - bool DoReMat = VRM->isReMaterialized(VirtReg); - int SSorRMId = DoReMat - ? VRM->getReMatId(VirtReg) : VRM->getStackSlot(VirtReg); - unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId); - if (InReg == Phys) { - // If the value is already available in the expected register, save - // a reload / remat. - if (SSorRMId) - DEBUG(dbgs() << "Reusing RM#" - << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1); - else - DEBUG(dbgs() << "Reusing SS#" << SSorRMId); - DEBUG(dbgs() << " from physreg " - << TRI->getName(InReg) << " for " << PrintReg(VirtReg) - <<" instead of reloading into physreg " - << TRI->getName(Phys) << '\n'); - - // Reusing a physreg may resurrect it. But we expect ProcessUses to update - // the kill flags for the current instruction after processing it. - - ++NumOmitted; - continue; - } else if (InReg && InReg != Phys) { - if (SSorRMId) - DEBUG(dbgs() << "Reusing RM#" - << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1); - else - DEBUG(dbgs() << "Reusing SS#" << SSorRMId); - DEBUG(dbgs() << " from physreg " - << TRI->getName(InReg) << " for " << PrintReg(VirtReg) - <<" by copying it into physreg " - << TRI->getName(Phys) << '\n'); - - // If the reloaded / remat value is available in another register, - // copy it to the desired register. - - // Back-schedule reloads and remats. - MachineBasicBlock::iterator InsertLoc = - ComputeReloadLoc(MII, MBB->begin(), Phys, TRI, DoReMat, SSorRMId, TII, - *MBB->getParent()); - MachineInstr *CopyMI = BuildMI(*MBB, InsertLoc, MI->getDebugLoc(), - TII->get(TargetOpcode::COPY), Phys) - .addReg(InReg, RegState::Kill); - - // This invalidates Phys. - Spills.ClobberPhysReg(Phys); - // Remember it's available. - Spills.addAvailable(SSorRMId, Phys); - - CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse); - UpdateKills(*CopyMI, TRI, RegKills, KillOps); - - DEBUG(dbgs() << '\t' << *CopyMI); - ++NumCopified; - continue; - } - - // Back-schedule reloads and remats. - MachineBasicBlock::iterator InsertLoc = - ComputeReloadLoc(MII, MBB->begin(), Phys, TRI, DoReMat, SSorRMId, TII, - *MBB->getParent()); - - if (VRM->isReMaterialized(VirtReg)) { - ReMaterialize(*MBB, InsertLoc, Phys, VirtReg, TII, TRI, *VRM); - } else { - const TargetRegisterClass* RC = MRI->getRegClass(VirtReg); - TII->loadRegFromStackSlot(*MBB, InsertLoc, Phys, SSorRMId, RC, TRI); - MachineInstr *LoadMI = prior(InsertLoc); - VRM->addSpillSlotUse(SSorRMId, LoadMI); - ++NumLoads; - DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size())); - } - - // This invalidates Phys. - Spills.ClobberPhysReg(Phys); - // Remember it's available. - Spills.addAvailable(SSorRMId, Phys); - - UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps); - DEBUG(dbgs() << '\t' << *prior(MII)); - } - return true; -} - -/// InsertSpills - Insert spills after MI if requested by VRM. Return -/// true if spills were inserted. 
-bool LocalRewriter::InsertSpills(MachineInstr *MI) { - if (!VRM->isSpillPt(MI)) - return false; - MachineBasicBlock::iterator MII = MI; - std::vector<std::pair<unsigned,bool> > &SpillRegs = - VRM->getSpillPtSpills(MI); - for (unsigned i = 0, e = SpillRegs.size(); i != e; ++i) { - unsigned VirtReg = SpillRegs[i].first; - bool isKill = SpillRegs[i].second; - if (!VRM->getPreSplitReg(VirtReg)) - continue; // Split interval spilled again. - const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); - unsigned Phys = VRM->getPhys(VirtReg); - int StackSlot = VRM->getStackSlot(VirtReg); - MachineBasicBlock::iterator oldNextMII = llvm::next(MII); - TII->storeRegToStackSlot(*MBB, llvm::next(MII), Phys, isKill, StackSlot, - RC, TRI); - MachineInstr *StoreMI = prior(oldNextMII); - VRM->addSpillSlotUse(StackSlot, StoreMI); - DEBUG(dbgs() << "Store:\t" << *StoreMI); - VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod); - } - return true; -} - - -/// ProcessUses - Process all of MI's spilled operands and all available -/// operands. -void LocalRewriter::ProcessUses(MachineInstr &MI, AvailableSpills &Spills, - std::vector<MachineInstr*> &MaybeDeadStores, - BitVector &RegKills, - ReuseInfo &ReusedOperands, - std::vector<MachineOperand*> &KillOps) { - // Clear kill info. - SmallSet<unsigned, 2> KilledMIRegs; - SmallVector<unsigned, 4> VirtUseOps; - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || MO.getReg() == 0) - continue; // Ignore non-register operands. - - unsigned VirtReg = MO.getReg(); - - if (TargetRegisterInfo::isPhysicalRegister(VirtReg)) { - // Ignore physregs for spilling, but remember that it is used by this - // function. - MRI->setPhysRegUsed(VirtReg); - continue; - } - - // We want to process implicit virtual register uses first. - if (MO.isImplicit()) - // If the virtual register is implicitly defined, emit a implicit_def - // before so scavenger knows it's "defined". - // FIXME: This is a horrible hack done the by register allocator to - // remat a definition with virtual register operand. - VirtUseOps.insert(VirtUseOps.begin(), i); - else - VirtUseOps.push_back(i); - - // A partial def causes problems because the same operand both reads and - // writes the register. This rewriter is designed to rewrite uses and defs - // separately, so a partial def would already have been rewritten to a - // physreg by the time we get to processing defs. - // Add an implicit use operand to model the partial def. - if (MO.isDef() && MO.getSubReg() && MI.readsVirtualRegister(VirtReg) && - MI.findRegisterUseOperandIdx(VirtReg) == -1) { - VirtUseOps.insert(VirtUseOps.begin(), MI.getNumOperands()); - MI.addOperand(MachineOperand::CreateReg(VirtReg, - false, // isDef - true)); // isImplicit - DEBUG(dbgs() << "Partial redef: " << MI); - } - } - - // Process all of the spilled uses and all non spilled reg references. - SmallVector<int, 2> PotentialDeadStoreSlots; - KilledMIRegs.clear(); - for (unsigned j = 0, e = VirtUseOps.size(); j != e; ++j) { - unsigned i = VirtUseOps[j]; - unsigned VirtReg = MI.getOperand(i).getReg(); - assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && - "Not a virtual register?"); - - unsigned SubIdx = MI.getOperand(i).getSubReg(); - if (VRM->isAssignedReg(VirtReg)) { - // This virtual register was assigned a physreg! 
- unsigned Phys = VRM->getPhys(VirtReg); - MRI->setPhysRegUsed(Phys); - if (MI.getOperand(i).isDef()) - ReusedOperands.markClobbered(Phys); - substitutePhysReg(MI.getOperand(i), Phys, *TRI); - if (VRM->isImplicitlyDefined(VirtReg)) - // FIXME: Is this needed? - BuildMI(*MBB, &MI, MI.getDebugLoc(), - TII->get(TargetOpcode::IMPLICIT_DEF), Phys); - continue; - } - - // This virtual register is now known to be a spilled value. - if (!MI.getOperand(i).isUse()) - continue; // Handle defs in the loop below (handle use&def here though) - - bool AvoidReload = MI.getOperand(i).isUndef(); - // Check if it is defined by an implicit def. It should not be spilled. - // Note, this is for correctness reason. e.g. - // 8 %reg1024<def> = IMPLICIT_DEF - // 12 %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2 - // The live range [12, 14) are not part of the r1024 live interval since - // it's defined by an implicit def. It will not conflicts with live - // interval of r1025. Now suppose both registers are spilled, you can - // easily see a situation where both registers are reloaded before - // the INSERT_SUBREG and both target registers that would overlap. - bool DoReMat = VRM->isReMaterialized(VirtReg); - int SSorRMId = DoReMat - ? VRM->getReMatId(VirtReg) : VRM->getStackSlot(VirtReg); - int ReuseSlot = SSorRMId; - - // Check to see if this stack slot is available. - unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId); - - // If this is a sub-register use, make sure the reuse register is in the - // right register class. For example, for x86 not all of the 32-bit - // registers have accessible sub-registers. - // Similarly so for EXTRACT_SUBREG. Consider this: - // EDI = op - // MOV32_mr fi#1, EDI - // ... - // = EXTRACT_SUBREG fi#1 - // fi#1 is available in EDI, but it cannot be reused because it's not in - // the right register file. - if (PhysReg && !AvoidReload && SubIdx) { - const TargetRegisterClass* RC = MRI->getRegClass(VirtReg); - if (!RC->contains(PhysReg)) - PhysReg = 0; - } - - if (PhysReg && !AvoidReload) { - // This spilled operand might be part of a two-address operand. If this - // is the case, then changing it will necessarily require changing the - // def part of the instruction as well. However, in some cases, we - // aren't allowed to modify the reused register. If none of these cases - // apply, reuse it. - bool CanReuse = true; - bool isTied = MI.isRegTiedToDefOperand(i); - if (isTied) { - // Okay, we have a two address operand. We can reuse this physreg as - // long as we are allowed to clobber the value and there isn't an - // earlier def that has already clobbered the physreg. - CanReuse = !ReusedOperands.isClobbered(PhysReg) && - Spills.canClobberPhysReg(PhysReg); - } - // If this is an asm, and a PhysReg alias is used elsewhere as an - // earlyclobber operand, we can't also use it as an input. - if (MI.isInlineAsm()) { - for (unsigned k = 0, e = MI.getNumOperands(); k != e; ++k) { - MachineOperand &MOk = MI.getOperand(k); - if (MOk.isReg() && MOk.isEarlyClobber() && - TRI->regsOverlap(MOk.getReg(), PhysReg)) { - CanReuse = false; - DEBUG(dbgs() << "Not reusing physreg " << TRI->getName(PhysReg) - << " for " << PrintReg(VirtReg) << ": " << MOk - << '\n'); - break; - } - } - } - - if (CanReuse) { - // If this stack slot value is already available, reuse it! 
- if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT) - DEBUG(dbgs() << "Reusing RM#" - << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1); - else - DEBUG(dbgs() << "Reusing SS#" << ReuseSlot); - DEBUG(dbgs() << " from physreg " - << TRI->getName(PhysReg) << " for " << PrintReg(VirtReg) - << " instead of reloading into " - << PrintReg(VRM->getPhys(VirtReg), TRI) << '\n'); - unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; - MI.getOperand(i).setReg(RReg); - MI.getOperand(i).setSubReg(0); - - // Reusing a physreg may resurrect it. But we expect ProcessUses to - // update the kill flags for the current instr after processing it. - - // The only technical detail we have is that we don't know that - // PhysReg won't be clobbered by a reloaded stack slot that occurs - // later in the instruction. In particular, consider 'op V1, V2'. - // If V1 is available in physreg R0, we would choose to reuse it - // here, instead of reloading it into the register the allocator - // indicated (say R1). However, V2 might have to be reloaded - // later, and it might indicate that it needs to live in R0. When - // this occurs, we need to have information available that - // indicates it is safe to use R1 for the reload instead of R0. - // - // To further complicate matters, we might conflict with an alias, - // or R0 and R1 might not be compatible with each other. In this - // case, we actually insert a reload for V1 in R1, ensuring that - // we can get at R0 or its alias. - ReusedOperands.addReuse(i, ReuseSlot, PhysReg, - VRM->getPhys(VirtReg), VirtReg); - if (isTied) - // Only mark it clobbered if this is a use&def operand. - ReusedOperands.markClobbered(PhysReg); - ++NumReused; - - if (MI.getOperand(i).isKill() && - ReuseSlot <= VirtRegMap::MAX_STACK_SLOT) { - - // The store of this spilled value is potentially dead, but we - // won't know for certain until we've confirmed that the re-use - // above is valid, which means waiting until the other operands - // are processed. For now we just track the spill slot, we'll - // remove it after the other operands are processed if valid. - - PotentialDeadStoreSlots.push_back(ReuseSlot); - } - - // Mark is isKill if it's there no other uses of the same virtual - // register and it's not a two-address operand. IsKill will be - // unset if reg is reused. - if (!isTied && KilledMIRegs.count(VirtReg) == 0) { - MI.getOperand(i).setIsKill(); - KilledMIRegs.insert(VirtReg); - } - continue; - } // CanReuse - - // Otherwise we have a situation where we have a two-address instruction - // whose mod/ref operand needs to be reloaded. This reload is already - // available in some register "PhysReg", but if we used PhysReg as the - // operand to our 2-addr instruction, the instruction would modify - // PhysReg. This isn't cool if something later uses PhysReg and expects - // to get its initial value. - // - // To avoid this problem, and to avoid doing a load right after a store, - // we emit a copy from PhysReg into the designated register for this - // operand. - // - // This case also applies to an earlyclobber'd PhysReg. - unsigned DesignatedReg = VRM->getPhys(VirtReg); - assert(DesignatedReg && "Must map virtreg to physreg!"); - - // Note that, if we reused a register for a previous operand, the - // register we want to reload into might not actually be - // available. If this occurs, use the register indicated by the - // reuser. - if (ReusedOperands.hasReuses()) - DesignatedReg = ReusedOperands. 
- GetRegForReload(VirtReg, DesignatedReg, &MI, Spills, - MaybeDeadStores, RegKills, KillOps, *VRM); - - // If the mapped designated register is actually the physreg we have - // incoming, we don't need to inserted a dead copy. - if (DesignatedReg == PhysReg) { - // If this stack slot value is already available, reuse it! - if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT) - DEBUG(dbgs() << "Reusing RM#" - << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1); - else - DEBUG(dbgs() << "Reusing SS#" << ReuseSlot); - DEBUG(dbgs() << " from physreg " << TRI->getName(PhysReg) - << " for " << PrintReg(VirtReg) - << " instead of reloading into same physreg.\n"); - unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; - MI.getOperand(i).setReg(RReg); - MI.getOperand(i).setSubReg(0); - ReusedOperands.markClobbered(RReg); - ++NumReused; - continue; - } - - MRI->setPhysRegUsed(DesignatedReg); - ReusedOperands.markClobbered(DesignatedReg); - - // Back-schedule reloads and remats. - MachineBasicBlock::iterator InsertLoc = - ComputeReloadLoc(&MI, MBB->begin(), PhysReg, TRI, DoReMat, - SSorRMId, TII, *MBB->getParent()); - MachineInstr *CopyMI = BuildMI(*MBB, InsertLoc, MI.getDebugLoc(), - TII->get(TargetOpcode::COPY), - DesignatedReg).addReg(PhysReg); - CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse); - UpdateKills(*CopyMI, TRI, RegKills, KillOps); - - // This invalidates DesignatedReg. - Spills.ClobberPhysReg(DesignatedReg); - - Spills.addAvailable(ReuseSlot, DesignatedReg); - unsigned RReg = - SubIdx ? TRI->getSubReg(DesignatedReg, SubIdx) : DesignatedReg; - MI.getOperand(i).setReg(RReg); - MI.getOperand(i).setSubReg(0); - DEBUG(dbgs() << '\t' << *prior(InsertLoc)); - ++NumReused; - continue; - } // if (PhysReg) - - // Otherwise, reload it and remember that we have it. - PhysReg = VRM->getPhys(VirtReg); - assert(PhysReg && "Must map virtreg to physreg!"); - - // Note that, if we reused a register for a previous operand, the - // register we want to reload into might not actually be - // available. If this occurs, use the register indicated by the - // reuser. - if (ReusedOperands.hasReuses()) - PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI, - Spills, MaybeDeadStores, RegKills, KillOps, *VRM); - - MRI->setPhysRegUsed(PhysReg); - ReusedOperands.markClobbered(PhysReg); - if (AvoidReload) - ++NumAvoided; - else { - // Back-schedule reloads and remats. - MachineBasicBlock::iterator InsertLoc = - ComputeReloadLoc(MI, MBB->begin(), PhysReg, TRI, DoReMat, - SSorRMId, TII, *MBB->getParent()); - - if (DoReMat) { - ReMaterialize(*MBB, InsertLoc, PhysReg, VirtReg, TII, TRI, *VRM); - } else { - const TargetRegisterClass* RC = MRI->getRegClass(VirtReg); - TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SSorRMId, RC,TRI); - MachineInstr *LoadMI = prior(InsertLoc); - VRM->addSpillSlotUse(SSorRMId, LoadMI); - ++NumLoads; - DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size())); - } - // This invalidates PhysReg. - Spills.ClobberPhysReg(PhysReg); - - // Any stores to this stack slot are not dead anymore. - if (!DoReMat) - MaybeDeadStores[SSorRMId] = NULL; - Spills.addAvailable(SSorRMId, PhysReg); - // Assumes this is the last use. IsKill will be unset if reg is reused - // unless it's a two-address operand. - if (!MI.isRegTiedToDefOperand(i) && - KilledMIRegs.count(VirtReg) == 0) { - MI.getOperand(i).setIsKill(); - KilledMIRegs.insert(VirtReg); - } - - UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps); - DEBUG(dbgs() << '\t' << *prior(InsertLoc)); - } - unsigned RReg = SubIdx ? 
TRI->getSubReg(PhysReg, SubIdx) : PhysReg; - MI.getOperand(i).setReg(RReg); - MI.getOperand(i).setSubReg(0); - } - - // Ok - now we can remove stores that have been confirmed dead. - for (unsigned j = 0, e = PotentialDeadStoreSlots.size(); j != e; ++j) { - // This was the last use and the spilled value is still available - // for reuse. That means the spill was unnecessary! - int PDSSlot = PotentialDeadStoreSlots[j]; - MachineInstr* DeadStore = MaybeDeadStores[PDSSlot]; - if (DeadStore) { - DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore); - InvalidateKills(*DeadStore, TRI, RegKills, KillOps); - EraseInstr(DeadStore); - MaybeDeadStores[PDSSlot] = NULL; - ++NumDSE; - } - } -} - -/// rewriteMBB - Keep track of which spills are available even after the -/// register allocator is done with them. If possible, avoid reloading vregs. -void -LocalRewriter::RewriteMBB(LiveIntervals *LIs, - AvailableSpills &Spills, BitVector &RegKills, - std::vector<MachineOperand*> &KillOps) { - - DEBUG(dbgs() << "\n**** Local spiller rewriting MBB '" - << MBB->getName() << "':\n"); - - MachineFunction &MF = *MBB->getParent(); - - // MaybeDeadStores - When we need to write a value back into a stack slot, - // keep track of the inserted store. If the stack slot value is never read - // (because the value was used from some available register, for example), and - // subsequently stored to, the original store is dead. This map keeps track - // of inserted stores that are not used. If we see a subsequent store to the - // same stack slot, the original store is deleted. - std::vector<MachineInstr*> MaybeDeadStores; - MaybeDeadStores.resize(MF.getFrameInfo()->getObjectIndexEnd(), NULL); - - // ReMatDefs - These are rematerializable def MIs which are not deleted. - SmallSet<MachineInstr*, 4> ReMatDefs; - - // Keep track of the registers we have already spilled in case there are - // multiple defs of the same register in MI. - SmallSet<unsigned, 8> SpilledMIRegs; - - RegKills.reset(); - KillOps.clear(); - KillOps.resize(TRI->getNumRegs(), NULL); - - DistanceMap.clear(); - for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end(); - MII != E; ) { - MachineBasicBlock::iterator NextMII = llvm::next(MII); - - if (OptimizeByUnfold(MII, MaybeDeadStores, Spills, RegKills, KillOps)) - NextMII = llvm::next(MII); - - if (InsertEmergencySpills(MII)) - NextMII = llvm::next(MII); - - InsertRestores(MII, Spills, RegKills, KillOps); - - if (InsertSpills(MII)) - NextMII = llvm::next(MII); - - bool Erased = false; - bool BackTracked = false; - MachineInstr &MI = *MII; - - // Remember DbgValue's which reference stack slots. - if (MI.isDebugValue() && MI.getOperand(0).isFI()) - Slot2DbgValues[MI.getOperand(0).getIndex()].push_back(&MI); - - /// ReusedOperands - Keep track of operand reuse in case we need to undo - /// reuse. - ReuseInfo ReusedOperands(MI, TRI); - - ProcessUses(MI, Spills, MaybeDeadStores, RegKills, ReusedOperands, KillOps); - - DEBUG(dbgs() << '\t' << MI); - - - // If we have folded references to memory operands, make sure we clear all - // physical registers that may contain the value of the spilled virtual - // register - - // Copy the folded virts to a small vector, we may change MI2VirtMap. - SmallVector<std::pair<unsigned, VirtRegMap::ModRef>, 4> FoldedVirts; - // C++0x FTW! 
- for (std::pair<VirtRegMap::MI2VirtMapTy::const_iterator, - VirtRegMap::MI2VirtMapTy::const_iterator> FVRange = - VRM->getFoldedVirts(&MI); - FVRange.first != FVRange.second; ++FVRange.first) - FoldedVirts.push_back(FVRange.first->second); - - SmallSet<int, 2> FoldedSS; - for (unsigned FVI = 0, FVE = FoldedVirts.size(); FVI != FVE; ++FVI) { - unsigned VirtReg = FoldedVirts[FVI].first; - VirtRegMap::ModRef MR = FoldedVirts[FVI].second; - DEBUG(dbgs() << "Folded " << PrintReg(VirtReg) << " MR: " << MR); - - int SS = VRM->getStackSlot(VirtReg); - if (SS == VirtRegMap::NO_STACK_SLOT) - continue; - FoldedSS.insert(SS); - DEBUG(dbgs() << " - StackSlot: " << SS << "\n"); - - // If this folded instruction is just a use, check to see if it's a - // straight load from the virt reg slot. - if ((MR & VirtRegMap::isRef) && !(MR & VirtRegMap::isMod)) { - int FrameIdx; - unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx); - if (DestReg && FrameIdx == SS) { - // If this spill slot is available, turn it into a copy (or nothing) - // instead of leaving it as a load! - if (unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SS)) { - DEBUG(dbgs() << "Promoted Load To Copy: " << MI); - if (DestReg != InReg) { - MachineOperand *DefMO = MI.findRegisterDefOperand(DestReg); - MachineInstr *CopyMI = BuildMI(*MBB, &MI, MI.getDebugLoc(), - TII->get(TargetOpcode::COPY)) - .addReg(DestReg, RegState::Define, DefMO->getSubReg()) - .addReg(InReg, RegState::Kill); - // Revisit the copy so we make sure to notice the effects of the - // operation on the destreg (either needing to RA it if it's - // virtual or needing to clobber any values if it's physical). - NextMII = CopyMI; - NextMII->setAsmPrinterFlag(MachineInstr::ReloadReuse); - BackTracked = true; - } else { - DEBUG(dbgs() << "Removing now-noop copy: " << MI); - // InvalidateKills resurrects any prior kill of the copy's source - // allowing the source reg to be reused in place of the copy. - Spills.disallowClobberPhysReg(InReg); - } - - InvalidateKills(MI, TRI, RegKills, KillOps); - EraseInstr(&MI); - Erased = true; - goto ProcessNextInst; - } - } else { - unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS); - SmallVector<MachineInstr*, 4> NewMIs; - if (PhysReg && - TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, false, NewMIs)){ - MBB->insert(MII, NewMIs[0]); - InvalidateKills(MI, TRI, RegKills, KillOps); - EraseInstr(&MI); - Erased = true; - --NextMII; // backtrack to the unfolded instruction. - BackTracked = true; - goto ProcessNextInst; - } - } - } - - // If this reference is not a use, any previous store is now dead. - // Otherwise, the store to this stack slot is not dead anymore. - MachineInstr* DeadStore = MaybeDeadStores[SS]; - if (DeadStore) { - bool isDead = !(MR & VirtRegMap::isRef); - MachineInstr *NewStore = NULL; - if (MR & VirtRegMap::isModRef) { - unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS); - SmallVector<MachineInstr*, 4> NewMIs; - // We can reuse this physreg as long as we are allowed to clobber - // the value and there isn't an earlier def that has already clobbered - // the physreg. - if (PhysReg && - !ReusedOperands.isClobbered(PhysReg) && - Spills.canClobberPhysReg(PhysReg) && - !TII->isStoreToStackSlot(&MI, SS)) { // Not profitable! - MachineOperand *KillOpnd = - DeadStore->findRegisterUseOperand(PhysReg, true); - // Note, if the store is storing a sub-register, it's possible the - // super-register is needed below. 
- if (KillOpnd && !KillOpnd->getSubReg() && - TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, true,NewMIs)){ - MBB->insert(MII, NewMIs[0]); - NewStore = NewMIs[1]; - MBB->insert(MII, NewStore); - VRM->addSpillSlotUse(SS, NewStore); - InvalidateKills(MI, TRI, RegKills, KillOps); - EraseInstr(&MI); - Erased = true; - --NextMII; - --NextMII; // backtrack to the unfolded instruction. - BackTracked = true; - isDead = true; - ++NumSUnfold; - } - } - } - - if (isDead) { // Previous store is dead. - // If we get here, the store is dead, nuke it now. - DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore); - InvalidateKills(*DeadStore, TRI, RegKills, KillOps); - EraseInstr(DeadStore); - if (!NewStore) - ++NumDSE; - } - - MaybeDeadStores[SS] = NULL; - if (NewStore) { - // Treat this store as a spill merged into a copy. That makes the - // stack slot value available. - VRM->virtFolded(VirtReg, NewStore, VirtRegMap::isMod); - goto ProcessNextInst; - } - } - - // If the spill slot value is available, and this is a new definition of - // the value, the value is not available anymore. - if (MR & VirtRegMap::isMod) { - // Notice that the value in this stack slot has been modified. - Spills.ModifyStackSlotOrReMat(SS); - - // If this is *just* a mod of the value, check to see if this is just a - // store to the spill slot (i.e. the spill got merged into the copy). If - // so, realize that the vreg is available now, and add the store to the - // MaybeDeadStore info. - int StackSlot; - if (!(MR & VirtRegMap::isRef)) { - if (unsigned SrcReg = TII->isStoreToStackSlot(&MI, StackSlot)) { - assert(TargetRegisterInfo::isPhysicalRegister(SrcReg) && - "Src hasn't been allocated yet?"); - - if (CommuteToFoldReload(MII, VirtReg, SrcReg, StackSlot, - Spills, RegKills, KillOps, TRI)) { - NextMII = llvm::next(MII); - BackTracked = true; - goto ProcessNextInst; - } - - // Okay, this is certainly a store of SrcReg to [StackSlot]. Mark - // this as a potentially dead store in case there is a subsequent - // store into the stack slot without a read from it. - MaybeDeadStores[StackSlot] = &MI; - - // If the stack slot value was previously available in some other - // register, change it now. Otherwise, make the register - // available in PhysReg. - Spills.addAvailable(StackSlot, SrcReg, MI.killsRegister(SrcReg)); - } - } - } - } - - // Process all of the spilled defs. - SpilledMIRegs.clear(); - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - if (!(MO.isReg() && MO.getReg() && MO.isDef())) - continue; - - unsigned VirtReg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(VirtReg)) { - // Check to see if this is a noop copy. If so, eliminate the - // instruction before considering the dest reg to be changed. - // Also check if it's copying from an "undef", if so, we can't - // eliminate this or else the undef marker is lost and it will - // confuses the scavenger. This is extremely rare. - if (MI.isIdentityCopy() && !MI.getOperand(1).isUndef() && - MI.getNumOperands() == 2) { - ++NumDCE; - DEBUG(dbgs() << "Removing now-noop copy: " << MI); - SmallVector<unsigned, 2> KillRegs; - InvalidateKills(MI, TRI, RegKills, KillOps, &KillRegs); - if (MO.isDead() && !KillRegs.empty()) { - // Source register or an implicit super/sub-register use is killed. - assert(TRI->regsOverlap(KillRegs[0], MI.getOperand(0).getReg())); - // Last def is now dead. 
- TransferDeadness(MI.getOperand(1).getReg(), RegKills, KillOps); - } - EraseInstr(&MI); - Erased = true; - Spills.disallowClobberPhysReg(VirtReg); - goto ProcessNextInst; - } - - // If it's not a no-op copy, it clobbers the value in the destreg. - Spills.ClobberPhysReg(VirtReg); - ReusedOperands.markClobbered(VirtReg); - - // Check to see if this instruction is a load from a stack slot into - // a register. If so, this provides the stack slot value in the reg. - int FrameIdx; - if (unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx)) { - assert(DestReg == VirtReg && "Unknown load situation!"); - - // If it is a folded reference, then it's not safe to clobber. - bool Folded = FoldedSS.count(FrameIdx); - // Otherwise, if it wasn't available, remember that it is now! - Spills.addAvailable(FrameIdx, DestReg, !Folded); - goto ProcessNextInst; - } - - continue; - } - - unsigned SubIdx = MO.getSubReg(); - bool DoReMat = VRM->isReMaterialized(VirtReg); - if (DoReMat) - ReMatDefs.insert(&MI); - - // The only vregs left are stack slot definitions. - int StackSlot = VRM->getStackSlot(VirtReg); - const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); - - // If this def is part of a two-address operand, make sure to execute - // the store from the correct physical register. - unsigned PhysReg; - unsigned TiedOp; - if (MI.isRegTiedToUseOperand(i, &TiedOp)) { - PhysReg = MI.getOperand(TiedOp).getReg(); - if (SubIdx) { - unsigned SuperReg = findSuperReg(RC, PhysReg, SubIdx, TRI); - assert(SuperReg && TRI->getSubReg(SuperReg, SubIdx) == PhysReg && - "Can't find corresponding super-register!"); - PhysReg = SuperReg; - } - } else { - PhysReg = VRM->getPhys(VirtReg); - if (ReusedOperands.isClobbered(PhysReg)) { - // Another def has taken the assigned physreg. It must have been a - // use&def which got it due to reuse. Undo the reuse! - PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI, - Spills, MaybeDeadStores, RegKills, KillOps, *VRM); - } - } - - // If StackSlot is available in a register that also holds other stack - // slots, clobber those stack slots now. - Spills.ClobberSharingStackSlots(StackSlot); - - assert(PhysReg && "VR not assigned a physical register?"); - MRI->setPhysRegUsed(PhysReg); - unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; - ReusedOperands.markClobbered(RReg); - MI.getOperand(i).setReg(RReg); - MI.getOperand(i).setSubReg(0); - - if (!MO.isDead() && SpilledMIRegs.insert(VirtReg)) { - MachineInstr *&LastStore = MaybeDeadStores[StackSlot]; - SpillRegToStackSlot(MII, -1, PhysReg, StackSlot, RC, true, - LastStore, Spills, ReMatDefs, RegKills, KillOps); - NextMII = llvm::next(MII); - - // Check to see if this is a noop copy. If so, eliminate the - // instruction before considering the dest reg to be changed. - if (MI.isIdentityCopy()) { - ++NumDCE; - DEBUG(dbgs() << "Removing now-noop copy: " << MI); - InvalidateKills(MI, TRI, RegKills, KillOps); - EraseInstr(&MI); - Erased = true; - UpdateKills(*LastStore, TRI, RegKills, KillOps); - goto ProcessNextInst; - } - } - } - ProcessNextInst: - // Delete dead instructions without side effects. 
-    if (!Erased && !BackTracked && isSafeToDelete(MI)) {
-      InvalidateKills(MI, TRI, RegKills, KillOps);
-      EraseInstr(&MI);
-      Erased = true;
-    }
-    if (!Erased)
-      DistanceMap.insert(std::make_pair(&MI, DistanceMap.size()));
-    if (!Erased && !BackTracked) {
-      for (MachineBasicBlock::iterator II = &MI; II != NextMII; ++II)
-        UpdateKills(*II, TRI, RegKills, KillOps);
-    }
-    MII = NextMII;
-  }
-
-}
-
-llvm::VirtRegRewriter* llvm::createVirtRegRewriter() {
-  switch (RewriterOpt) {
-  default: llvm_unreachable("Unreachable!");
-  case local:
-    return new LocalRewriter();
-  case trivial:
-    return new TrivialRewriter();
-  }
-}
diff --git a/lib/CodeGen/VirtRegRewriter.h b/lib/CodeGen/VirtRegRewriter.h
deleted file mode 100644
index 93474e0d7ff7..000000000000
--- a/lib/CodeGen/VirtRegRewriter.h
+++ /dev/null
@@ -1,32 +0,0 @@
-//===-- llvm/CodeGen/VirtRegRewriter.h - VirtRegRewriter -*- C++ -*--------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_VIRTREGREWRITER_H
-#define LLVM_CODEGEN_VIRTREGREWRITER_H
-
-namespace llvm {
-  class LiveIntervals;
-  class MachineFunction;
-  class VirtRegMap;
-
-  /// VirtRegRewriter interface: Implementations of this interface assign
-  /// spilled virtual registers to stack slots, rewriting the code.
-  struct VirtRegRewriter {
-    virtual ~VirtRegRewriter();
-    virtual bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
-                                      LiveIntervals* LIs) = 0;
-  };
-
-  /// createVirtRegRewriter - Create an return a rewriter object, as specified
-  /// on the command line.
-  VirtRegRewriter* createVirtRegRewriter();
-
-}
-
-#endif
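The removed LocalRewriter revolves around a few recurring patterns. The spill-point handling in InsertSpills, for instance, amounts to emitting one stack store per register recorded at the point. A condensed sketch of that pattern, written as if it sat inside the removed VirtRegRewriter.cpp so that VRM, MRI, TII, TRI and MBB refer to the LocalRewriter members used above; the helper name is invented, and the pre-split filter, debug output and virtFolded() bookkeeping are elided:

// Condensed sketch of the removed InsertSpills(): emit a stack store for
// every <virtreg, isKill> pair recorded at a spill point.
bool emitSpillPointStores(MachineInstr *MI) {
  if (!VRM->isSpillPt(MI))
    return false;                                  // nothing recorded here
  MachineBasicBlock::iterator MII = MI;
  std::vector<std::pair<unsigned, bool> > &SpillRegs = VRM->getSpillPtSpills(MI);
  for (unsigned i = 0, e = SpillRegs.size(); i != e; ++i) {
    unsigned VirtReg = SpillRegs[i].first;
    bool isKill = SpillRegs[i].second;
    const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
    unsigned Phys = VRM->getPhys(VirtReg);         // physreg picked by the allocator
    int StackSlot = VRM->getStackSlot(VirtReg);    // slot assigned by the spiller
    MachineBasicBlock::iterator InsertPt = llvm::next(MII); // just past MI
    TII->storeRegToStackSlot(*MBB, InsertPt, Phys, isKill, StackSlot, RC, TRI);
    MachineInstr *StoreMI = prior(InsertPt);       // the store just emitted
    VRM->addSpillSlotUse(StackSlot, StoreMI);      // record the new slot use
  }
  return true;
}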
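The central decision in ProcessUses is whether a spilled use can reuse a physical register that already holds the stack slot's value, or whether a reload must be emitted. Stripped of the sub-register class check, rematerialization, inline-asm earlyclobber handling and kill-flag bookkeeping, it reduces to the sketch below; as before the helper name and simplified signature are illustrative, and AvailableSpills is the local map from the removed file:

// Reduced sketch: rewrite one spilled use operand, reusing an available
// value when possible and reloading from the stack slot otherwise.
void rewriteSpilledUse(MachineInstr &MI, unsigned OpIdx, unsigned VirtReg,
                       AvailableSpills &Spills) {
  int SS = VRM->getStackSlot(VirtReg);
  unsigned SubIdx = MI.getOperand(OpIdx).getSubReg();

  // Already live in some register? Then just point the operand at it.
  if (unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SS)) {
    unsigned RReg = SubIdx ? TRI->getSubReg(InReg, SubIdx) : InReg;
    MI.getOperand(OpIdx).setReg(RReg);
    MI.getOperand(OpIdx).setSubReg(0);
    return;                                       // no load emitted
  }

  // Otherwise reload into the register the allocator assigned.
  unsigned PhysReg = VRM->getPhys(VirtReg);
  const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
  MachineBasicBlock::iterator InsertLoc = &MI;
  TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SS, RC, TRI);
  MachineInstr *LoadMI = prior(InsertLoc);
  VRM->addSpillSlotUse(SS, LoadMI);               // the slot gained a use
  Spills.ClobberPhysReg(PhysReg);                 // PhysReg's old contents die
  Spills.addAvailable(SS, PhysReg);               // ...and it now holds SS
  unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
  MI.getOperand(OpIdx).setReg(RReg);
  MI.getOperand(OpIdx).setSubReg(0);
}

The real code additionally refuses the reuse when the register has already been clobbered by an earlier operand, when an inline-asm earlyclobber operand overlaps it, or when a sub-register use needs a register class the reused register is not in.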
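When the spilled use is tied to a def (a two-address use&def), reusing the available register in place would let the instruction clobber it, so the removed code instead copies the value into the register the allocator designated. The core of that path, with the same caveat that the function name and the member names are assumptions:

// Reduced sketch: the stack slot's value is available in PhysReg, but this
// operand may not clobber it, so materialize a copy in the designated register.
void copyForTiedReuse(MachineInstr &MI, unsigned OpIdx, unsigned VirtReg,
                      unsigned PhysReg, int ReuseSlot, AvailableSpills &Spills) {
  unsigned DesignatedReg = VRM->getPhys(VirtReg);
  if (DesignatedReg == PhysReg) {
    MI.getOperand(OpIdx).setReg(PhysReg);          // same register: plain reuse
    MI.getOperand(OpIdx).setSubReg(0);
    return;
  }
  MachineInstr *CopyMI = BuildMI(*MBB, &MI, MI.getDebugLoc(),
                                 TII->get(TargetOpcode::COPY), DesignatedReg)
                           .addReg(PhysReg);
  CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse); // mark as a reuse copy
  Spills.ClobberPhysReg(DesignatedReg);            // its previous contents die
  Spills.addAvailable(ReuseSlot, DesignatedReg);   // the slot also lives here now
  MI.getOperand(OpIdx).setReg(DesignatedReg);
  MI.getOperand(OpIdx).setSubReg(0);
}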
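RewriteMBB also deletes stores that turn out to be dead: MaybeDeadStores remembers, per stack slot, the last store whose value has not been read back, and a later store to the same slot lets the earlier one be removed. The bookkeeping reduces to roughly the following; EraseInstr stands in for the removed file's helper, kill-flag invalidation is elided, and both function names here are invented:

// Reduced sketch of the MaybeDeadStores bookkeeping, indexed by stack slot.
void noteStoreToSlot(MachineInstr *StoreMI, int StackSlot,
                     std::vector<MachineInstr *> &MaybeDeadStores) {
  if (MachineInstr *Prev = MaybeDeadStores[StackSlot]) {
    // The earlier store was never read back: it is dead, remove it.
    EraseInstr(Prev);
  }
  MaybeDeadStores[StackSlot] = StoreMI;            // new candidate dead store
}

void noteReadOfSlot(int StackSlot,
                    std::vector<MachineInstr *> &MaybeDeadStores) {
  // A reload (or folded read) of the slot keeps the pending store alive.
  MaybeDeadStores[StackSlot] = NULL;
}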
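The deleted header at the end of the diff is the whole public surface of this machinery: an abstract VirtRegRewriter plus a factory that picks an implementation according to a command-line option (RewriterOpt above). A minimal sketch of how a register-allocation pass drove it, using only what the removed header declares; the wrapper function name is illustrative:

#include "VirtRegRewriter.h"          // the header removed above
#include "llvm/ADT/OwningPtr.h"

// Reduced sketch: how a register allocator invoked the rewriter before this
// interface was removed.
static bool rewriteVirtRegs(llvm::MachineFunction &MF, llvm::VirtRegMap &VRM,
                            llvm::LiveIntervals *LIs) {
  llvm::OwningPtr<llvm::VirtRegRewriter> Rewriter(llvm::createVirtRegRewriter());
  return Rewriter->runOnMachineFunction(MF, VRM, LIs);
}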